Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Datateam datafreshness #339

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions civis/KYC/kyc_311_to_esri_socr_v1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env python
# coding: utf-8

import os
pwd=os.getcwd()
import sys
#!{sys.executable} -m pip install sodapy
from sodapy import Socrata
import pandas as pd
import numpy as np
from arcgis.gis import GIS
from shapely.geometry import Point
import geopandas as gpd
from arcgis.features import FeatureLayerCollection
import intake_civis
import datetime

# --- Runtime configuration ---------------------------------------------------
# Account credentials come from the environment; a missing variable raises
# KeyError as soon as the module is loaded.
lahub_user = os.environ["LAHUB_ACC_USERNAME"]
lahub_pass = os.environ["LAHUB_ACC_PASSWORD"]
# The Socrata app token may be supplied via SOCRATA_APP_TOKEN. The literal is
# kept only as a backward-compatible fallback for jobs that do not set it.
# NOTE(review): if the token is considered sensitive, rotate it and drop the
# fallback once every job defines SOCRATA_APP_TOKEN.
socrata_token = os.environ.get("SOCRATA_APP_TOKEN", 'LJ60SFL7ZqoC4IWosLhEmJV2a')
socrata_user = os.environ["SOCRATA_ACC_USERNAME"]
socrata_pass = os.environ["SOCRATA_ACC_PASSWORD"]
# Item id passed to update_geohub_layer() as the layer to overwrite.
myla311_layer = '4db3e9c3d13543b6a686098e0603ddcf'
pwd = os.getcwd()
# CSV written by prep_311_data() and then handed to update_geohub_layer().
OUTPUT_FILE = os.path.join(pwd, "MyLA311 Service Requests Last 6 Months.csv")

def prep_311_data(file, token, user, pas):
    """Write the most recent six months of 311 requests to a CSV file.

    Downloads dataset ``rq3b-xjk8`` from data.lacity.org, drops rows whose
    ``requesttype`` is 'Homeless Encampment', discards rows dated in the
    future, and keeps the six months ending at the newest remaining
    ``createddate``.

    Args:
        file: Destination path for the CSV (written without the index).
        token: Socrata app token.
        user: Socrata account username.
        pas: Socrata account password.
    """
    client = Socrata("data.lacity.org", token, username=user, password=pas)
    try:
        # NOTE(review): this still pulls every row (up to the limit) into
        # memory before filtering; a SoQL ``where`` clause could narrow it.
        records = client.get('rq3b-xjk8', limit=10000000)
    finally:
        # Release the underlying HTTP session even if the download fails.
        client.close()

    df = pd.DataFrame(records)
    # .copy() so the column assignment below writes to an independent frame
    # instead of a slice of ``df`` (avoids pandas' chained-assignment hazard).
    df2 = df[df.requesttype != 'Homeless Encampment'].copy()
    df2['createddate'] = pd.to_datetime(df2['createddate'])
    # Drop rows stamped later than "now" so they cannot anchor the window.
    df2 = df2[df2['createddate'] <= datetime.datetime.now()]
    # The window is anchored on the newest record present, not on today.
    range_max = df2['createddate'].max()
    range_min = range_max - pd.DateOffset(months=6)
    df2 = df2[df2['createddate'] >= range_min]
    df2.to_csv(file, index=False)

def update_geohub_layer(user, pw, layer, update_data):
    """Overwrite a hosted feature layer on lahub.maps.arcgis.com with a file.

    Args:
        user: ArcGIS account username.
        pw: ArcGIS account password.
        layer: Item id of the layer to overwrite.
        update_data: Path of the file passed to ``manager.overwrite``.

    Returns:
        The value returned by ``manager.overwrite`` (previously discarded).

    Raises:
        ValueError: If ``layer`` does not resolve to an item for this login.
    """
    geohub = GIS('https://lahub.maps.arcgis.com', user, pw)
    flayer = geohub.content.get(layer)
    if flayer is None:
        # Fail with a clear message instead of an obscure error in fromitem().
        raise ValueError(
            f"No item with id {layer!r} is visible to user {user!r}"
        )
    flayer_collection = FeatureLayerCollection.fromitem(flayer)
    return flayer_collection.manager.overwrite(update_data)


if __name__ == "__main__":
    # Step 1: regenerate the CSV; step 2: push that same file to the layer.
    prep_311_data(
        file=OUTPUT_FILE,
        token=socrata_token,
        user=socrata_user,
        pas=socrata_pass,
    )
    update_geohub_layer(
        user=lahub_user,
        pw=lahub_pass,
        layer=myla311_layer,
        update_data=OUTPUT_FILE,
    )
53 changes: 53 additions & 0 deletions civis/KYC/kyc_permits_to_esri.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env python
# coding: utf-8

#--Pulling Last 6 Months of Commercial & Apartment Data


import os
pwd=os.getcwd()
import sys
#!{sys.executable} -m pip install sodapy
from sodapy import Socrata
import pandas as pd
import numpy as np
from arcgis.gis import GIS
from shapely.geometry import Point
import geopandas as gpd
from arcgis.features import FeatureLayerCollection
import intake_civis
import datetime

# --- Runtime configuration ---------------------------------------------------
# Account credentials come from the environment; a missing variable raises
# KeyError as soon as the module is loaded.
lahub_user = os.environ["LAHUB_ACC_USERNAME"]
lahub_pass = os.environ["LAHUB_ACC_PASSWORD"]
# The Socrata app token may be supplied via SOCRATA_APP_TOKEN. The literal is
# kept only as a backward-compatible fallback for jobs that do not set it.
# NOTE(review): if the token is considered sensitive, rotate it and drop the
# fallback once every job defines SOCRATA_APP_TOKEN.
socrata_token = os.environ.get("SOCRATA_APP_TOKEN", 'LJ60SFL7ZqoC4IWosLhEmJV2a')
socrata_user = os.environ["SOCRATA_ACC_USERNAME"]
socrata_pass = os.environ["SOCRATA_ACC_PASSWORD"]
# Item id passed to update_geohub_layer() as the layer to overwrite.
permit_layer = '48fca217dd5a410bbfd6ce0abcdd3a26'
pwd = os.getcwd()
# CSV written by prep_permit_data() and then handed to update_geohub_layer().
OUTPUT_FILE = os.path.join(pwd, "Building and Safety Permits, Last 6 Months.csv")

def prep_permit_data(file, token, user, pas):
    """Write the latest six months of selected building permits to CSV.

    Reads dataset ``n9nq-vewq`` from data.lacity.org, keeps permits whose
    ``permit_sub_type`` is Apartment or Commercial and whose ``permit_type``
    is Bldg-Addition, Bldg-New or Bldg-Demolition, discards rows dated in the
    future, and keeps the six months ending at the newest ``issue_date``.

    Args:
        file: Destination path for the CSV (written without the index).
        token: Socrata app token.
        user: Socrata account username.
        pas: Socrata account password.
    """
    sub_types = ['Apartment', 'Commercial']
    permit_types = ['Bldg-Addition', 'Bldg-New', 'Bldg-Demolition']
    # Ask the server for only the permit categories we keep, so the whole
    # dataset is not downloaded just to be thrown away locally.
    soql_filter = (
        "permit_sub_type in ('Apartment', 'Commercial') AND "
        "permit_type in ('Bldg-Addition', 'Bldg-New', 'Bldg-Demolition')"
    )

    client = Socrata("data.lacity.org", token, username=user, password=pas)
    try:
        records = client.get('n9nq-vewq', where=soql_filter, limit=10000000)
    finally:
        # Release the underlying HTTP session even if the download fails.
        client.close()

    df = pd.DataFrame(records)
    # Re-apply the category filter locally so the output does not depend on
    # server-side matching; .copy() makes the assignment below write to an
    # independent frame rather than a slice of ``df``.
    wanted = df.permit_sub_type.isin(sub_types) & df.permit_type.isin(permit_types)
    df2 = df[wanted].copy()
    df2['issue_date'] = pd.to_datetime(df2['issue_date'])
    # Drop rows stamped later than "now" so they cannot anchor the window.
    df2 = df2[df2['issue_date'] <= datetime.datetime.now()]
    # The window is anchored on the newest record present; an explicit upper
    # bound is unnecessary because range_max is already the column maximum.
    range_max = df2['issue_date'].max()
    range_min = range_max - pd.DateOffset(months=6)
    df2 = df2[df2['issue_date'] >= range_min]
    df2.to_csv(file, index=False)

def update_geohub_layer(user, pw, layer, update_data):
    """Overwrite a hosted feature layer on lahub.maps.arcgis.com with a file.

    Args:
        user: ArcGIS account username.
        pw: ArcGIS account password.
        layer: Item id of the layer to overwrite.
        update_data: Path of the file passed to ``manager.overwrite``.

    Returns:
        The value returned by ``manager.overwrite`` (previously discarded).

    Raises:
        ValueError: If ``layer`` does not resolve to an item for this login.
    """
    geohub = GIS('https://lahub.maps.arcgis.com', user, pw)
    flayer = geohub.content.get(layer)
    if flayer is None:
        # Fail with a clear message instead of an obscure error in fromitem().
        raise ValueError(
            f"No item with id {layer!r} is visible to user {user!r}"
        )
    flayer_collection = FeatureLayerCollection.fromitem(flayer)
    return flayer_collection.manager.overwrite(update_data)


if __name__ == "__main__":
    # Step 1: regenerate the CSV; step 2: push that same file to the layer.
    prep_permit_data(
        file=OUTPUT_FILE,
        token=socrata_token,
        user=socrata_user,
        pas=socrata_pass,
    )
    update_geohub_layer(
        user=lahub_user,
        pw=lahub_pass,
        layer=permit_layer,
        update_data=OUTPUT_FILE,
    )
52 changes: 52 additions & 0 deletions civis/KYC/kyc_permits_to_esri_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env python
# coding: utf-8

#--Pulling Last 6 Months of Commercial & Apartment Data


import os
pwd=os.getcwd()
import sys
#!{sys.executable} -m pip install sodapy
from sodapy import Socrata
import pandas as pd
import numpy as np
from arcgis.gis import GIS
from shapely.geometry import Point
import geopandas as gpd
from arcgis.features import FeatureLayerCollection
import intake_civis
import datetime

# ArcGIS account credentials come from the environment; a missing variable
# raises KeyError as soon as the module is loaded.
lahub_user = os.environ["LAHUB_ACC_USERNAME"]
lahub_pass = os.environ["LAHUB_ACC_PASSWORD"]
# The Socrata app token may be supplied via SOCRATA_APP_TOKEN. The literal is
# kept only as a backward-compatible fallback for jobs that do not set it.
# NOTE(review): if the token is considered sensitive, rotate it and drop the
# fallback once every job defines SOCRATA_APP_TOKEN.
socrata_token = os.environ.get("SOCRATA_APP_TOKEN", 'LJ60SFL7ZqoC4IWosLhEmJV2a')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May be worth also putting this in an environment variable, if it should be considered sensitive.

# Socrata account credentials come from the environment; a missing variable
# raises KeyError as soon as the module is loaded.
socrata_user = os.environ["SOCRATA_ACC_USERNAME"]
socrata_pass = os.environ["SOCRATA_ACC_PASSWORD"]
# Item id passed to update_geohub_layer() as the layer to overwrite.
permit_layer = '48fca217dd5a410bbfd6ce0abcdd3a26'
pwd = os.getcwd()
# CSV written by prep_permit_data() and then handed to update_geohub_layer().
# Built with os.path.join rather than manual "/" concatenation.
OUTPUT_FILE = os.path.join(pwd, "Building and Safety Permits, Last 6 Months.csv")

def prep_permit_data(file, token, user, pas):
    """Write the latest six months of selected building permits to CSV.

    Reads dataset ``n9nq-vewq`` from data.lacity.org, keeps permits whose
    ``permit_sub_type`` is Apartment or Commercial and whose ``permit_type``
    is Bldg-Addition, Bldg-New or Bldg-Demolition, discards rows dated in the
    future, and keeps the six months ending at the newest ``issue_date``.

    Args:
        file: Destination path for the CSV (written without the index).
        token: Socrata app token.
        user: Socrata account username.
        pas: Socrata account password.
    """
    sub_types = ['Apartment', 'Commercial']
    permit_types = ['Bldg-Addition', 'Bldg-New', 'Bldg-Demolition']
    # Review feedback on this function asked for a targeted SoQL query rather
    # than reading every row into memory: the category filter is therefore
    # sent to the server as a ``where`` clause.
    soql_filter = (
        "permit_sub_type in ('Apartment', 'Commercial') AND "
        "permit_type in ('Bldg-Addition', 'Bldg-New', 'Bldg-Demolition')"
    )

    client = Socrata("data.lacity.org", token, username=user, password=pas)
    try:
        # NOTE(review): the reviewer also asked whether a ten-million-row cap
        # is guaranteed to cover the last six months. With the filter above
        # the cap applies to matching rows only, but it remains an assumption
        # about dataset size rather than a guarantee.
        records = client.get('n9nq-vewq', where=soql_filter, limit=10000000)
    finally:
        # Release the underlying HTTP session even if the download fails.
        client.close()

    df = pd.DataFrame(records)
    # Re-apply the category filter locally so the output does not depend on
    # server-side matching; .copy() makes the assignment below write to an
    # independent frame rather than a slice of ``df``.
    wanted = df.permit_sub_type.isin(sub_types) & df.permit_type.isin(permit_types)
    df2 = df[wanted].copy()
    df2['issue_date'] = pd.to_datetime(df2['issue_date'])
    # Drop rows stamped later than "now" so they cannot anchor the window.
    df2 = df2[df2['issue_date'] <= datetime.datetime.now()]
    # The window is anchored on the newest record present, not on today.
    range_max = df2['issue_date'].max()
    range_min = range_max - pd.DateOffset(months=6)
    df2 = df2[df2['issue_date'] >= range_min]
    df2.to_csv(file, index=False)

def update_geohub_layer(user, pw, layer, update_data):
    """Overwrite a hosted feature layer on lahub.maps.arcgis.com with a file.

    Args:
        user: ArcGIS account username.
        pw: ArcGIS account password.
        layer: Item id of the layer to overwrite.
        update_data: Path of the file passed to ``manager.overwrite``.

    Returns:
        The value returned by ``manager.overwrite`` (previously discarded).

    Raises:
        ValueError: If ``layer`` does not resolve to an item for this login.
    """
    geohub = GIS('https://lahub.maps.arcgis.com', user, pw)
    flayer = geohub.content.get(layer)
    if flayer is None:
        # Fail with a clear message instead of an obscure error in fromitem().
        raise ValueError(
            f"No item with id {layer!r} is visible to user {user!r}"
        )
    flayer_collection = FeatureLayerCollection.fromitem(flayer)
    return flayer_collection.manager.overwrite(update_data)


if __name__ == "__main__":
    # Step 1: regenerate the CSV; step 2: push that same file to the layer.
    prep_permit_data(
        file=OUTPUT_FILE,
        token=socrata_token,
        user=socrata_user,
        pas=socrata_pass,
    )
    update_geohub_layer(
        user=lahub_user,
        pw=lahub_pass,
        layer=permit_layer,
        update_data=OUTPUT_FILE,
    )
Loading