Created: April 10, 2017 15:25
Save apfister/38942433504ed8bcc6a6e285387f0e3d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import arcpy | |
import json | |
import csv | |
from datetime import datetime | |
import time | |
# Wall-clock start of the run; used at the end to report total elapsed time.
start_time = time.time()
# Allow re-running the script without manually deleting prior outputs.
arcpy.env.overwriteOutput = True
# geometry paths
# Directory holding one "<REF_AREA>-esri.json" Esri-JSON polygon file per area.
geometry_dir = "geometry"
# Create insert cursor for table
# File geodatabase that receives one feature class per SERIES code.
working_path = "C:/Users/adam/Documents/ArcGIS/Projects/UNSD Data Import/sdg_data.gdb"
# Template feature class whose field schema is copied for each new feature class.
sdg_data_schema_template = '__sdg_data_schema_template'
# Field order for the insert cursor; must match the attribute tuples built in
# parseCSV(), with the geometry token ("SHAPE@") appended as the last value.
fields = ["GoalId","TargetId","IndicatorRef","IndicatorRefOrder","IndicatorRefList","IndicatorId",
"IndicatorDesc","IndicatorFN","IndicatorTier","isRetired","SERIES","SERIES_Desc","SERIES_isOfficial","SERIES_show",
"sdgArea_Level","sdgArea_Order","sdgArea_Parent","REF_AREA","sdgArea_Name","isLDC","isLLDC","isSIDS",
"FREQ","SOURCE_TYPE","AGE_GROUP","LOCATION","SEX","VALUE_TYPE","UNIT","UNIT_MULT","TIME_PERIOD","OBS_VALUE","NATURE",
"FOOTNOTES","SOURCE_DETAIL","TIME_DETAIL", "SHAPE@"]
# Rows grouped by SERIES code: {series: [row_tuple, ...]} — filled by parseCSV().
all_rows = {}
# Cache of loaded geometries keyed by REF_AREA so each JSON file is read once.
geometry_hash = {}
def parseCSV(): | |
# csv file | |
csv_path = "input/sdg-data-all-test.csv" | |
# i have no idea why i need to include this encoding flag | |
# by some stroke of luck, i found the same issue and solution here: https://bugs.python.org/issue7185 | |
with open(csv_path, encoding="utf-8-sig") as csv_file: | |
reader = csv.DictReader(csv_file) | |
for row in reader: | |
goal_id = row['GoalId'] | |
target_id = row['TargetId'] | |
indicator_ref = row['IndicatorRef'] | |
indicator_ref_order = row['IndicatorRefOrder'] | |
indicator_ref_list = row['IndicatorRefList'] | |
indicator_id = row['IndicatorId'] | |
indicator_desc = row['IndicatorDesc'] | |
indicator_fn = row['IndicatorFN'] | |
indicator_tier = row['IndicatorTier'] | |
is_retired = row['isRetired'] | |
series = row['SERIES'] | |
series_desc = row['SERIES_Desc'] | |
series_is_official = row['SERIES_isOfficial'] | |
series_show = row['SERIES_show'] | |
sdg_area_level = row['sdgArea_Level'] | |
if sdg_area_level == 'NULL': | |
sdg_area_level = None | |
sdg_area_order = row['sdgArea_Order'] | |
if sdg_area_order == 'NULL': | |
sdg_area_order = None | |
sdg_area_parent = row['sdgArea_Parent'] | |
if sdg_area_parent == 'NULL': | |
sdg_area_parent = None | |
ref_area = row['REF_AREA'] | |
sdg_area_name = row['sdgArea_Name'] | |
is_ldc = row['isLDC'] | |
if is_ldc == 'NULL': | |
is_ldc = None | |
is_lldc = row['isLLDC'] | |
if is_lldc == 'NULL': | |
is_lldc = None | |
is_sids = row['isSIDS'] | |
if is_sids == 'NULL': | |
is_sids = None | |
freq = row['FREQ'] | |
source_type = row['SOURCE_TYPE'] | |
age_group = row['AGE_GROUP'] | |
location = row['LOCATION'] | |
sex = row['SEX'] | |
value_type = row['VALUE_TYPE'] | |
unit = row['UNIT'] | |
unit_mult = row['UNIT_MULT'] | |
time_period = datetime.strptime(row['TIME_PERIOD'], '%Y') | |
# remove any instances of '<', '>' or ' ' | |
obs_value = row['OBS_VALUE'].replace('>', '').replace('<', '').replace(' ', '') | |
nature = row['NATURE'] | |
footnotes = row['FOOTNOTES'] | |
source_detail = row['SOURCE_DETAIL'] | |
time_detail = row['TIME_DETAIL'] | |
try: | |
obs_value = float(row['OBS_VALUE']) | |
except: | |
pass | |
row_hash_id = series | |
row = (goal_id, target_id, indicator_ref, indicator_ref_order, indicator_ref_list, indicator_id, | |
indicator_desc, indicator_fn, indicator_tier, is_retired, series, series_desc, series_is_official, series_show, | |
sdg_area_level, sdg_area_order, sdg_area_parent, ref_area, sdg_area_name, is_ldc, is_lldc, is_sids, | |
freq, source_type, age_group, location, sex, value_type, unit, unit_mult, time_period, obs_value, nature, | |
footnotes, source_detail, time_detail) | |
if row_hash_id not in all_rows: | |
all_rows[row_hash_id] = [] | |
all_rows[row_hash_id].append(row) | |
return all_rows | |
def createFeatureClass(fc_name):
    """Create an empty POLYGON feature class named *fc_name* in the working
    geodatabase, copying its field schema from the template feature class."""
    template = working_path + '/' + sdg_data_schema_template
    arcpy.CreateFeatureclass_management(working_path, fc_name, 'POLYGON', template)
def insertData():
    """Create one feature class per SERIES code and insert its grouped rows.

    For each SERIES key in the module-level ``all_rows`` dict this creates a
    feature class from the schema template, then inserts every attribute tuple
    with the country/area polygon appended. Geometries are cached in
    ``geometry_hash`` so each REF_AREA's JSON file is opened at most once.
    """
    total_rows = len(all_rows)
    row_counter = 1
    for fc_name, fc_rows in all_rows.items():
        createFeatureClass(fc_name)
        insert_cursor = arcpy.da.InsertCursor(working_path + '/' + fc_name, fields)
        try:
            for row in fc_rows:
                ref_area = row[17]  # REF_AREA position within `fields`
                # Look up geometry in the in-memory cache; on a miss, load the
                # per-area Esri JSON file and cache the result. Failures are
                # cached as None so a missing file is only attempted once
                # (the original retried — and leaked a file handle — per row).
                if ref_area not in geometry_hash:
                    try:
                        geometry_path = geometry_dir + '/' + ref_area + '-esri.json'
                        with open(geometry_path) as geometry_file:
                            geometry = json.loads(geometry_file.read())
                        geometry_hash[ref_area] = arcpy.AsShape(geometry, True)
                    except Exception:
                        # Best effort: rows with no usable geometry are still
                        # inserted, just without a shape.
                        geometry_hash[ref_area] = None
                esri_geometry = geometry_hash[ref_area]
                try:
                    insert_cursor.insertRow(row + (esri_geometry,))
                except Exception:
                    print ('error inserting row for ' + fc_name)
                    print (row + (esri_geometry,))
                    raise
        finally:
            # Release the cursor (and its schema lock) even if an insert failed.
            del insert_cursor
        print ('created ' + fc_name + ' with ' + str(len(fc_rows)) + ' rows. (' + str(row_counter) + ' of ' + str(total_rows) + ' total Series Datasets)')
        row_counter = row_counter + 1
if __name__ == "__main__":
    print ('reading csv file ..')
    parseCSV()
    print ('csv file loaded.')
    print ('inserting data ..')
    insertData()
    end_time = time.time()
    elapsed = end_time - start_time
    print ('done!')
    # BUG FIX: `elapsed` is a float; the original concatenated it directly to
    # a str, raising TypeError at the very end of every run.
    print ('total time : (' + str(elapsed) + ') seconds')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.