# Gist by @apfister, created April 10, 2017
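# Reads the UNSD SDG indicator CSV export, groups rows by SERIES code, creates one
# polygon feature class per series in the sdg_data.gdb file geodatabase (based on the
# __sdg_data_schema_template schema), and attaches country geometry loaded from
# per-REF_AREA Esri JSON files in the geometry/ directory.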
import arcpy
import json
import csv
from datetime import datetime
import time
start_time = time.time()
arcpy.env.overwriteOutput = True
# directory containing per-country Esri JSON geometry files
geometry_dir = "geometry"

# output file geodatabase and the template feature class that defines the table schema
working_path = "C:/Users/adam/Documents/ArcGIS/Projects/UNSD Data Import/sdg_data.gdb"
sdg_data_schema_template = '__sdg_data_schema_template'

# field order used by the insert cursor; SHAPE@ receives the country polygon
fields = ["GoalId", "TargetId", "IndicatorRef", "IndicatorRefOrder", "IndicatorRefList", "IndicatorId",
          "IndicatorDesc", "IndicatorFN", "IndicatorTier", "isRetired", "SERIES", "SERIES_Desc", "SERIES_isOfficial", "SERIES_show",
          "sdgArea_Level", "sdgArea_Order", "sdgArea_Parent", "REF_AREA", "sdgArea_Name", "isLDC", "isLLDC", "isSIDS",
          "FREQ", "SOURCE_TYPE", "AGE_GROUP", "LOCATION", "SEX", "VALUE_TYPE", "UNIT", "UNIT_MULT", "TIME_PERIOD", "OBS_VALUE", "NATURE",
          "FOOTNOTES", "SOURCE_DETAIL", "TIME_DETAIL", "SHAPE@"]

# rows grouped by SERIES code, and a cache of geometries keyed by REF_AREA
all_rows = {}
geometry_hash = {}
def parseCSV():
    # csv file to import
    csv_path = "input/sdg-data-all-test.csv"
    # the utf-8-sig encoding strips the byte order mark at the start of the file
    # the same issue and solution are described here: https://bugs.python.org/issue7185
    with open(csv_path, encoding="utf-8-sig") as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            goal_id = row['GoalId']
            target_id = row['TargetId']
            indicator_ref = row['IndicatorRef']
            indicator_ref_order = row['IndicatorRefOrder']
            indicator_ref_list = row['IndicatorRefList']
            indicator_id = row['IndicatorId']
            indicator_desc = row['IndicatorDesc']
            indicator_fn = row['IndicatorFN']
            indicator_tier = row['IndicatorTier']
            is_retired = row['isRetired']
            series = row['SERIES']
            series_desc = row['SERIES_Desc']
            series_is_official = row['SERIES_isOfficial']
            series_show = row['SERIES_show']

            # the export uses the literal string 'NULL' for missing values
            sdg_area_level = row['sdgArea_Level']
            if sdg_area_level == 'NULL':
                sdg_area_level = None
            sdg_area_order = row['sdgArea_Order']
            if sdg_area_order == 'NULL':
                sdg_area_order = None
            sdg_area_parent = row['sdgArea_Parent']
            if sdg_area_parent == 'NULL':
                sdg_area_parent = None

            ref_area = row['REF_AREA']
            sdg_area_name = row['sdgArea_Name']

            is_ldc = row['isLDC']
            if is_ldc == 'NULL':
                is_ldc = None
            is_lldc = row['isLLDC']
            if is_lldc == 'NULL':
                is_lldc = None
            is_sids = row['isSIDS']
            if is_sids == 'NULL':
                is_sids = None

            freq = row['FREQ']
            source_type = row['SOURCE_TYPE']
            age_group = row['AGE_GROUP']
            location = row['LOCATION']
            sex = row['SEX']
            value_type = row['VALUE_TYPE']
            unit = row['UNIT']
            unit_mult = row['UNIT_MULT']

            # TIME_PERIOD is a four-digit year; parse it into a datetime so it loads into a date field
            time_period = datetime.strptime(row['TIME_PERIOD'], '%Y')

            # remove any instances of '<', '>' or ' ' so the observation value can be parsed as a number
            obs_value = row['OBS_VALUE'].replace('>', '').replace('<', '').replace(' ', '')
            nature = row['NATURE']
            footnotes = row['FOOTNOTES']
            source_detail = row['SOURCE_DETAIL']
            time_detail = row['TIME_DETAIL']

            # convert the cleaned observation value to a float; leave it as a string if that fails
            try:
                obs_value = float(obs_value)
            except ValueError:
                pass

            # group rows by SERIES code; each group becomes its own feature class
            row_hash_id = series
            out_row = (goal_id, target_id, indicator_ref, indicator_ref_order, indicator_ref_list, indicator_id,
                       indicator_desc, indicator_fn, indicator_tier, is_retired, series, series_desc, series_is_official, series_show,
                       sdg_area_level, sdg_area_order, sdg_area_parent, ref_area, sdg_area_name, is_ldc, is_lldc, is_sids,
                       freq, source_type, age_group, location, sex, value_type, unit, unit_mult, time_period, obs_value, nature,
                       footnotes, source_detail, time_detail)

            if row_hash_id not in all_rows:
                all_rows[row_hash_id] = []
            all_rows[row_hash_id].append(out_row)

    return all_rows
def createFeatureClass(fc_name):
    # create a new polygon feature class in the working geodatabase using the template schema
    arcpy.CreateFeatureclass_management(working_path,
                                        fc_name,
                                        'POLYGON',
                                        working_path + '/' + sdg_data_schema_template)
def insertData():
    total_rows = len(all_rows)
    row_counter = 1
    for fc_name, fc_rows in all_rows.items():
        # create one feature class per series and open an insert cursor on it
        createFeatureClass(fc_name)
        insert_cursor = arcpy.da.InsertCursor(working_path + '/' + fc_name, fields)

        for row in fc_rows:
            # REF_AREA sits at index 17 of the row tuple built in parseCSV()
            ref_area = row[17]

            # try to load geometry from the in-memory hash; otherwise load it from file
            # and then store it in the hash for future requests
            esri_geometry = None
            if ref_area not in geometry_hash:
                try:
                    geometry_path = geometry_dir + '/' + ref_area + '-esri.json'
                    with open(geometry_path) as geometry_file:
                        geometry = json.load(geometry_file)
                    esri_geometry = arcpy.AsShape(geometry, True)
                    geometry_hash[ref_area] = esri_geometry
                except Exception:
                    # no usable geometry file for this area; insert the row with a null shape
                    pass
            else:
                esri_geometry = geometry_hash[ref_area]

            # append the geometry to the attribute values and insert the row
            row_list = list(row)
            row_list.append(esri_geometry)
            row_tuple = tuple(row_list)
            try:
                insert_cursor.insertRow(row_tuple)
            except Exception:
                print('error inserting row for ' + fc_name)
                print(row_tuple)
                raise

        print('created ' + fc_name + ' with ' + str(len(fc_rows)) + ' rows. (' + str(row_counter) + ' of ' + str(total_rows) + ' total Series Datasets)')
        row_counter = row_counter + 1

        # release the cursor so the feature class is not left locked
        del insert_cursor
if __name__ == "__main__":
    print('reading csv file ..')
    parseCSV()
    print('csv file loaded.')

    print('inserting data ..')
    insertData()

    end_time = time.time()
    elapsed = end_time - start_time
    print('done!')
    # elapsed is a float, so convert it to a string before concatenating
    print('total time : (' + str(elapsed) + ') seconds')