Skip to content

Instantly share code, notes, and snippets.

@AlexArcPy
Created April 3, 2018 09:07
Show Gist options
  • Save AlexArcPy/ab75709c26289f134c42973835a59c22 to your computer and use it in GitHub Desktop.
Save AlexArcPy/ab75709c26289f134c42973835a59c22 to your computer and use it in GitHub Desktop.
Demonstrating efficient use of heapq.merge when working with arcpy.da.SearchCursor structures
'''
Demonstrating efficient use of heapq.merge when working with
arcpy.da.SearchCursor structures. The heapq.merge function will
merge multiple sorted inputs into a single sorted output which
can be useful when iterating over multiple arcpy.da.SearchCursor
iterators.
'''
from pprint import pprint
import heapq
import itertools
import arcpy
# Geoprocessing environment: let tool outputs replace existing datasets
# and resolve bare dataset names against the scratch geodatabase.
arcpy.env.overwriteOutput = True
arcpy.env.workspace = r'C:\GIS\Temp\ArcGISHomeFolder\sample.gdb'

# States whose cities are exported and compared.
STATES = [
    'California',
    'Texas',
    'Arizona',
]

# Source feature class holding the cities of all states.
FC = r'C:\Program Files (x86)\ArcGIS\Desktop10.5\TemplateData\TemplateData.gdb\USA\cities'
# Prepare the sample data by exporting one feature class per state.
#
# Two different spellings of the state name are needed:
#   * the normalized one (spaces replaced with underscores) for the output
#     feature class name, because the cursors created later open
#     'cities_<normalized name>' and dataset names should not contain spaces;
#   * the original one for the where clause, because it has to match the
#     STATE_NAME attribute value (presumably stored with its original
#     spelling, e.g. 'New Mexico' -- verify against the source data).
states_written = []
for state_name in STATES:
    state_name_normalized = state_name.replace(' ', '_')
    arcpy.Select_analysis(
        in_features=FC,
        out_feature_class='cities_{}'.format(state_name_normalized),
        where_clause='''STATE_NAME = '{}' '''.format(state_name),
    )
    # Remember the normalized name so the matching feature class can be
    # looked up again when the cursors are created.
    states_written.append(state_name_normalized)
# Open one search cursor per exported feature class. Every cursor asks
# the data source to return its rows ordered by POP1990, largest first,
# so each cursor on its own is already a sorted input.
cursors = []
for written_state in states_written:
    cursor = arcpy.da.SearchCursor(
        'cities_{}'.format(written_state),
        ['CITY_NAME', 'POP1990', 'STATE_NAME'],
        sql_clause=(None, 'ORDER BY POP1990 DESC'),
    )
    cursors.append(cursor)
@profile
# ----------------------------------------------------------------------
def heapq_solution():
    """Return up to 10 rows with the largest POP1990 across all cursors.

    Every cursor yields its rows in descending POP1990 order, so
    heapq.merge can interleave them lazily; only as many rows as are
    needed to produce the first 10 merged results are pulled from the
    cursors.

    NOTE(review): the `profile` decorator is not defined in this file;
    presumably it is injected by an external profiler at run time.
    """
    def population(row):
        # POP1990 is the second field requested by every cursor
        return row[1]

    # lazily merge the pre-sorted cursors, largest population first
    merged_desc = heapq.merge(*cursors, key=population, reverse=True)
    return list(itertools.islice(merged_desc, 10))
@profile
# ----------------------------------------------------------------------
def sorted_solution():
    """Return all rows from all cursors sorted by POP1990, largest first.

    Unlike the heapq-based variant, this reads every cursor to the end
    and sorts the complete row list; trimming to the top rows is left
    to the caller.

    NOTE(review): the `profile` decorator is not defined in this file;
    presumably it is injected by an external profiler at run time.
    """
    # exhaust every cursor into a single list of rows
    rows = list(itertools.chain.from_iterable(cursors))
    # POP1990 is the second field of each row
    rows.sort(key=lambda row: row[1], reverse=True)
    return rows
# show the 10 most populated cities found by the lazy heapq-based merge
top_merged = heapq_solution()
pprint(top_merged)

# the merge has already pulled rows from the cursors, so rewind each
# da.SearchCursor before iterating over them a second time
for cursor in cursors:
    cursor.reset()

# the sorted variant returns every row; keep only the first 10 for display
all_sorted = sorted_solution()
pprint(all_sorted[:10])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment