@SohanChy
Created April 19, 2020 09:20
Multithreaded Python Script For BULK Fetching GEOCODE data from Google Maps API
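The script expects the requests package (pip install requests) and a Google Maps Geocoding API key in SECRET_API_KEY. Judging from the fields the code reads, the input file is an array of objects with an id field and a label field whose value is itself a JSON-encoded string containing an "en" name; the rows below are invented purely to illustrate the shape:

[
  {"id": 1, "label": "{\"en\": \"Savar\"}"},
  {"id": 2, "label": "{\"en\": \"Dhamrai\"}"}
]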
import json
import requests
import threading

SECRET_API_KEY = 'KEY_HERE'

# Input JSON must be an array of objects,
# e.g. [{}, {}, {}]
input_arr_json_file = 'lookup_upazillas.json'


# Field to use for the address lookup.
def get_address_field(row):
    # row['label'] is itself JSON-encoded, which is why it is decoded a second time.
    # You may not need this for your data.
    return json.loads(row['label'])['en']


# Field printed to the console to identify each row.
identifier_field = "id"


# Lat/long will be pushed to the parent level of the row.
def append_lat_long(lat, long, row_copy):
    row_copy['latitude'] = lat
    row_copy['longitude'] = long


# Append resolution metadata to the row.
def append_meta(row_copy, is_resolved, resolved_data):
    row_copy['meta'] = {
        'is_resolved': is_resolved,
        'resolved_data': resolved_data
    }


# Build the geocode lookup query string.
def get_loc_lookup_string(row):
    return "{}, {}".format(get_address_field(row), "Bangladesh")


# Maximum number of threads allowed to run at once.
thread_throttle = 5


def actual_lookup(resolved_data, new_data, num, total, row):
    print("Resolving: {} of {} - {}, {}, {}".format(
        num, total, row[identifier_field], get_address_field(row), get_loc_lookup_string(row)
    ))
    response = requests.get(
        'https://maps.googleapis.com/maps/api/geocode/json',
        {
            'address': get_loc_lookup_string(new_data),
            'key': SECRET_API_KEY
        }).json()
    if len(response['results']) > 0:
        print("RESOLVED: {} of {} - {}, {}, {}".format(
            num, total, row[identifier_field], get_address_field(row), get_loc_lookup_string(row)
        ))
        append_meta(new_data, True, response['results'][0]['geometry']['location'])
    else:
        print("RESOLVE_FAILED: {} of {} - {}, {}, {}".format(
            num, total, row[identifier_field], get_address_field(row), get_loc_lookup_string(row)
        ))
        append_meta(new_data, False, None)
    resolved_data.append(new_data)


def multi_threaded(data):
    resolved_data = []
    total = len(data)
    threads = []
    for num, row in enumerate(data):
        if type(row) is not dict:
            print('ERROR: Row not a dictionary/object')
            return
        new_data = row.copy()
        t = threading.Thread(target=actual_lookup, args=(resolved_data, new_data, num, total, row))
        threads.append(t)

    # Start threads in batches of at most thread_throttle,
    # draining the running batch before starting the next one.
    running_threads = []
    for thread in threads:
        if len(running_threads) >= thread_throttle:
            print("throttling threads...")
            while len(running_threads) > 0:
                rt = running_threads[0]
                rt.join()
                running_threads.pop(0)
        thread.start()
        running_threads.append(thread)

    # Join the final batch so every lookup finishes before the results are written out.
    for rt in running_threads:
        rt.join()

    return resolved_data


def main():
    with open(input_arr_json_file) as json_file:
        data = json.load(json_file)
    resolved_data = multi_threaded(data)
    with open('resolved.json', 'w') as outfile:
        json.dump(resolved_data, outfile)


if __name__ == '__main__':
    main()
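A rough way to run this: save the script (e.g. as geocode.py; the filename is arbitrary), fill in SECRET_API_KEY, put lookup_upazillas.json next to it, and run python geocode.py. Each row in resolved.json is a copy of the input row with a meta object attached; on a successful lookup, resolved_data holds the location object from the first Geocoding API result, so an output row looks roughly like this (coordinates made up for illustration):

{"id": 1, "label": "{\"en\": \"Savar\"}", "meta": {"is_resolved": true, "resolved_data": {"lat": 23.85, "lng": 90.26}}}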