Skip to content

Instantly share code, notes, and snippets.

@erikgregorywebb
Last active October 3, 2018 02:39
Show Gist options
  • Save erikgregorywebb/a59aaf59dd6e28d6587df599bdf122b3 to your computer and use it in GitHub Desktop.
Save erikgregorywebb/a59aaf59dd6e28d6587df599bdf122b3 to your computer and use it in GitHub Desktop.
# import libraries
import os
import time

import googlemaps
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# retrieve all UT branch/atm location names from the state-level locator page
state_link = 'https://locator.chase.com/ut'
driver = webdriver.Chrome(executable_path="/Users/erikgregorywebb/Downloads/chromedriver 2")
try:
    driver.get(state_link)
    # the text of every element with class 'child' is kept as one location name
    locations = [item.text for item in driver.find_elements_by_class_name('child')]
finally:
    # shut the browser down even when the page load or the lookup fails
    driver.quit()
# convert branch/atm location names to urls:
# lower-case each name and use "+" wherever it contains a space
# (slice assignment keeps updating the existing list in place)
locations[:] = [name.lower().replace(" ", "+") for name in locations]
# every location page lives directly under the state page
location_links = [state_link + "/" + slug for slug in locations]
# function to extract branch/atm location details
def getLocations(link):
    """Return the text of every 'details' element found on one page.

    A new Chrome session is started for `link`, the `.text` of each element
    whose class name is 'details' is collected, and the browser is shut
    down again before returning.

    Args:
        link: URL of the page to load.

    Returns:
        A list of strings, one per matching element (empty if none match).

    Raises:
        Whatever Selenium raises while loading the page or locating the
        elements; the browser is still shut down in that case.
    """
    driver = webdriver.Chrome(executable_path="/Users/erikgregorywebb/Downloads/chromedriver 2")
    try:
        driver.get(link)
        return [item.text for item in driver.find_elements_by_class_name('details')]
    finally:
        # this function runs once per link, so a leaked browser per failure adds up
        driver.quit()
# execute the function for all branch/atms links
all_locations = []
for link in location_links:
    time.sleep(3)  # pause between page loads
    try:
        locations = getLocations(link)
    except Exception:
        # report the link that failed and move on; without the `continue`
        # the result of the previous iteration would be appended a second time
        print(link)
        continue
    all_locations.append(locations)
# clean and organize the data
# line positions (within one details entry) used for the
# city, the two address parts and the location type
detail_fields = [0, 2, 3, 6]
frames = []
for location in all_locations:
    # one row per details entry, one column per line of its text
    rows = [entry.splitlines() for entry in location]
    data = pd.DataFrame(rows)
    # a page with no entries (or only short ones) lacks some of the needed
    # columns; selecting them would raise KeyError, so skip that page
    if not set(detail_fields).issubset(data.columns):
        continue
    # entries too short to fill every selected column are dropped
    data = data[detail_fields].dropna()
    data.columns = ['city', 'address1', 'address2', 'type']
    data['address'] = data['address1'] + " " + data['address2']
    data['type'] = data['type'].str.split().str.get(0)  # keep only the first word
    frames.append(data[['city', 'address', 'type']])  # rearrange column names
# concatenate once instead of growing a frame inside the loop;
# fall back to an empty frame that still has the expected columns
if frames:
    final = pd.concat(frames)
else:
    final = pd.DataFrame(columns=['city', 'address', 'type'])
# remove duplicates: rows sharing city, address and type collapse to the first one seen
dedupe_keys = ['city', 'address', 'type']
ut_locations = final.drop_duplicates(subset=dedupe_keys, keep='first')
# export the data (without the index column)
ut_locations.to_csv("ut-locations.csv", index=False)
# read in branch/atms locations
# NOTE(review): the scraping step writes "ut-locations.csv" to the working
# directory; confirm this path points at the same data
branches = pd.read_csv("/Users/erikgregorywebb/Documents/Python/chase-atm/utah-locations.csv")
addresses = branches['address']
# prepare the API Call
# the key is taken from the GOOGLE_MAPS_API_KEY environment variable when set,
# otherwise the placeholder below must be replaced by hand
gmaps = googlemaps.Client(key=os.environ.get('GOOGLE_MAPS_API_KEY', 'YOUR-KEY-HERE'))
# geocode addresses to obtain geographic coordinates
lats = []
lngs = []
for address in addresses:
    time.sleep(1)  # pause between API requests
    geo = gmaps.geocode(address)
    if geo:
        # use the first result returned for the address
        point = geo[0]['geometry']['location']
        lats.append(point['lat'])
        lngs.append(point['lng'])
    else:
        # no result for this address: record a gap instead of crashing on
        # geo[0], and keep both lists aligned with `addresses`
        lats.append(None)
        lngs.append(None)
# format and export results
data = pd.DataFrame({'address': addresses, 'lat': lats, 'lng': lngs})
# keep one coordinate row per distinct address; otherwise an address that
# occurs several times in `branches` would be matched against each of its
# copies here and the merge would multiply those rows
data = data.drop_duplicates(subset='address')
final = pd.merge(branches, data, on='address', how='outer')
final.to_csv("utah-chase-branch-google-maps.csv", index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment