# import libraries
import time

import googlemaps
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# retrieve all Utah branch/ATM location names
# (state_link points at the "/ut" listing page; each element with class
# 'child' on that page contributes one location name)
state_link = 'https://locator.chase.com/ut'
driver = webdriver.Chrome(executable_path="/Users/erikgregorywebb/Downloads/chromedriver 2")
try:
    driver.get(state_link)
    items = driver.find_elements_by_class_name('child')
    # read the text while the session is still alive; the elements cannot be
    # queried once the browser has been shut down
    locations = [item.text for item in items]
finally:
    # quit() rather than close(): it also ends the chromedriver process, and
    # the finally clause guarantees that happens even if the page load fails
    driver.quit()

# convert branch/ATM location names to URLs:
# lower-case each name and replace spaces with "+" to form the path segment
locations = [name.lower().replace(" ", "+") for name in locations]
location_links = [state_link + "/" + name for name in locations]
# function to extract branch/atm location details
def getLocations(link, executable_path="/Users/erikgregorywebb/Downloads/chromedriver 2"):
    """Return the text of every element with class ``details`` on a page.

    A fresh Chrome session is started for the call and always shut down
    before returning, whether or not the page could be loaded.

    Args:
        link: URL of the page to open.
        executable_path: Location of the chromedriver binary. Defaults to
            the path this script has always used, so existing one-argument
            calls behave as before.

    Returns:
        A list of strings, one per matching element, in the order Selenium
        reported them. Empty if the page has no such elements.

    Raises:
        Any exception raised by Selenium while starting the browser,
        loading the page or reading the elements is propagated unchanged.
    """
    driver = webdriver.Chrome(executable_path=executable_path)
    try:
        driver.get(link)
        items = driver.find_elements_by_class_name('details')
        # collect the text before the session ends; elements are unusable afterwards
        return [item.text for item in items]
    finally:
        # quit() ends the chromedriver process as well as the window, so
        # repeated calls do not pile up orphaned drivers
        driver.quit()
# execute the function for all branch/atms links
all_locations = []
for link in location_links:
    # pause between page loads (presumably to avoid hammering the site)
    time.sleep(3)
    try:
        locations = getLocations(link)
    except Exception:
        # report the link that failed and move on; skipping the append keeps
        # stale results from an earlier iteration out of all_locations
        print(link)
        continue
    all_locations.append(locations)
# clean and organize the data
output_columns = ['city', 'address', 'type']
frames = []
for location in all_locations:
    # one row per scraped entry, one column per line of that entry's text;
    # built as a new list so all_locations is left intact and this step can be re-run
    rows = [entry.splitlines() for entry in location]
    if not rows:
        continue  # page had no entries at all
    data = pd.DataFrame(rows)
    # lines 0, 2, 3 and 6 of each entry are used as city, the two address
    # lines and the type; reindex (instead of data[[...]]) yields NaN rather
    # than a KeyError when every entry on a page is shorter than expected
    data = data.reindex(columns=[0, 2, 3, 6])
    data = data.dropna()
    if data.empty:
        continue  # nothing usable on this page
    data.columns = ['city', 'address1', 'address2', 'type']
    data['address'] = data['address1'] + " " + data['address2']
    data = data.drop(['address1', 'address2'], axis=1)
    # keep only the first word of the type line
    data['type'] = data['type'].str.split().str.get(0)
    data = data[output_columns]  # rearrange column names
    frames.append(data)
# concatenate once at the end; fall back to an empty, correctly-shaped frame
# so the de-duplication and export below still work when nothing was scraped
final = pd.concat(frames) if frames else pd.DataFrame(columns=output_columns)
# remove duplicates
ut_locations = final.drop_duplicates(subset=['city', 'address', 'type'], keep='first')
# export the data
ut_locations.to_csv("ut-locations.csv", index=False)
# read in branch/atms locations
# NOTE(review): the scraping step in this file writes "ut-locations.csv";
# confirm that the path below points at that output.
branches = pd.read_csv("/Users/erikgregorywebb/Documents/Python/chase-atm/utah-locations.csv")
# geocode each distinct address once: repeated addresses would otherwise cost
# extra API calls and multiply rows in the merge below
addresses = branches['address'].dropna().drop_duplicates()

# prepare the API Call
gmaps = googlemaps.Client(key='YOUR-KEY-HERE')

# geocode addresses to obtain geographic coordinates
lats = []
lngs = []
for address in addresses:
    time.sleep(1)  # pause between requests
    geo = gmaps.geocode(address)
    if geo:
        # use the first result returned for the address
        point = geo[0]['geometry']['location']
        lats.append(point['lat'])
        lngs.append(point['lng'])
    else:
        # no match: record missing coordinates instead of aborting the run
        lats.append(None)
        lngs.append(None)

# format and export results
data = pd.DataFrame({'address': list(addresses), 'lat': lats, 'lng': lngs})
# every row of branches is kept; rows sharing an address get the same coordinates
final = pd.merge(branches, data, on='address', how='outer')
final.to_csv("utah-chase-branch-google-maps.csv", index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment