Created
June 2, 2016 16:36
-
-
Save wesleybowman/b312b18781e5cc1a6721eda0cd98d1cd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8

# In[1]:

# thanks to commandlineluser for this bit of code and the reasoning behind it
# https://www.reddit.com/user/commandlineluser

import requests
from lxml import html  # NOTE(review): not used in the visible code — confirm before removing
from bs4 import BeautifulSoup

# Fetch the store-locator landing page, then submit its find-by-state form.
url = 'http://www.rockyboots.com/locator'

with requests.Session() as s:
    # A browser-like User-Agent avoids trivial bot filtering.
    s.headers.update({
        'user-agent': 'Mozilla/5.0'
    })
    # GET the page first so the session picks up any cookies, and so the
    # form's real POST target can be read out of the served HTML.
    r = s.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')
    url = soup.find('form', {'method': 'post'})['action']
    # Submit the state search the same way the browser form would.
    # 'NS' is the state/province code searched for — presumably Nova Scotia.
    r = s.post(url, data={
        'dwfrm_storelocator_address_states_stateUSCA': 'NS',
        'dwfrm_storelocator_findbystate' : 'Search'
    })
# In[2]:

# Now find the addresses in the HTML and shape them so Google can read
# them easily. This cell is a test to make sure the output is as wanted.

soup = BeautifulSoup(r.text, 'html.parser')
# Each store's address lives in an element with class "store-address".
address = soup.find_all(class_="store-address")

# prettify() puts each markup/text fragment on its own line; indices 1 and 3
# hold the two text lines of the address.
temp = address[0].prettify().split('\n')
# Bare expression: in the original notebook this displayed the joined address.
temp[1] + temp[3]
# In[3]:

# Do what the cell above did, but for all of the addresses at once.
for addr in address:
    temp = addr.prettify().split('\n')
    # Indices 1 and 3 of the prettified markup are the two address lines.
    output = temp[1] + temp[3]
    print(output)

# In[ ]:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.