Created
December 30, 2014 20:05
-
-
Save ColdSauce/0c8da327d6409f47992b to your computer and use it in GitHub Desktop.
This Python script collects the names, addresses and phone numbers of local businesses and saves them in text files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib2 | |
from bs4 import BeautifulSoup | |
# Download the first page of Yelp search results for Schaumburg, IL 60193 and
# parse it. `soup` is module-level state read by the functions defined below.
html = urllib2.urlopen(
    "http://www.yelp.com/search?find_loc=Schaumburg%2C+IL+60193"
).read()
soup = BeautifulSoup(html)
def writeToTextSelect(name):
    """Append the text of every element matching a CSS selector to a file.

    The module-level ``soup`` is queried with ``soup.select(name)``. The
    stripped, UTF-8 encoded text of each match is appended, one entry per
    line, to ``<name>.txt`` (the selector string itself is the file stem, so
    ``.biz-name`` is written to ``.biz-name.txt``).

    Args:
        name: CSS selector string passed to ``soup.select``.
    """
    matches = soup.select(name)
    # Nothing matched: return without opening the file, so no empty output
    # file is created.
    if not matches:
        return
    # Open the output once per call rather than once per matched element.
    with open(str(name) + ".txt", "a") as f:
        for line in matches:
            f.write(line.text.strip().encode('utf-8') + "\n")
def createTextFiles():
    """Append the business data found in the current ``soup`` to text files.

    Elements matching ``.biz-name`` and ``.biz-phone`` are written through
    ``writeToTextSelect``. The text of every ``<address>`` tag is stripped,
    UTF-8 encoded and appended, one entry per line, to ``addresses.txt``.
    All files are opened in append mode, so repeated calls accumulate output.
    """
    # Names and phone numbers are selected by CSS class and share one helper.
    writeToTextSelect(".biz-name")
    writeToTextSelect('.biz-phone')
    # Addresses are looked up by tag name rather than by CSS selector.
    addresses = soup.find_all("address")
    # No address tags: return without opening the file, so no empty output
    # file is created.
    if not addresses:
        return
    # Open the output once per call rather than once per address.
    with open("addresses.txt", "a") as f:
        for line in addresses:
            f.write(line.text.strip().encode('utf-8') + "\n")
# Walk the paginated search results: the `start` offset goes from 0 up to and
# including `limit` in steps of 10 (presumably the number of results per page
# -- confirm against the site).
limit = 990
currentIteration = 0
while currentIteration <= limit:
    # Download the page for this offset BEFORE parsing it. Parsing first would
    # process the previous iteration's HTML, writing the first page twice and
    # never writing the last one.
    # Extra query parameters are joined with '&'; a second '?' would become
    # part of the find_loc value.
    html = urllib2.urlopen("http://www.yelp.com/search?find_loc=Schaumburg%2C+IL+60193&find_desc&start=" + str(currentIteration)).read()
    # Rebind the module-level `soup` that createTextFiles() reads.
    soup = BeautifulSoup(html)
    createTextFiles()
    currentIteration += 10
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment