Skip to content

Instantly share code, notes, and snippets.

@ColdSauce
Created December 30, 2014 20:05
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ColdSauce/0c8da327d6409f47992b to your computer and use it in GitHub Desktop.
Save ColdSauce/0c8da327d6409f47992b to your computer and use it in GitHub Desktop.
This Python script collects the names, addresses, and phone numbers of local businesses and saves them in text files.
import urllib2
from bs4 import BeautifulSoup
# Download the first page of Yelp search results for Schaumburg, IL 60193
# and parse it. `html` and `soup` are module-level names: the functions
# below read `soup`, and the pagination loop rebinds both.
first_page = urllib2.urlopen(
    "http://www.yelp.com/search?find_loc=Schaumburg%2C+IL+60193"
)
html = first_page.read()
soup = BeautifulSoup(html)
def writeToTextSelect(name):
    """Append the text of every element matching a CSS selector to a file.

    The module-level ``soup`` is queried with ``soup.select(name)``. The
    stripped text of each match is UTF-8 encoded and appended, one entry
    per line, to ``<name>.txt`` in the current working directory (so the
    selector ``.biz-name`` is written to ``.biz-name.txt``).

    Args:
        name: CSS selector string; also used as the base name of the
            output file.
    """
    matches = soup.select(name)
    if not matches:
        # Nothing matched: leave the filesystem untouched instead of
        # creating an empty output file.
        return
    # Open the file once for the whole batch instead of once per element.
    with open(str(name) + ".txt", "a") as f:
        f.writelines(
            match.text.strip().encode('utf-8') + "\n" for match in matches
        )
def createTextFiles():
    """Append the data found in the current ``soup`` to the output files.

    Elements with the CSS classes ``.biz-name`` and ``.biz-phone`` are
    written through ``writeToTextSelect``; the text of every ``<address>``
    tag is appended, one entry per line, to ``addresses.txt``. All files
    are opened in append mode, so repeated calls accumulate results.
    """
    # Both of these are selected by CSS class, so they share one helper.
    writeToTextSelect(".biz-name")
    writeToTextSelect('.biz-phone')

    # Addresses are selected by tag name rather than by CSS class.
    addresses = soup.find_all("address")
    if not addresses:
        # Nothing to record: do not create an empty addresses.txt.
        return
    # Open the file once for the whole page instead of once per address.
    with open("addresses.txt", "a") as f:
        f.writelines(
            tag.text.strip().encode('utf-8') + "\n" for tag in addresses
        )
# Walk through the paginated search results. The `start` query parameter is
# advanced in steps of 10, from 0 up to and including `limit`.
limit = 990
for currentIteration in range(0, limit + 1, 10):
    # Extra parameters are joined with '&'. A second '?' would become part
    # of the find_loc value instead of starting a new parameter.
    response = urllib2.urlopen(
        "http://www.yelp.com/search?find_loc=Schaumburg%2C+IL+60193"
        "&find_desc=&start=" + str(currentIteration)
    )
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    # Parse the page that was just downloaded *before* extracting from it.
    # Parsing first and fetching second would always write the previous
    # page, duplicating the first page and dropping the last one.
    soup = BeautifulSoup(html)
    createTextFiles()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment