# Gist by @jameslee0920 (last active November 18, 2016)
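# Scrape OpenTable's list view for New York restaurants with availability at a
# given date, time, and party size, then visit each restaurant's page to pull
# its name, address, cuisine, and dining style into a CSV.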
# Import libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
import unicodedata
import re
import pandas as pd
# Start the Chrome driver (path to the local chromedriver binary)
driver = webdriver.Chrome('/Users/James/Desktop/chromedriver.exe')
# Search parameters: time (24-hour), date, and party size
looptime = ['18']
loopdate = ['2016-11-26']
loopparty = ['4']
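# With the values above, the first search URL requested below is:
#   http://www.opentable.com/s/?covers=4&currentview=list&datetime=2016-11-26+18%3A00&metroid=8&regionids=16&size=100&sort=Popularity&from=0
# (%3A is the URL-encoded ':'; metroid=8 / regionids=16 appear to select the
# New York area, consistent with the "-new-york" slugs tried further down.)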
# Empty lists to collect results in the loops below
restaurant = []
cuisinel = []
diningl = []
addressl = []
pricel = []  # declared but never filled
restaurant2 = []
cuisine2l = []
websitel = []
# Loop over every party size / date / time combination and collect restaurant names
for p in loopparty:
    for d in loopdate:
        for i in looptime:
            driver.get("http://www.opentable.com/s/?covers="+p+"&currentview=list&datetime="+d+"+"+i+"%3A00&metroid=8&regionids=16&size=100&sort=Popularity&from=0")
            html = BeautifulSoup(driver.page_source, 'html.parser')
            # The last pagination link times the page size (100) gives the total result count
            pagen = int(html.find_all('span', {'class':'js-pagination-page pagination-link '})[-1].text)*100
            # Walk the result pages 100 listings at a time
            for j in range(0, pagen, 100):
                pagel = str(j)
                driver.get("http://www.opentable.com/s/?covers="+p+"&currentview=list&datetime="+d+"+"+i+"%3A00&metroid=8&regionids=16&size=100&sort=Popularity&from="+pagel)
                rhtml = BeautifulSoup(driver.page_source, 'html.parser')
                restaurantlist = rhtml.find_all('span', {'class':"rest-row-name-text"})[3:]  # skip the first three matched spans
                for r in range(0, len(restaurantlist)):
                    # Normalize to plain ASCII so accented names don't break the slugs below
                    restaurant.append(unicodedata.normalize("NFKD", restaurantlist[r].text).encode('ascii','ignore'))
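# For example, if the last pagination link reads "4", pagen = 400 and the inner
# loop above requests offsets from=0, 100, 200, 300: four pages of 100 listings.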
# Convert each restaurant name into a URL slug for its OpenTable page
restaurantl = [re.sub('&', 'and', s) for s in restaurant]
restaurantl = [re.sub(' - ', '-', s) for s in restaurantl]
restaurantl = [re.sub(r'[^a-zA-Z0-9\s-]+', '', s) for s in restaurantl]
restaurantl = [re.sub(r'\s+', '-', s) for s in restaurantl]
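# For example, the chain above turns a scraped name into a URL slug step by step
# (hypothetical input; unusually punctuated names may still miss OpenTable's
# canonical slug and fall through to the except branches below):
#   "Bobby Van's Grill & Steakhouse"
#     -> "Bobby Van's Grill and Steakhouse"   (& -> and)
#     -> "Bobby Vans Grill and Steakhouse"    (strip remaining punctuation)
#     -> "Bobby-Vans-Grill-and-Steakhouse"    (whitespace -> hyphens)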
# Visit each restaurant's page and scrape its details. OpenTable uses a few
# different URL patterns, so try each in turn. Iterate over a copy of the list
# so that removing failed slugs at the bottom does not skip entries.
for l in list(restaurantl):
    try:  # first URL pattern: /r/<slug>-new-york
        driver.get("http://www.opentable.com/r/"+l+"-new-york")
        # Expand the collapsed "additional info" section so its fields are in the DOM
        hide = driver.find_element(By.XPATH, '//*[@id="info"]/div[6]/a')
        hide.click()
        xcuisine = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[2]/span[2]').text
        xcuisine = str(unicodedata.normalize("NFKD", xcuisine).encode('ascii','ignore'))
        cuisine = xcuisine.split(',')[0]  # keep only the primary cuisine
        cuisinel.append(cuisine)
        dining = str(driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[1]/span[2]').text)
        diningl.append(dining)
        baddress = driver.find_element(By.XPATH, '//*[@id="info"]/div[2]/div/div[2]/div/div').text
        baddress = str(unicodedata.normalize("NFKD", baddress).encode('ascii','ignore'))  # normalize in case of accents
        baddress = re.sub('\\n', ' ', baddress)  # collapse the multi-line address onto one line
        addressl.append(baddress)
        nhtml = BeautifulSoup(driver.page_source, 'html.parser')
        title = nhtml.find_all('h1', {'itemprop':'name'})[0].text
        restaurant2.append(str(unicodedata.normalize("NFKD", title).encode('ascii','ignore')))
        cuisine2 = nhtml.find_all('li', {'class': 'profile-header-meta-item'})[0].text
        cuisine2l.append(str(unicodedata.normalize("NFKD", cuisine2).encode('ascii','ignore')))
        # Reservation link preloaded with the party size, date, and time searched for
        website = "http://www.opentable.com/r/"+l+"-new-york"+"?covers="+loopparty[0]+"&dateTime="+loopdate[0]+"%20"+looptime[0]+"%3A00"
        websitel.append(website)
    except NoSuchElementException:
        try:  # second URL pattern: /<slug>
            driver.get("http://www.opentable.com/"+l)
            hide = driver.find_element(By.XPATH, '//*[@id="info"]/div[6]/a')
            hide.click()
            xcuisine = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[2]/span[2]').text
            xcuisine = str(unicodedata.normalize("NFKD", xcuisine).encode('ascii','ignore'))
            cuisine = xcuisine.split(',')[0]
            cuisinel.append(cuisine)
            dining = str(driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[1]/span[2]').text)
            diningl.append(dining)
            baddress = driver.find_element(By.XPATH, '//*[@id="info"]/div[2]/div/div[2]/div/div').text
            baddress = str(unicodedata.normalize("NFKD", baddress).encode('ascii','ignore'))
            baddress = re.sub('\\n', ' ', baddress)
            addressl.append(baddress)
            nhtml = BeautifulSoup(driver.page_source, 'html.parser')
            title = nhtml.find_all('h1', {'itemprop':'name'})[0].text
            restaurant2.append(str(unicodedata.normalize("NFKD", title).encode('ascii','ignore')))
            cuisine2 = nhtml.find_all('li', {'class': 'profile-header-meta-item'})[0].text
            cuisine2l.append(str(unicodedata.normalize("NFKD", cuisine2).encode('ascii','ignore')))
            website = "http://www.opentable.com/"+l+"?covers="+loopparty[0]+"&dateTime="+loopdate[0]+"%20"+looptime[0]+"%3A00"
            websitel.append(website)
        except NoSuchElementException:
            try:  # third URL pattern: /r/<slug>
                driver.get("http://www.opentable.com/r/"+l)
                hide = driver.find_element(By.XPATH, '//*[@id="info"]/div[6]/a')
                hide.click()
                xcuisine = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[2]/span[2]').text
                xcuisine = str(unicodedata.normalize("NFKD", xcuisine).encode('ascii','ignore'))
                cuisine = xcuisine.split(',')[0]
                cuisinel.append(cuisine)
                dining = str(driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[1]/span[2]').text)
                diningl.append(dining)
                baddress = driver.find_element(By.XPATH, '//*[@id="info"]/div[2]/div/div[2]/div/div').text
                baddress = str(unicodedata.normalize("NFKD", baddress).encode('ascii','ignore'))
                baddress = re.sub('\\n', ' ', baddress)
                addressl.append(baddress)
                nhtml = BeautifulSoup(driver.page_source, 'html.parser')
                title = nhtml.find_all('h1', {'itemprop':'name'})[0].text
                restaurant2.append(str(unicodedata.normalize("NFKD", title).encode('ascii','ignore')))
                cuisine2 = nhtml.find_all('li', {'class': 'profile-header-meta-item'})[0].text
                cuisine2l.append(str(unicodedata.normalize("NFKD", cuisine2).encode('ascii','ignore')))
                website = "http://www.opentable.com/r/"+l+"?covers="+loopparty[0]+"&dateTime="+loopdate[0]+"%20"+looptime[0]+"%3A00"
                websitel.append(website)
            except NoSuchElementException:
                # None of the three URL patterns matched: log the slug and drop it
                print(l)
                restaurantl.remove(l)
# Assemble the scraped fields into a DataFrame and write them out
# (restaurant, cuisinel, and websitel are filled above but not exported here)
tabledata = pd.DataFrame({'Restaurant': restaurant2, 'Address': addressl, 'Cuisine': cuisine2l, 'DiningStyle': diningl})
tabledata.to_csv('OpenTableData3.csv')
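# A quick sanity check on the output (hypothetical follow-up, run separately):
#   df = pd.read_csv('OpenTableData3.csv', index_col=0)
#   print(df.head())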