# Import libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
import unicodedata
import re
import pandas as pd
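# This script searches OpenTable (metro 8 / region 16, which the '-new-york'
# profile URLs below suggest is New York) for tables matching a date, time,
# and party size, then scrapes each result's cuisine, dining style, and
# address into a CSV.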
# Open the browser driver (the path is machine-specific; point it at your own chromedriver)
driver = webdriver.Chrome('/Users/James/Desktop/chromedriver.exe')
# Search parameters: time (24-hour), date, and party size
looptime = ['18']
loopdate = ['2016-11-26']
loopparty = ['4']
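# Each parameter is a list so several values can be swept in one run;
# '18' is 6:00 PM, URL-encoded below as 18%3A00 (18:00).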
# Empty lists to append to inside the loops
restaurant = []
cuisinel = []
diningl = []
addressl = []
pricel = []
restaurant2 = []
cuisine2l = []
websitel = []
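# Each successful scrape appends exactly one entry to each of these lists,
# so they line up row-for-row when assembled into a DataFrame at the end.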
# Loop over every party/date/time combination and collect restaurant names
for p in loopparty:
    for d in loopdate:
        for i in looptime:
            driver.get("http://www.opentable.com/s/?covers=" + p + "&currentview=list&datetime=" + d + "+" + i + "%3A00&metroid=8&regionids=16&size=100&sort=Popularity&from=0")
            html = BeautifulSoup(driver.page_source, 'html.parser')
            # The last pagination link gives the page count; at 100 results
            # per page, that times 100 bounds the 'from' offset below
            pagen = int(html.find_all('span', {'class': 'js-pagination-page pagination-link '})[-1].text) * 100
            for j in range(0, pagen, 100):
                pagel = str(j)
                driver.get("http://www.opentable.com/s/?covers=" + p + "&currentview=list&datetime=" + d + "+" + i + "%3A00&metroid=8&regionids=16&size=100&sort=Popularity&from=" + pagel)
                rhtml = BeautifulSoup(driver.page_source, 'html.parser')
                restaurantlist = rhtml.find_all('span', {'class': "rest-row-name-text"})[3:]  # skip the first three spans (non-result rows)
                for r in restaurantlist:
                    # Strip accents so the names can be slugified into URLs
                    restaurant.append(unicodedata.normalize("NFKD", r.text).encode('ascii', 'ignore').decode('ascii'))
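# 'restaurant' now holds every name from every page and parameter combination;
# the names must be slugified before their profile URLs can be guessed.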
# Format each name into the slug style used by OpenTable restaurant URLs
restaurantl = [re.sub('&', 'and', s) for s in restaurant]
restaurantl = [re.sub(' - ', '-', s) for s in restaurantl]
restaurantl = [re.sub(r'[^a-zA-Z0-9\s-]+', '', s) for s in restaurantl]
restaurantl = [re.sub(r'\s+', '-', s) for s in restaurantl]
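# e.g. a hypothetical "Joe's Grill & Bar - Midtown" becomes "Joes-Grill-and-Bar-Midtown"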
# Open each restaurant's page to add information; OpenTable profile URLs
# follow a few patterns, so each one is tried in turn
for l in list(restaurantl):  # iterate over a copy so failed names can be removed safely
    try:  # First restaurant link possibility
        driver.get("http://www.opentable.com/r/" + l + "-new-york")
        hide = driver.find_element(By.XPATH, '//*[@id="info"]/div[6]/a')
        hide.click()
        xcuisine = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[2]/span[2]').text
        xcuisine = unicodedata.normalize("NFKD", xcuisine).encode('ascii', 'ignore').decode('ascii')
        cuisine = xcuisine.split(',')[0]
        dining = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[1]/span[2]').text
        baddress = driver.find_element(By.XPATH, '//*[@id="info"]/div[2]/div/div[2]/div/div').text
        baddress = unicodedata.normalize("NFKD", baddress).encode('ascii', 'ignore').decode('ascii')  # normalize in case of accents
        baddress = baddress.replace('\n', ' ')
        nhtml = BeautifulSoup(driver.page_source, 'html.parser')
        title = nhtml.find_all('h1', {'itemprop': 'name'})[0].text
        cuisine2 = nhtml.find_all('li', {'class': 'profile-header-meta-item'})[0].text
        website = "http://www.opentable.com/r/" + l + "-new-york" + "?covers=" + loopparty[0] + "&dateTime=" + loopdate[0] + "%20" + looptime[0] + "%3A00"
        # Append only after every lookup succeeds, so the lists stay in step
        cuisinel.append(cuisine)
        diningl.append(dining)
        addressl.append(baddress)
        restaurant2.append(unicodedata.normalize("NFKD", title).encode('ascii', 'ignore').decode('ascii'))
        cuisine2l.append(unicodedata.normalize("NFKD", cuisine2).encode('ascii', 'ignore').decode('ascii'))
        websitel.append(website)
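    # The profile may live under a different URL pattern, so fall through to the next guess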
    except NoSuchElementException:
        try:  # Second restaurant link possibility, same scrape repeated
            driver.get("http://www.opentable.com/" + l)
            hide = driver.find_element(By.XPATH, '//*[@id="info"]/div[6]/a')
            hide.click()
            xcuisine = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[2]/span[2]').text
            xcuisine = unicodedata.normalize("NFKD", xcuisine).encode('ascii', 'ignore').decode('ascii')
            cuisine = xcuisine.split(',')[0]
            dining = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[1]/span[2]').text
            baddress = driver.find_element(By.XPATH, '//*[@id="info"]/div[2]/div/div[2]/div/div').text
            baddress = unicodedata.normalize("NFKD", baddress).encode('ascii', 'ignore').decode('ascii')
            baddress = baddress.replace('\n', ' ')
            nhtml = BeautifulSoup(driver.page_source, 'html.parser')
            title = nhtml.find_all('h1', {'itemprop': 'name'})[0].text
            cuisine2 = nhtml.find_all('li', {'class': 'profile-header-meta-item'})[0].text
            website = "http://www.opentable.com/" + l + "?covers=" + loopparty[0] + "&dateTime=" + loopdate[0] + "%20" + looptime[0] + "%3A00"
            cuisinel.append(cuisine)
            diningl.append(dining)
            addressl.append(baddress)
            restaurant2.append(unicodedata.normalize("NFKD", title).encode('ascii', 'ignore').decode('ascii'))
            cuisine2l.append(unicodedata.normalize("NFKD", cuisine2).encode('ascii', 'ignore').decode('ascii'))
            websitel.append(website)
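        # Last URL pattern to try before giving up on this name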
        except NoSuchElementException:
            try:  # Third restaurant link possibility, same scrape repeated
                driver.get("http://www.opentable.com/r/" + l)
                hide = driver.find_element(By.XPATH, '//*[@id="info"]/div[6]/a')
                hide.click()
                xcuisine = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[2]/span[2]').text
                xcuisine = unicodedata.normalize("NFKD", xcuisine).encode('ascii', 'ignore').decode('ascii')
                cuisine = xcuisine.split(',')[0]
                dining = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[1]/span[2]').text
                baddress = driver.find_element(By.XPATH, '//*[@id="info"]/div[2]/div/div[2]/div/div').text
                baddress = unicodedata.normalize("NFKD", baddress).encode('ascii', 'ignore').decode('ascii')
                baddress = baddress.replace('\n', ' ')
                nhtml = BeautifulSoup(driver.page_source, 'html.parser')
                title = nhtml.find_all('h1', {'itemprop': 'name'})[0].text
                cuisine2 = nhtml.find_all('li', {'class': 'profile-header-meta-item'})[0].text
                website = "http://www.opentable.com/r/" + l + "?covers=" + loopparty[0] + "&dateTime=" + loopdate[0] + "%20" + looptime[0] + "%3A00"
                cuisinel.append(cuisine)
                diningl.append(dining)
                addressl.append(baddress)
                restaurant2.append(unicodedata.normalize("NFKD", title).encode('ascii', 'ignore').decode('ascii'))
                cuisine2l.append(unicodedata.normalize("NFKD", cuisine2).encode('ascii', 'ignore').decode('ascii'))
                websitel.append(website)
            except NoSuchElementException:  # None of the three URL patterns matched
                print(l)  # log the name that could not be scraped
                restaurantl.remove(l)  # drop it so the slug list matches the scraped data
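# Sanity check: every parallel list should now be the same length
assert len(restaurant2) == len(addressl) == len(cuisine2l) == len(diningl) == len(websitel)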
# Assemble the scraped fields into a table and write it to CSV
tabledata = pd.DataFrame({'Restaurant': restaurant2, 'Address': addressl, 'Cuisine': cuisine2l, 'DiningStyle': diningl})
tabledata.to_csv('OpenTableData3.csv')
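# The reservation links collected in websitel are not written out above;
# add a 'Website': websitel column to the DataFrame if they are needed.
driver.quit()  # close the browser now that scraping is finished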