# Import libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
import unicodedata
import re
import pandas as pd
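# This script searches OpenTable (metro 8 / region 16, which the '-new-york'
# profile URLs below suggest is New York) for tables matching a date, time,
# and party size, then scrapes each result's cuisine, dining style, and
# address into a CSV.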
# Open the browser driver (the path is machine-specific; point it at your own chromedriver)
driver = webdriver.Chrome('/Users/James/Desktop/chromedriver.exe')
# Search parameters: time (24-hour), date, and party size
looptime = ['18']
loopdate = ['2016-11-26']
loopparty = ['4']
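# Each parameter is a list so several values can be swept in one run;
# '18' is 6:00 PM, URL-encoded below as 18%3A00 (18:00).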
# Empty lists to append to inside the loops
restaurant = []
cuisinel = []
diningl = []
addressl = []
pricel = []
restaurant2 = []
cuisine2l = []
websitel = []
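# Each successful scrape appends exactly one entry to each of these lists,
# so they line up row-for-row when assembled into a DataFrame at the end.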
# Loop over every party/date/time combination and collect restaurant names
for p in loopparty:
    for d in loopdate:
        for i in looptime:
            driver.get("http://www.opentable.com/s/?covers=" + p + "&currentview=list&datetime=" + d + "+" + i + "%3A00&metroid=8&regionids=16&size=100&sort=Popularity&from=0")
            html = BeautifulSoup(driver.page_source, 'html.parser')
            # The last pagination link gives the page count; at 100 results
            # per page, that times 100 bounds the 'from' offset below
            pagen = int(html.find_all('span', {'class': 'js-pagination-page pagination-link '})[-1].text) * 100
            for j in range(0, pagen, 100):
                pagel = str(j)
                driver.get("http://www.opentable.com/s/?covers=" + p + "&currentview=list&datetime=" + d + "+" + i + "%3A00&metroid=8&regionids=16&size=100&sort=Popularity&from=" + pagel)
                rhtml = BeautifulSoup(driver.page_source, 'html.parser')
                restaurantlist = rhtml.find_all('span', {'class': "rest-row-name-text"})[3:]  # skip the first three spans (non-result rows)
                for r in restaurantlist:
                    # Strip accents so the names can be slugified into URLs
                    restaurant.append(unicodedata.normalize("NFKD", r.text).encode('ascii', 'ignore').decode('ascii'))
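# 'restaurant' now holds every name from every page and parameter combination;
# the names must be slugified before their profile URLs can be guessed.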
# Format each name into the slug style used by OpenTable restaurant URLs
restaurantl = [re.sub('&', 'and', s) for s in restaurant]
restaurantl = [re.sub(' - ', '-', s) for s in restaurantl]
restaurantl = [re.sub(r'[^a-zA-Z0-9\s-]+', '', s) for s in restaurantl]
restaurantl = [re.sub(r'\s+', '-', s) for s in restaurantl]
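# e.g. a hypothetical "Joe's Grill & Bar - Midtown" becomes "Joes-Grill-and-Bar-Midtown"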
# Open each restaurant's page to add information; OpenTable profile URLs
# follow a few patterns, so each one is tried in turn
for l in list(restaurantl):  # iterate over a copy so failed names can be removed safely
    try:  # First restaurant link possibility
        driver.get("http://www.opentable.com/r/" + l + "-new-york")
        hide = driver.find_element(By.XPATH, '//*[@id="info"]/div[6]/a')
        hide.click()
        xcuisine = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[2]/span[2]').text
        xcuisine = unicodedata.normalize("NFKD", xcuisine).encode('ascii', 'ignore').decode('ascii')
        cuisine = xcuisine.split(',')[0]
        dining = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[1]/span[2]').text
        baddress = driver.find_element(By.XPATH, '//*[@id="info"]/div[2]/div/div[2]/div/div').text
        baddress = unicodedata.normalize("NFKD", baddress).encode('ascii', 'ignore').decode('ascii')  # normalize in case of accents
        baddress = baddress.replace('\n', ' ')
        nhtml = BeautifulSoup(driver.page_source, 'html.parser')
        title = nhtml.find_all('h1', {'itemprop': 'name'})[0].text
        cuisine2 = nhtml.find_all('li', {'class': 'profile-header-meta-item'})[0].text
        website = "http://www.opentable.com/r/" + l + "-new-york" + "?covers=" + loopparty[0] + "&dateTime=" + loopdate[0] + "%20" + looptime[0] + "%3A00"
        # Append only after every lookup succeeds, so the lists stay in step
        cuisinel.append(cuisine)
        diningl.append(dining)
        addressl.append(baddress)
        restaurant2.append(unicodedata.normalize("NFKD", title).encode('ascii', 'ignore').decode('ascii'))
        cuisine2l.append(unicodedata.normalize("NFKD", cuisine2).encode('ascii', 'ignore').decode('ascii'))
        websitel.append(website)
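    # The profile may live under a different URL pattern, so fall through to the next guess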
    except NoSuchElementException:
        try:  # Second restaurant link possibility, same scrape repeated
            driver.get("http://www.opentable.com/" + l)
            hide = driver.find_element(By.XPATH, '//*[@id="info"]/div[6]/a')
            hide.click()
            xcuisine = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[2]/span[2]').text
            xcuisine = unicodedata.normalize("NFKD", xcuisine).encode('ascii', 'ignore').decode('ascii')
            cuisine = xcuisine.split(',')[0]
            dining = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[1]/span[2]').text
            baddress = driver.find_element(By.XPATH, '//*[@id="info"]/div[2]/div/div[2]/div/div').text
            baddress = unicodedata.normalize("NFKD", baddress).encode('ascii', 'ignore').decode('ascii')
            baddress = baddress.replace('\n', ' ')
            nhtml = BeautifulSoup(driver.page_source, 'html.parser')
            title = nhtml.find_all('h1', {'itemprop': 'name'})[0].text
            cuisine2 = nhtml.find_all('li', {'class': 'profile-header-meta-item'})[0].text
            website = "http://www.opentable.com/" + l + "?covers=" + loopparty[0] + "&dateTime=" + loopdate[0] + "%20" + looptime[0] + "%3A00"
            cuisinel.append(cuisine)
            diningl.append(dining)
            addressl.append(baddress)
            restaurant2.append(unicodedata.normalize("NFKD", title).encode('ascii', 'ignore').decode('ascii'))
            cuisine2l.append(unicodedata.normalize("NFKD", cuisine2).encode('ascii', 'ignore').decode('ascii'))
            websitel.append(website)
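        # Last URL pattern to try before giving up on this name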
        except NoSuchElementException:
            try:  # Third restaurant link possibility, same scrape repeated
                driver.get("http://www.opentable.com/r/" + l)
                hide = driver.find_element(By.XPATH, '//*[@id="info"]/div[6]/a')
                hide.click()
                xcuisine = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[2]/span[2]').text
                xcuisine = unicodedata.normalize("NFKD", xcuisine).encode('ascii', 'ignore').decode('ascii')
                cuisine = xcuisine.split(',')[0]
                dining = driver.find_element(By.XPATH, '//*[@id="profile-details"]/div/div/div[1]/p[1]/span[2]').text
                baddress = driver.find_element(By.XPATH, '//*[@id="info"]/div[2]/div/div[2]/div/div').text
                baddress = unicodedata.normalize("NFKD", baddress).encode('ascii', 'ignore').decode('ascii')
                baddress = baddress.replace('\n', ' ')
                nhtml = BeautifulSoup(driver.page_source, 'html.parser')
                title = nhtml.find_all('h1', {'itemprop': 'name'})[0].text
                cuisine2 = nhtml.find_all('li', {'class': 'profile-header-meta-item'})[0].text
                website = "http://www.opentable.com/r/" + l + "?covers=" + loopparty[0] + "&dateTime=" + loopdate[0] + "%20" + looptime[0] + "%3A00"
                cuisinel.append(cuisine)
                diningl.append(dining)
                addressl.append(baddress)
                restaurant2.append(unicodedata.normalize("NFKD", title).encode('ascii', 'ignore').decode('ascii'))
                cuisine2l.append(unicodedata.normalize("NFKD", cuisine2).encode('ascii', 'ignore').decode('ascii'))
                websitel.append(website)
            except NoSuchElementException:  # None of the three URL patterns matched
                print(l)  # log the name that could not be scraped
                restaurantl.remove(l)  # drop it so the slug list matches the scraped data
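# Sanity check: every parallel list should now be the same length
assert len(restaurant2) == len(addressl) == len(cuisine2l) == len(diningl) == len(websitel)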
# Assemble the scraped fields into a table and write it to CSV
tabledata = pd.DataFrame({'Restaurant': restaurant2, 'Address': addressl, 'Cuisine': cuisine2l, 'DiningStyle': diningl})
tabledata.to_csv('OpenTableData3.csv')
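# The reservation links collected in websitel are not written out above;
# add a 'Website': websitel column to the DataFrame if they are needed.
driver.quit()  # close the browser now that scraping is finished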