View SeleniumPreparation.py
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
# Windows path to the ChromeDriver executable; handed to webdriver.Chrome()
# wherever a browser session is started.
chromepath = r'C:\Users\Downloads\chromedriver_win32\chromedriver.exe'
View LoopThroughWebElements.py
# Collect the "href" attribute of every web element currently held in
# `url_elt`, in the order the elements appear in that list.
out_lst = [elt.get_attribute("href") for elt in url_elt]
View BasicZomatoSearchPage.py
# Start a Chrome session using the ChromeDriver binary at `chromepath`.
# NOTE(review): newer Selenium 4 releases expect the driver path to be passed
# through a Service object rather than positionally - confirm against the
# installed Selenium version.
driver = webdriver.Chrome(chromepath)
out_lst = []

# Visit search-result pages 1 through 1002.
for i in range(1, 1003):
    driver.get(f'https://www.zomato.com/jakarta/restoran?page={i}')
    # Grab every element carrying the "result-title" class on this page.
    # `url_elt` is rebound on each iteration, so only the last page's
    # elements remain once the loop ends; anything that must be read from
    # them has to happen inside this loop body.
    url_elt = driver.find_elements(By.CLASS_NAME, "result-title")
View ZomatoSearchPage.py
# Start a Chrome session using the ChromeDriver binary at `chromepath`.
# NOTE(review): newer Selenium 4 releases expect the driver path to be passed
# through a Service object rather than positionally - confirm against the
# installed Selenium version.
driver = webdriver.Chrome(chromepath)
out_lst = []

# Visit search-result pages 1 through 1002, logging progress for each page.
for i in range(1, 1003):
    print(f'Opening Search Pages {i}')
    driver.get(f'https://www.zomato.com/jakarta/restoran?page={i}')
    print('Accessing Webpage OK \n')
    # Grab every element carrying the "result-title" class on this page.
    # `url_elt` is rebound on each iteration, so only the last page's
    # elements remain once the loop ends; anything that must be read from
    # them has to happen inside this loop body.
    url_elt = driver.find_elements(By.CLASS_NAME, "result-title")
View ZomatoSearchPageDLV.py
# Start a Chrome session using the ChromeDriver binary at `chromepath`.
# NOTE(review): newer Selenium 4 releases expect the driver path to be passed
# through a Service object rather than positionally - confirm against the
# installed Selenium version.
driver = webdriver.Chrome(chromepath)
out_lst_dlv = []

# Visit delivery search-result pages 1 through 223, logging progress.
for i in range(1, 224):
    print(f'Opening Search Pages {i}')
    driver.get(f'https://www.zomato.com/jakarta/delivery?page={i}')
    print('Accessing Webpage OK \n')
    # Grab every element carrying the "result-title" class on this page.
    # `url_elt_dlv` is rebound on each iteration, so only the last page's
    # elements remain once the loop ends; anything that must be read from
    # them has to happen inside this loop body.
    url_elt_dlv = driver.find_elements(By.CLASS_NAME, "result-title")
View ZoRestName.py
# List intended to hold the scraped restaurant names.
rest_name = []
# NOTE(review): newer Selenium 4 releases expect the driver path to be passed
# through a Service object rather than positionally - confirm against the
# installed Selenium version.
driver = webdriver.Chrome(chromepath)

# Open each address stored in the 'Website' column of `out_df_nd` and read
# the text of the first <h1> element on the page into `name`.
for url in out_df_nd['Website']:
    driver.get(url)
    name_anchor = driver.find_element(By.TAG_NAME, 'h1')
    name = name_anchor.text
View ZoRestNameScrape2
# List intended to hold the scraped restaurant names.
rest_name = []
# NOTE(review): newer Selenium 4 releases expect the driver path to be passed
# through a Service object rather than positionally - confirm against the
# installed Selenium version.
driver = webdriver.Chrome(chromepath)

# Open each address stored in the 'Website' column of `out_df_nd`, logging
# once the page request has returned.
for url in out_df_nd['Website']:
    driver.get(url)
    print('Accessing Webpage OK')
View ZoRestType.py
# Restaurant Type
# Several anchors can match the XPath, so the text of every match is gathered
# into one list and that list is stored as a single entry of `rest_type`.
rest_type_eltlist = driver.find_elements(By.XPATH, """/html/body/div[1]/div[2]/main/div/section[3]/section/section[1]/section[1]/div/a""")
rest_type_list = [rest_type_anchor.text for rest_type_anchor in rest_type_eltlist]
rest_type.append(rest_type_list)
# Build the log text from the whole list: this stays defined when nothing
# matched (empty string) and reports every type instead of only the last one.
rest_type_text = ", ".join(rest_type_list)
print(f'Scraping Restaurant Type - {name} - {rest_type_text} - OK')
View ZoRestAreaAddress.py
# Restaurant Area
# Read the text of the anchor at the fixed XPath and record it in `rest_area`.
rest_area_anchor = driver.find_element(By.XPATH, """/html/body/div[1]/div[2]/main/div/section[3]/section/section[1]/section[1]/a""")
rest_area_text = rest_area_anchor.text
rest_area.append(rest_area_text)
print(f'Scraping Restaurant Area - {name} - {rest_area_text} - OK')

# Restaurant Address
# Read the text of the paragraph at the fixed XPath and record it in
# `rest_address`.
rest_address_anchor = driver.find_element(By.XPATH, """/html/body/div[1]/div[2]/main/div/section[4]/section/article/section/p""")
rest_address_text = rest_address_anchor.text
rest_address.append(rest_address_text)
View ZoRestAreaAddress.py
# Restaurant Rating
# Look up the rating paragraph at the fixed XPath; when the element is absent
# the placeholder "Not Rated Yet" is recorded instead.
try:
    rest_rating_anchor = driver.find_element(By.XPATH, """/html/body/div[1]/div[2]/main/div/section[3]/section/section[2]/section/div[1]/p""")
except NoSuchElementException:
    rest_rating_text = "Not Rated Yet"
else:
    rest_rating_text = rest_rating_anchor.text
rest_rating.append(rest_rating_text)
# The log label names the field actually being scraped (rating).
print(f'Scraping Restaurant Rating - {name} - {rest_rating_text} - OK')
OlderNewer