Skip to content

Instantly share code, notes, and snippets.

@ahmedbesbes
Created January 20, 2020 13:58
Show Gist options
  • Save ahmedbesbes/34d0c62a515da72cd80dcd56f2b9c653 to your computer and use it in GitHub Desktop.
Save ahmedbesbes/34d0c62a515da72cd80dcd56f2b9c653 to your computer and use it in GitHub Desktop.
# Imported here so this snippet stays self-contained; selenium is already a
# dependency of the `driver`, `By`, `EC` and `WebDriverWait` names used below.
from selenium.common.exceptions import TimeoutException

# Query-string filters appended to every sub-category listing URL.
_LISTING_QUERY = "?numberofreviews=0&timeperiod=0&status=all"

# `By.CLASS_NAME` only accepts a single class name, so a value containing a
# space can never match. A CSS selector is used instead to require both the
# `category-business-card` and `card` classes on the same element.
_COMPANY_CARD_LOCATOR = (By.CSS_SELECTOR, ".category-business-card.card")


def _load_listing_page(page_url):
    """Open *page_url* in the shared driver and wait for a company card.

    The wait is best effort: if no card shows up within ``timeout`` seconds
    the function returns normally and the caller scrapes whatever is present.
    Any other driver error is allowed to propagate.
    """
    driver.get(page_url)
    try:
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(_COMPANY_CARD_LOCATOR)
        )
    except TimeoutException:
        # A listing page without cards is not fatal; carry on with the crawl.
        pass


# Maps each sub-category key to the list of company URLs collected from all
# of its listing pages.
company_urls = {}
for category in tqdm_notebook(data):
    for sub_category in tqdm_notebook(data[category], leave=False):
        listing_url = base_url + data[category][sub_category] + _LISTING_QUERY
        company_urls[sub_category] = []
        _load_listing_page(listing_url)

        page_number = 1
        while True:
            company_urls[sub_category] += extract_company_urls_form_page()

            # go_next_page() returns a (has_next, button) pair; only the flag
            # is needed because pagination is driven through the URL.
            has_next_page, _next_button = go_next_page()
            if not has_next_page:
                break

            page_number += 1
            _load_listing_page(f"{listing_url}&page={page_number}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment