Skip to content

Instantly share code, notes, and snippets.

@davidlenz
Created April 26, 2018 21:17
Show Gist options
  • Save davidlenz/858b91d9a744f8aa36b412ed328f1326 to your computer and use it in GitHub Desktop.
Save davidlenz/858b91d9a744f8aa36b412ed328f1326 to your computer and use it in GitHub Desktop.
Search Google and return the first-page results (link text and URL) as a pandas DataFrame. Tweaked from https://gist.github.com/azam-a/32b89944b98a3fd79d44ebfdac16b63d
# https://gist.github.com/azam-a/32b89944b98a3fd79d44ebfdac16b63d
import pandas as pd
import selenium
print('selenium.__version__: ', selenium.__version__)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def get_top_google_results(QUERY):
    """Search Google for ``QUERY`` and return the first page of result links.

    Starts a Firefox WebDriver session, types the query into the search box
    on google.com, submits it, waits up to 10 seconds for a result link to
    become visible, then collects the text and ``href`` of every element
    matching the results XPath.

    Args:
        QUERY: Text typed into Google's search box.

    Returns:
        A pandas DataFrame with columns ``text`` and ``url``, one row per
        matched link element.

    Raises:
        selenium.common.exceptions.TimeoutException: If no element matching
            the results XPath becomes visible within 10 seconds.
    """
    # NOTE(review): this XPath depends on Google's result-page markup;
    # confirm it still matches the current page structure.
    results_locator = "//div/h3/a"

    driver = webdriver.Firefox()
    try:
        driver.get("http://www.google.com")

        # find_element(By.NAME, ...) works on Selenium 3 and 4; the older
        # find_element_by_name helper was removed in Selenium 4.
        search_box = driver.find_element(By.NAME, "q")
        search_box.send_keys(QUERY)
        search_box.submit()

        # Block until at least one result link is rendered.
        WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, results_locator))
        )

        # Read text/href while the session is still alive; the element
        # handles cannot be queried after quit().
        results = [
            [link.text, link.get_attribute("href")]
            for link in driver.find_elements(By.XPATH, results_locator)
        ]
    finally:
        # Always close the browser, even when the lookup or wait fails,
        # so a failed search does not leave a Firefox process behind.
        driver.quit()

    return pd.DataFrame(results, columns=['text', 'url'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment