Created
April 26, 2018 21:17
-
-
Save davidlenz/858b91d9a744f8aa36b412ed328f1326 to your computer and use it in GitHub Desktop.
Search Google and return the first page of results (link text and URL) as a pandas DataFrame. Tweaked from https://gist.github.com/azam-a/32b89944b98a3fd79d44ebfdac16b63d
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://gist.github.com/azam-a/32b89944b98a3fd79d44ebfdac16b63d | |
import pandas as pd | |
import selenium | |
print('selenium.__version__: ', selenium.__version__) | |
from selenium import webdriver | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
def get_top_google_results(QUERY):
    """Search Google for ``QUERY`` and return the first page of result links.

    Starts a Firefox WebDriver session, types the query into the search box
    on google.com, submits it, waits up to 10 seconds for a result link to
    become visible, and then collects the text and ``href`` of every element
    matching the results XPath.

    Args:
        QUERY: Search string to type into Google's search box.

    Returns:
        A ``pandas.DataFrame`` with columns ``text`` and ``url``, one row per
        matched result link.

    Raises:
        selenium.common.exceptions.TimeoutException: If no element matching
            the results XPath becomes visible within 10 seconds.
    """
    # NOTE(review): this XPath mirrors the original gist; Google's markup
    # changes over time, so confirm it still matches result links.
    results_locator = "//div/h3/a"

    driver = webdriver.Firefox()
    try:
        driver.get("http://www.google.com")

        # find_element(By.NAME, ...) replaces find_element_by_name, which was
        # removed in Selenium 4.3; this form also works on older releases.
        input_element = driver.find_element(By.NAME, "q")
        input_element.send_keys(QUERY)
        input_element.submit()

        WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, results_locator))
        )

        # Read text and url while the session is still alive: the elements
        # cannot be queried after driver.quit().
        results = [
            [item.text, item.get_attribute("href")]
            for item in driver.find_elements(By.XPATH, results_locator)
        ]
    finally:
        # Always close the browser, even when the wait times out or a lookup
        # fails, so no Firefox process is left running.
        driver.quit()

    return pd.DataFrame(results, columns=['text', 'url'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment