# makyol/google_activity_scraper.py

## google_activity_scraper.py
"""
- This script scrapes the hourly activity graph shown on Google Search results for
specific places such as cafes, restaurants, and shopping centers, using Selenium WebDriver.
- To use it, set the search term in the `query` variable. The results are saved to a .csv file named after the search term.
"""

import random
import re
import sys
import time
from urllib.parse import quote_plus

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Search term to look up; edit this before running the script.
query = "SEARCH TERM"

# Hour (24h clock) assigned to the first bar of the graph.
# NOTE(review): 6 is carried over from the original hard-coded offset --
# confirm it still matches the layout Google serves.
FIRST_HOUR = 6

# Matches a standalone CSS ``height`` declaration such as "height: 25px"
# (but not "max-height" / "line-height") and captures the numeric part.
_HEIGHT_PATTERN = re.compile(r'(?:^|;)\s*height:\s*(\d+(?:\.\d+)?)px')


def build_search_url(term):
    """Return the Google Search URL for *term*.

    The term is encoded with ``quote_plus`` so spaces become ``+`` and
    reserved characters such as ``&`` or ``#`` cannot break the query string.
    """
    return "https://www.google.com/search?q=" + quote_plus(term)


def parse_bar_height(style):
    """Extract the bar height in pixels from an inline ``style`` string.

    Returns an ``int`` for whole values and a ``float`` for fractional ones.
    Returns 0 when *style* is empty, ``None``, or carries no
    ``height: <n>px`` declaration (a bar without a height is treated as
    "no activity", as the original script did for empty styles).
    """
    match = _HEIGHT_PATTERN.search(style or '')
    if match is None:
        return 0
    digits = match.group(1)
    return float(digits) if '.' in digits else int(digits)


def build_activity_table(styles, first_hour=FIRST_HOUR):
    """Build the ``hour``/``height`` table from the bars' ``style`` strings.

    Args:
        styles: inline ``style`` attribute values, one per bar, in page order.
        first_hour: hour assigned to the first bar; later bars count up by one.

    Returns:
        A DataFrame with columns ``hour`` and ``height`` whose index starts
        at 1, so the CSV layout matches what the script has always written.
    """
    rows = [
        (first_hour + offset, parse_bar_height(style))
        for offset, style in enumerate(styles)
    ]
    return pd.DataFrame(
        rows,
        columns=['hour', 'height'],
        index=range(1, len(rows) + 1),
    )


url = build_search_url(query)


def main():
    """Scrape the hourly activity graph for ``query`` and save it as CSV.

    Exits through ``sys.exit`` with an explanatory message when the graph
    container is not present on the results page. The browser session is
    always shut down, even if scraping or writing the file fails.
    """
    # Spaces are replaced with '+' in the output file name, as before.
    csv_path = query.replace(' ', '+') + '.csv'

    # NOTE(review): passing the driver path positionally is believed to be
    # rejected by recent Selenium 4 releases -- confirm against the installed
    # version before upgrading.
    driver = webdriver.Chrome("./chromedriver")
    try:
        driver.get(url)
        # Randomised pause (2-3 s) before reading the page.
        time.sleep(random.randint(2000, 3000)/1000)

        try:
            hours_div = driver.find_element(By.CSS_SELECTOR, 'div.yPHXsc')
        except NoSuchElementException:
            # Only a genuinely missing graph is reported this way; any other
            # failure propagates with its traceback instead of being hidden.
            sys.exit("No hourly activity found, try with a different search term, \nClosing...")

        hours = hours_div.find_elements(By.TAG_NAME, 'div')
        print("Hours found: " + str(len(hours)))

        ts_data = build_activity_table(
            [bar.get_attribute("style") for bar in hours]
        )

        time.sleep(random.randint(500, 1000)/1000)
        ts_data.to_csv(csv_path, encoding='utf-8')
    finally:
        # quit() ends the whole WebDriver session, not just the window.
        driver.quit()


if __name__ == "__main__":
    main()
	"""
	- This script scrapes hourly activity graph presented on Google Search results for
	specific places such as cafes, resturants, and shopping centers using Selenium Webdriver.
	- To use it, you just need to provide the search term for the query. The results are saved into a .csv file.
	"""

	import time
	import sys
	import random
	import pandas as pd
	from selenium import webdriver

	query = "SEARCH TERM"
	query = query.replace(' ', '+')
	url = "https://www.google.com/search?q="+query
	ts_data = pd.DataFrame(columns=['hour', 'height'])


	driver = webdriver.Chrome("./chromedriver")
	driver.get(url)
	time.sleep(random.randint(2000, 3000)/1000)

	try:
	hours_div = driver.find_element_by_css_selector('div.yPHXsc') # driver.find_elements_by_xpath(hours_div)
	hours = hours_div.find_elements_by_tag_name('div')
	except:
	driver.close()
	sys.exit("No hourly activity found, try with a different search term, \nClosing...")

	print("Hours found: " + str(len(hours)))
	for i in range(len(hours)):
	style_data = hours[i].get_attribute("style")
	ts_data.at[i+1, 'hour'] = i + 6
	if style_data != '':
	ts_data.at[i + 1, 'height'] = style_data[8:style_data.find('px')]
	else:
	ts_data.at[i + 1, 'height'] = 0


	time.sleep(random.randint(500,1000)/1000)
	ts_data.to_csv(query+'.csv', encoding='utf-8')
	driver.close()