-
-
Save tlmaloney/5650699 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
""" | |
Downloads and cleans up a CSV file from a Google Trends query. | |
Usage: | |
trends.py google.username@gmail.com google.password /path/to/filename query1 [query2 ...] | |
Requires mechanize: | |
pip install mechanize | |
""" | |
import cookielib | |
import csv | |
import mechanize | |
import re | |
from StringIO import StringIO | |
import sys | |
def main(argv): | |
# Google Login credentials | |
username = argv[1] | |
password = argv[2] | |
# Where to save the CSV file | |
pathname = argv[3] | |
queries = ('q=' + query for query in argv[4:]) | |
br = mechanize.Browser() | |
# Create cookie jar | |
cj = cookielib.LWPCookieJar() | |
br.set_cookiejar(cj) | |
# Act like we're a real browser | |
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')] | |
# Login in to Google | |
response = br.open('https://accounts.google.com/ServiceLogin?hl=en&continue=https://www.google.com/') | |
forms = mechanize.ParseResponse(response) | |
form = forms[0] | |
form['Email'] = username | |
form['Passwd'] = password | |
response = br.open(form.click()) | |
# Get CSV from Google Trends | |
trends_url = 'http://www.google.com/trends/trendsReport?' | |
query_params = '&'.join(queries) | |
response = br.open(trends_url + query_params + '&export=1') | |
# Remove headers and footers from Google's CSV | |
# Use last date in date range | |
reader = csv.reader(StringIO(response.read())) | |
dates = [] | |
values = [] | |
for row in reader: | |
try: | |
date, value = row | |
except ValueError: | |
continue | |
if re.search('[0-9]{4}-[0-9]{2}-[0-9]{2}', date): | |
dates.append(date[-10:]) # Uses last date in time period | |
values.append(value) | |
with open(pathname, 'w') as f: | |
writer = csv.writer(f) | |
writer.writerow(['date', 'debt']) | |
for row in zip(dates, values): | |
writer.writerow(row) | |
if __name__ == '__main__': | |
sys.exit(main(sys.argv)) |
Finally I am able to run the file now but its not giving any data... I am getting CSV file with just two column "date" and "debt" nothing else
I am using Python 2.7.10, and mechanize-0.2.5, but got the error below:
Traceback (most recent call last):
File "./trends.py", line 71, in
sys.exit(main(sys.argv))
File "./trends.py", line 42, in main
form['Passwd'] = password
File "/Library/Python/2.7/site-packages/mechanize/_form.py", line 2780, in setitem
control = self.find_control(name)
File "/Library/Python/2.7/site-packages/mechanize/_form.py", line 3101, in find_control
return self._find_control(name, type, kind, id, label, predicate, nr)
File "/Library/Python/2.7/site-packages/mechanize/_form.py", line 3185, in _find_control
raise ControlNotFoundError("no control matching "+description)
mechanize._form.ControlNotFoundError: no control matching name 'Passed'
Changed to 2 steps of login and it's working now:
Login in to Google
response = br.open('https://accounts.google.com/ServiceLogin?hl=en&continue=https://www.google.com/')
forms = mechanize.ParseResponse(response)
form = forms[0]
form['Email'] = username
response = br.open(form.click())
forms = mechanize.ParseResponse(response)
form = forms[0]
form['Passwd'] = password
response = br.open(form.click())
@supermaxim I have installed Python 2.7.11 but after that getting the same "HTTP Error 403: request disallowed by robots.txt" error. I added "br.set_handle_robots(False)" but this is giving me another error of "unexpected indent" - where do I add this line?