Skip to content

Instantly share code, notes, and snippets.

@satishkt
Last active February 20, 2016 08:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save satishkt/73db32845b81882e2a50 to your computer and use it in GitHub Desktop.
Save satishkt/73db32845b81882e2a50 to your computer and use it in GitHub Desktop.
"""
Reads Google search history JSON files from the given directory.
Expected JSON format :
{"event":[
{"query":{"id":[{"timestamp_usec":"1135905619017279"}],"query_text":"XYZ"}},
{"query":{"id":[{"timestamp_usec":"1135903586447380"}],"query_text":"ABC"}},
]}
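The folder containing the JSON files is described by a config.ini file with the section
[google.search.history]
searchesFolder = /path/to/folder
NOTE: this script does not read config.ini yet; the input files are selected by the
hard-coded `path` glob pattern defined below the imports.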
"""
# Module metadata: author handle and script version.
__author__ = 'satish'
# NOTE(review): the version is stored as a float rather than a string — confirm before changing.
__version__ = 0.1
import os
import glob
import json
from datetime import datetime
import ConfigParser
import csv
# Glob pattern selecting the exported search-history JSON files to read.
path = '/Users/<Location>/*.json'


def _event_to_row(event):
    """Flatten one entry of a file's ``event`` list into a CSV row dict.

    Args:
        event: A dict shaped like
            ``{"query": {"id": [{"timestamp_usec": "..."}], "query_text": "..."}}``.

    Returns:
        A dict with the keys ``date``, ``weekday``, ``month``, ``hour``,
        ``min`` and ``search_text``. The date/time parts are derived from the
        first ``timestamp_usec`` of the query, interpreted in local time.

    Raises:
        KeyError, IndexError, ValueError: If the event lacks the expected
            structure or the timestamp is not an integer string.
    """
    query = event['query']
    # timestamp_usec is in microseconds; floor-divide to whole seconds so the
    # result is the same integer on both Python 2 and Python 3.
    seconds = int(query['id'][0]['timestamp_usec']) // 1000000
    when = datetime.fromtimestamp(seconds)

    text = query['query_text']
    # Python 2's csv module writes byte strings, so unicode text is encoded
    # as UTF-8. A value that is already a native str is passed through as is
    # (encoding a non-ASCII byte string would raise under Python 2).
    if not isinstance(text, str):
        text = text.encode('utf-8')

    return {
        # The calendar day only; hour and minute have their own columns.
        'date': when.strftime('%Y-%m-%d'),
        'weekday': when.strftime('%A'),
        'month': when.strftime('%B'),
        'hour': when.strftime('%H'),
        'min': when.strftime('%M'),
        'search_text': text,
    }


def write_search_log(pattern, out_path='searchlog.csv'):
    """Write every search event found in the matching JSON files to a CSV.

    Args:
        pattern: Glob pattern of the JSON files to read. Each file must hold
            an object with an ``event`` list.
        out_path: Path of the CSV file to (re)create.

    Raises:
        IOError/OSError: If an input file or the output file cannot be opened.
        ValueError: If an input file is not valid JSON.
        KeyError: If a file has no ``event`` list or an event is malformed.
    """
    # Resolve the pattern once so the printed list and the processed list
    # are guaranteed to be the same set of files.
    files = glob.glob(pattern)
    print(files)

    # Start from a fresh output file; a missing file is the normal
    # first-run case, so that error is deliberately ignored.
    try:
        os.remove(out_path)
    except OSError:
        pass

    fieldnames = ('date', 'weekday', 'month', 'hour', 'min', 'search_text')
    with open(out_path, 'wt') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()
        print('')
        for infile in files:
            print(infile)
            # Close each input as soon as it is parsed instead of leaking
            # one open handle per file.
            with open(infile) as json_data:
                data = json.load(json_data)
            for event in data['event']:
                writer.writerow(_event_to_row(event))


# Script behaviour: convert the configured files into searchlog.csv in the
# current working directory.
write_search_log(path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment