Skip to content

Instantly share code, notes, and snippets.

@amanahuja
Created May 27, 2012 02:39
Show Gist options
  • Save amanahuja/2796956 to your computer and use it in GitHub Desktop.
Save amanahuja/2796956 to your computer and use it in GitHub Desktop.
Load and prepare data on Consumer Electronics sales and corresponding Google Search queries
# -*- coding: utf-8 -*-
"""
Created on Thu May 22 20:30:36 2012
http://www.meetup.com/r-enthusiasts/events/65306492/
Mirroring the work that we do in Python.
This is the code to import the sales and query data into a Py-Pandas
dataframe (with conversion to time series).
Author (twitter): @amanqa
"""
import os
os.chdir("c:\TEMP\REnthusiasts\GoogleTrends-Code")
import pandas
from datetime import datetime
'''
Load first dataframe with Google Trends data
'''
df1 = pandas.read_csv("googletrends.csv")
df1 = df1.rename(columns = {
'Week': 'date',
'Consumer Electronics': 'query_index'
}, copy= False)
#Convert type of query volume index to float
df1['query_index'] = df1['query_index'].apply(
lambda ss: float(ss.replace('%', '')))
#Convert to datetime
df1['date'] = df1['date'].apply(
lambda ss: ss.split(' - ')[0].split('-'))
df1['date'] = df1['date'].apply(
lambda ss: datetime(int(ss[0]),int(ss[1]),int(ss[2])))
'''
Load second dataframe with sales data
'''
df2 = pandas.read_csv("sales.csv")
theyear = 2012
for idx, mm in enumerate(df2['month']):
if mm.startswith('January'): theyear -= 1
df2['month'][idx] += ' ' + str(theyear)
df2['month'] = df2['month'].apply(
lambda ss: datetime.strptime(ss, '%B %d %Y'))
df2 = df2.rename(columns = {
'month': 'date',
'Sales': 'sales',
}, copy = False)
'''
Merge the dataframes.
'''
df = pandas.merge(df1, df2, on='date', how='outer')
'''
Set df index to the 'date' column, and now we have a
time series dataframe ready for analysis.
'''
ts = df.set_index('date')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment