Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import datetime
#SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
SCOPES = []
KEY_FILE_LOCATION = './path-to-your-private-key-file.json'
VIEW_ID = '123456789'
from_date = datetime.datetime(2017,12,1,0,0,0).strftime("%Y-%m-%d")
def initialize_analyticsreporting():
"""Initializes an Analytics Reporting API V4 service object.
Returns:
An authorized Analytics Reporting API V4 service object.
"""
credentials = ServiceAccountCredentials.from_json_keyfile_name(
KEY_FILE_LOCATION, SCOPES)
# Build the service object.
analytics = build('analyticsreporting', 'v4', credentials=credentials)
return analytics
def get_report(analytics, query):
"""Queries the Analytics Reporting API V4, combines multiple pageSize
together and returns a pandas dataframe.
Args:
analytics: An authorized Analytics Reporting API V4 service object.
query: the json that describes the query
Returns: a pandas dataframe.
"""
nextPageToken = '0'
responses = []
while (nextPageToken != None):
query['reportRequests'][0]['pageToken'] = nextPageToken
result = analytics.reports().batchGet(
body=query
).execute()
responses.append(result)
if 'nextPageToken' in result['reports'][0]:
nextPageToken = result['reports'][0]['nextPageToken']
else:
return responses
return responses
# The columns from GA come with a ga: -prefix. This simply removes that where it exists.
def strip_ga_prefix(string):
if string[:3] == 'ga:':
return string[3:]
else:
return string
def get_dataframe(responses):
"""Parses the Analytics Reporting API V4 response to a pandas dataframe
Args:
response: An Analytics Reporting API V4 response.
"""
rowlist = []
for response in responses:
for report in response.get('reports', []):
columnHeader = report.get('columnHeader', {})
dimensionHeaders = columnHeader.get('dimensions', [])
metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
for row in report.get('data', {}).get('rows', []):
new_row = {}
dimensions = row.get('dimensions', [])
dateRangeValues = row.get('metrics', [])
for header, dimension in zip(dimensionHeaders, dimensions):
new_row[strip_ga_prefix(header)] = dimension
for i, values in enumerate(dateRangeValues):
for metricHeader, value in zip(metricHeaders, values.get('values')):
new_row[strip_ga_prefix(metricHeader.get('name'))] = value
rowlist.append(new_row)
df = pd.DataFrame(rowlist)
return df
query = {
'reportRequests': [{
'viewId' : VIEW_ID,
'pageSize' : 10000,
'dateRanges': [{
'startDate' : from_date,
'endDate' : 'today'}],
'dimensions' : [
{'name' : 'ga:date'},
{'name' : 'ga:source'},
{'name' : 'ga:medium'},
{'name' : 'ga:campaign'},
{'name' : 'ga:keyword'},
{'name' : 'ga:adGroup'}
],
"orderBys": [
{'fieldName' : 'ga:adCost', 'sortOrder': 'DESCENDING'},
],
'metrics':[
{'expression' : 'ga:adCost'},],
}]
}
analytics = initialize_analyticsreporting()
response = get_report(analytics, query)
df = get_dataframe(response)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment