Last active
February 13, 2018 09:32
-
-
Save ilkkapeltola/0e3990265b92b54954fc618edfad5a7d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from apiclient.discovery import build | |
from oauth2client.service_account import ServiceAccountCredentials | |
import pandas as pd | |
import datetime | |
#SCOPES = ['https://www.googleapis.com/auth/analytics.readonly'] | |
SCOPES = [] | |
KEY_FILE_LOCATION = './path-to-your-private-key-file.json' | |
VIEW_ID = '123456789' | |
from_date = datetime.datetime(2017,12,1,0,0,0).strftime("%Y-%m-%d") | |
def initialize_analyticsreporting(): | |
"""Initializes an Analytics Reporting API V4 service object. | |
Returns: | |
An authorized Analytics Reporting API V4 service object. | |
""" | |
credentials = ServiceAccountCredentials.from_json_keyfile_name( | |
KEY_FILE_LOCATION, SCOPES) | |
# Build the service object. | |
analytics = build('analyticsreporting', 'v4', credentials=credentials) | |
return analytics | |
def get_report(analytics, query): | |
"""Queries the Analytics Reporting API V4, combines multiple pageSize | |
together and returns a pandas dataframe. | |
Args: | |
analytics: An authorized Analytics Reporting API V4 service object. | |
query: the json that describes the query | |
Returns: a pandas dataframe. | |
""" | |
nextPageToken = '0' | |
responses = [] | |
while (nextPageToken != None): | |
query['reportRequests'][0]['pageToken'] = nextPageToken | |
result = analytics.reports().batchGet( | |
body=query | |
).execute() | |
responses.append(result) | |
if 'nextPageToken' in result['reports'][0]: | |
nextPageToken = result['reports'][0]['nextPageToken'] | |
else: | |
return responses | |
return responses | |
# The columns from GA come with a ga: -prefix. This simply removes that where it exists. | |
def strip_ga_prefix(string): | |
if string[:3] == 'ga:': | |
return string[3:] | |
else: | |
return string | |
def get_dataframe(responses): | |
"""Parses the Analytics Reporting API V4 response to a pandas dataframe | |
Args: | |
response: An Analytics Reporting API V4 response. | |
""" | |
rowlist = [] | |
for response in responses: | |
for report in response.get('reports', []): | |
columnHeader = report.get('columnHeader', {}) | |
dimensionHeaders = columnHeader.get('dimensions', []) | |
metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', []) | |
for row in report.get('data', {}).get('rows', []): | |
new_row = {} | |
dimensions = row.get('dimensions', []) | |
dateRangeValues = row.get('metrics', []) | |
for header, dimension in zip(dimensionHeaders, dimensions): | |
new_row[strip_ga_prefix(header)] = dimension | |
for i, values in enumerate(dateRangeValues): | |
for metricHeader, value in zip(metricHeaders, values.get('values')): | |
new_row[strip_ga_prefix(metricHeader.get('name'))] = value | |
rowlist.append(new_row) | |
df = pd.DataFrame(rowlist) | |
return df | |
query = { | |
'reportRequests': [{ | |
'viewId' : VIEW_ID, | |
'pageSize' : 10000, | |
'dateRanges': [{ | |
'startDate' : from_date, | |
'endDate' : 'today'}], | |
'dimensions' : [ | |
{'name' : 'ga:date'}, | |
{'name' : 'ga:source'}, | |
{'name' : 'ga:medium'}, | |
{'name' : 'ga:campaign'}, | |
{'name' : 'ga:keyword'}, | |
{'name' : 'ga:adGroup'} | |
], | |
"orderBys": [ | |
{'fieldName' : 'ga:adCost', 'sortOrder': 'DESCENDING'}, | |
], | |
'metrics':[ | |
{'expression' : 'ga:adCost'},], | |
}] | |
} | |
analytics = initialize_analyticsreporting() | |
response = get_report(analytics, query) | |
df = get_dataframe(response) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment