ilkkapeltola/ga_campaing_costs.py

## ga_campaing_costs.py
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials

import pandas as pd
import datetime

#SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
SCOPES = []
KEY_FILE_LOCATION = './path-to-your-private-key-file.json'
VIEW_ID = '123456789'
from_date = datetime.datetime(2017,12,1,0,0,0).strftime("%Y-%m-%d")


def initialize_analyticsreporting():
  """Initializes an Analytics Reporting API V4 service object.

  Returns:
    An authorized Analytics Reporting API V4 service object.
  """
  credentials = ServiceAccountCredentials.from_json_keyfile_name(
      KEY_FILE_LOCATION, SCOPES)

  # Build the service object.
  analytics = build('analyticsreporting', 'v4', credentials=credentials)

  return analytics


def get_report(analytics, query):
    """Queries the Analytics Reporting API V4, combines multiple pageSize
       together and returns a pandas dataframe.

    Args:
    analytics: An authorized Analytics Reporting API V4 service object.
    query: the json that describes the query
    Returns: a pandas dataframe.
    """
    nextPageToken = '0'
    responses = []
    while (nextPageToken != None):
        query['reportRequests'][0]['pageToken'] = nextPageToken
        result = analytics.reports().batchGet(
          body=query
        ).execute()
        responses.append(result)
        if 'nextPageToken' in result['reports'][0]:
            nextPageToken = result['reports'][0]['nextPageToken']
        else:
            return responses

    return responses

# The columns from GA come with a ga: -prefix. This simply removes that where it exists.
def strip_ga_prefix(string):
    if string[:3] == 'ga:':
        return string[3:]
    else:
        return string


def get_dataframe(responses):
  """Parses the Analytics Reporting API V4 response to a pandas dataframe

  Args:
    response: An Analytics Reporting API V4 response.
  """
  rowlist = []
  for response in responses:
      for report in response.get('reports', []):
        columnHeader = report.get('columnHeader', {})
        dimensionHeaders = columnHeader.get('dimensions', [])
        metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])

        for row in report.get('data', {}).get('rows', []):
          new_row = {}
          dimensions = row.get('dimensions', [])
          dateRangeValues = row.get('metrics', [])

          for header, dimension in zip(dimensionHeaders, dimensions):
            new_row[strip_ga_prefix(header)] = dimension

          for i, values in enumerate(dateRangeValues):
            for metricHeader, value in zip(metricHeaders, values.get('values')):
              new_row[strip_ga_prefix(metricHeader.get('name'))] = value

          rowlist.append(new_row)

  df = pd.DataFrame(rowlist)
  return df


query = {
    'reportRequests': [{
        'viewId' : VIEW_ID,
        'pageSize' : 10000,

        'dateRanges': [{
            'startDate' : from_date,
            'endDate'   : 'today'}],

        'dimensions' : [
            {'name' : 'ga:date'},
            {'name' : 'ga:source'},
            {'name' : 'ga:medium'},
            {'name' : 'ga:campaign'},
            {'name' : 'ga:keyword'},
            {'name' : 'ga:adGroup'}
            ],
          "orderBys": [
       {'fieldName' : 'ga:adCost', 'sortOrder': 'DESCENDING'},
  ],

        'metrics':[
            {'expression' : 'ga:adCost'},],
    }]
}


analytics = initialize_analyticsreporting()
response = get_report(analytics, query)
df = get_dataframe(response)
	from apiclient.discovery import build
	from oauth2client.service_account import ServiceAccountCredentials

	import pandas as pd
	import datetime

	#SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
	SCOPES = []
	KEY_FILE_LOCATION = './path-to-your-private-key-file.json'
	VIEW_ID = '123456789'
	from_date = datetime.datetime(2017,12,1,0,0,0).strftime("%Y-%m-%d")


	def initialize_analyticsreporting():
	"""Initializes an Analytics Reporting API V4 service object.

	Returns:
	An authorized Analytics Reporting API V4 service object.
	"""
	credentials = ServiceAccountCredentials.from_json_keyfile_name(
	KEY_FILE_LOCATION, SCOPES)

	# Build the service object.
	analytics = build('analyticsreporting', 'v4', credentials=credentials)

	return analytics



	def get_report(analytics, query):
	"""Queries the Analytics Reporting API V4, combines multiple pageSize
	together and returns a pandas dataframe.

	Args:
	analytics: An authorized Analytics Reporting API V4 service object.
	query: the json that describes the query
	Returns: a pandas dataframe.
	"""
	nextPageToken = '0'
	responses = []
	while (nextPageToken != None):
	query['reportRequests'][0]['pageToken'] = nextPageToken
	result = analytics.reports().batchGet(
	body=query
	).execute()
	responses.append(result)
	if 'nextPageToken' in result['reports'][0]:
	nextPageToken = result['reports'][0]['nextPageToken']
	else:
	return responses

	return responses

	# The columns from GA come with a ga: -prefix. This simply removes that where it exists.
	def strip_ga_prefix(string):
	if string[:3] == 'ga:':
	return string[3:]
	else:
	return string


	def get_dataframe(responses):
	"""Parses the Analytics Reporting API V4 response to a pandas dataframe

	Args:
	response: An Analytics Reporting API V4 response.
	"""
	rowlist = []
	for response in responses:
	for report in response.get('reports', []):
	columnHeader = report.get('columnHeader', {})
	dimensionHeaders = columnHeader.get('dimensions', [])
	metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])

	for row in report.get('data', {}).get('rows', []):
	new_row = {}
	dimensions = row.get('dimensions', [])
	dateRangeValues = row.get('metrics', [])

	for header, dimension in zip(dimensionHeaders, dimensions):
	new_row[strip_ga_prefix(header)] = dimension

	for i, values in enumerate(dateRangeValues):
	for metricHeader, value in zip(metricHeaders, values.get('values')):
	new_row[strip_ga_prefix(metricHeader.get('name'))] = value

	rowlist.append(new_row)

	df = pd.DataFrame(rowlist)
	return df


	query = {
	'reportRequests': [{
	'viewId' : VIEW_ID,
	'pageSize' : 10000,

	'dateRanges': [{
	'startDate' : from_date,
	'endDate' : 'today'}],

	'dimensions' : [
	{'name' : 'ga:date'},
	{'name' : 'ga:source'},
	{'name' : 'ga:medium'},
	{'name' : 'ga:campaign'},
	{'name' : 'ga:keyword'},
	{'name' : 'ga:adGroup'}
	],
	"orderBys": [
	{'fieldName' : 'ga:adCost', 'sortOrder': 'DESCENDING'},
	],

	'metrics':[
	{'expression' : 'ga:adCost'},],
	}]
	}


	analytics = initialize_analyticsreporting()
	response = get_report(analytics, query)
	df = get_dataframe(response)