Skip to content

Instantly share code, notes, and snippets.

@kimiyuki
Created June 20, 2017 07:50
Show Gist options
  • Save kimiyuki/f89b6002e8d7a020a7cfeb69eaf9caad to your computer and use it in GitHub Desktop.
Save kimiyuki/f89b6002e8d7a020a7cfeb69eaf9caad to your computer and use it in GitHub Desktop.
hello-ga.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "\"\"\"A simple example of how to access the Google Analytics API.\"\"\"\n\nimport argparse\nimport re, datetime\nfrom apiclient.discovery import build\nimport httplib2\nfrom oauth2client import client\nfrom oauth2client import file\nfrom oauth2client import tools\nimport numpy as np\nimport pandas as pd\n\ndef get_service(api_name, api_version, scope, client_secrets_path):\n \"\"\"Get a service that communicates to a Google API.\n\n Args:\n api_name: string The name of the api to connect to.\n api_version: string The api version to connect to.\n scope: A list of strings representing the auth scopes to authorize for the\n connection.\n client_secrets_path: string A path to a valid client secrets file.\n\n Returns:\n A service that is connected to the specified API.\n \"\"\"\n # Parse command-line arguments.\n parser = argparse.ArgumentParser(\n formatter_class=argparse.RawDescriptionHelpFormatter,\n parents=[tools.argparser])\n flags = parser.parse_args([])\n\n # Set up a Flow object to be used if we need to authenticate.\n flow = client.flow_from_clientsecrets(\n client_secrets_path,\n scope=scope,\n message=tools.message_if_missing(client_secrets_path))\n\n # Prepare credentials, and authorize HTTP object with them.\n # If the credentials don't exist or are invalid run through the native client\n # flow. The Storage object will ensure that if successful the good\n # credentials will get written back to a file.\n storage = file.Storage(api_name + '.dat')\n credentials = storage.get()\n if credentials is None or credentials.invalid:\n credentials = tools.run_flow(flow, storage, flags)\n http = credentials.authorize(http=httplib2.Http())\n\n # Build the service object.\n service = build(api_name, api_version, http=http)\n\n return service",
"execution_count": 7,
"outputs": []
},
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "# Define the auth scopes to request.\nscope = ['https://www.googleapis.com/auth/analytics.readonly']\n# Authenticate and construct service.\nservice = get_service('analytics', 'v3', scope, 'client_secrets.json')",
"execution_count": 8,
"outputs": []
},
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "def get_ga_data(service, prms):\n dt = service.data().ga().get(**prms).execute()\n mydic = {'STRING': str, \"INTEGER\": np.int, \"FLOAT\": np.float}\n colnames = [x['name'].replace('ga:','') for x in dt.get('columnHeaders')]\n cols = {x['name'].replace('ga:','') : mydic[x['dataType']] \n for x in dt.get(\"columnHeaders\")}\n ret = pd.DataFrame(data= dt['rows'], columns= colnames) \n yield ret.astype(dtype = cols) \n if 'date' in ret.columns:\n ret['date'] = pd.to_datetime(ret['date'], infer_datetime_format=True) \n if 'nextLink' in dt.keys():\n prms['start_index'] = re.search(r\"(?<=start-index)\\=(\\d+)\", dt['nextLink']).group(1)\n print(prms['start_index'])\n yield from get_ga_data(service, prms)\n return ",
"execution_count": 13,
"outputs": []
},
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "params = {\n 'ids': \"ga:xxxx\",\n 'start_date': \"2017-06-18\",\n 'end_date': \"yesterday\",\n 'dimensions': \"ga:deviceCategory,ga:landingPagePath\",\n 'metrics': \"ga:sessions,ga:users,ga:totalEvents\",\n \"start_index\": 1,\n 'max_results': 10000\n}",
"execution_count": 14,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "dt = [rows for rows in get_ga_data(service, params)]",
"execution_count": 15,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "10001\n20001\n30001\n"
}
]
},
{
"metadata": {
"collapsed": true,
"trusted": true
},
"cell_type": "code",
"source": "df = pd.concat(dt)",
"execution_count": 16,
"outputs": []
}
],
"metadata": {
"_draft": {
"nbviewer_url": "https://gist.github.com/7adad3b42050506208f00b8d70e3812c"
},
"anaconda-cloud": {},
"gist": {
"id": "7adad3b42050506208f00b8d70e3812c",
"data": {
"description": "hello-ga.ipynb",
"public": true
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.0",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment