Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Get Data from Analytics Reporting API, Connect Jupyter Notebook with Spread Sheet
{
"cells": [
{
"metadata": {
"code_folding": [],
"trusted": true
},
"cell_type": "code",
"source": "# Import libraries \nimport pandas as pd\nimport warnings\nwarnings.filterwarnings(\"ignore\")",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"code_folding": [],
"trusted": true
},
"cell_type": "code",
"source": "# Import the code for fetching GA Data\nfrom gaData import *",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"code_folding": [],
"trusted": true
},
"cell_type": "code",
"source": "######### Configure the inputs ##################\n### Replace with your own view_id\ngaView = \"xxxxxxxx\"\n### Set the date range for your data\nstart_date='2018-01-01'\nend_date='2018-11-30'\n### Replace with your own segmentId you found in Query Explorer\nsegments = [{\"segmentId\": \"gaid::xxxxxxxxxxxxxx\"}]\n### To get data, we must have one metrics, here we use sessions\nmetrics=[{\"expression\": \"ga:sessions\"}]\n### Replace with your own custom dimention ID and include segment also\ndimensions=[{\"name\": \"ga:dimensionXX\"},{\"name\": \"ga:segment\"}]\n### Your data will be grouped by the dimensions you use \ngroup_by = [\"ga:dimensionXX\",\"ga:segment\"]\n### Set your client secret path where you put your client credentials \nCLIENT_SECRETS_PATH = 'key/client_secret.json'",
"execution_count": 5,
"outputs": []
},
{
"metadata": {
"code_folding": [],
"trusted": true
},
"cell_type": "code",
"source": "# Initiate the analytics client\nanalytics = initialize_analyticsreporting(CLIENT_SECRETS_PATH)",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"code_folding": [],
"trusted": true
},
"cell_type": "code",
"source": "######### Run the program to get data ##################\nstart_time = time.time()\ndata = return_ga_data(analytics,\n start_date= start_date,\n end_date=end_date,\n view_id=gaView,\n metrics=metrics,\n dimensions=dimensions,\n segments = segments,\n group_by = group_by)\nprint(\"Running Time --- %s seconds ---\" % (time.time() - start_time))\ndata.head()",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "Running Time --- 1559.456502199173 seconds ---\n"
},
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>ga:dimension42</th>\n <th>ga:segment</th>\n <th>ga:sessions</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1001033209.1481225329</td>\n <td>phoneNumber</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1001090294.1528794762</td>\n <td>phoneNumber</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1007582195.1522236043</td>\n <td>phoneNumber</td>\n <td>3.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1010110771.1512153214</td>\n <td>phoneNumber</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>101499865.1532375076</td>\n <td>phoneNumber</td>\n <td>1.0</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " ga:dimension42 ga:segment ga:sessions\n0 1001033209.1481225329 phoneNumber 2.0\n1 1001090294.1528794762 phoneNumber 1.0\n2 1007582195.1522236043 phoneNumber 3.0\n3 1010110771.1512153214 phoneNumber 1.0\n4 101499865.1532375076 phoneNumber 1.0"
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Import libraries for connect to google spread sheet\nimport gspread\nfrom oauth2client.service_account import ServiceAccountCredentials\nfrom df2gspread import df2gspread as d2g",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Specify we are using client IDs to delete users\ndata['typeId'] = 'CLIENT_ID'\n# Select the columns we need to upload to the Google Sheet\ndf = data[['ga:dimension42','typeId']]",
"execution_count": 12,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "df.head()",
"execution_count": 13,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>ga:dimension42</th>\n <th>typeId</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1.001033e+09</td>\n <td>CLIENT_ID</td>\n </tr>\n <tr>\n <th>1</th>\n <td>1.001090e+09</td>\n <td>CLIENT_ID</td>\n </tr>\n <tr>\n <th>2</th>\n <td>1.007582e+09</td>\n <td>CLIENT_ID</td>\n </tr>\n <tr>\n <th>3</th>\n <td>1.010111e+09</td>\n <td>CLIENT_ID</td>\n </tr>\n <tr>\n <th>4</th>\n <td>1.014999e+08</td>\n <td>CLIENT_ID</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " ga:dimension42 typeId\n0 1.001033e+09 CLIENT_ID\n1 1.001090e+09 CLIENT_ID\n2 1.007582e+09 CLIENT_ID\n3 1.010111e+09 CLIENT_ID\n4 1.014999e+08 CLIENT_ID"
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Configure the connection \nscope = ['https://spreadsheets.google.com/feeds']\ncredentials = ServiceAccountCredentials.from_json_keyfile_name('key/service_account_GS.json', scope)\ngc = gspread.authorize(credentials)\n#### The sprad sheet ID, which can be taken from the link to the sheet\nspreadsheet_key = '1uHQgUSElHaKSKATOYUxZjwnNx4NuR0U0XDjieIkTToM'",
"execution_count": 16,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Set the sheet name you want to upload data to and the start cell where the upload data begins \nwks_name = 'Main'\ncell_of_start_df = 'A5'\n# upload the dataframe of the clients we want to delete\nd2g.upload(df,\n spreadsheet_key,\n wks_name,\n credentials=credentials,\n col_names=False,\n row_names=False,\n start_cell = cell_of_start_df,\n clean=False)\nprint ('The sheet is updated successfully')",
"execution_count": 18,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "The sheet is updated successfully\n"
}
]
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.4",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"latex_envs": {
"eqNumInitial": 1,
"eqLabelWithNumbers": true,
"current_citInitial": 1,
"cite_by": "apalike",
"bibliofile": "biblio.bib",
"LaTeX_envs_menu_present": true,
"labels_anchors": false,
"latex_user_defs": false,
"user_envs_cfg": false,
"report_style_numbering": false,
"autoclose": false,
"autocomplete": true,
"hotkeys": {
"equation": "Ctrl-E",
"itemize": "Ctrl-I"
}
},
"gist": {
"id": "",
"data": {
"description": "Get Data from Analytics Reporting API, Connect Jupyter Notebook with Spread Sheet",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment