Skip to content

Instantly share code, notes, and snippets.

@kemsakurai
Created May 22, 2018 15:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kemsakurai/ca95676523dfd27bb7b3971cdf339533 to your computer and use it in GitHub Desktop.
Save kemsakurai/ca95676523dfd27bb7b3971cdf339533 to your computer and use it in GitHub Desktop.
Google Analytics の地域データを、pandas 経由で RDBに登録する Django コマンド一式
from django.conf import settings
CLIENT_JSON_PATH = settings.BASE_DIR + "/ga_client.json"
VIEW_ID = settings.GA_VIEW_ID
# DATABASE_PATH
DATABASE_PATH = settings.BASE_DIR + '/output/sqlite/superset_adapter.db'
def convert_datetime(x):
try:
import datetime
x = int(x) / 1000
return datetime.datetime.fromtimestamp(x);
except TypeError:
return x
except ValueError:
return x
import sqlite3
from django.core.management.base import BaseCommand
from google2pandas import *
from ga.management import commands
class Command(BaseCommand):
help = "Collect area codes."
def handle(self, *args, **options):
query = {
'reportRequests': [{
'viewId': commands.VIEW_ID,
'dateRanges': [{
'startDate': '30daysAgo',
'endDate': 'today'}
],
'dimensions': [
{'name': 'ga:dimension10'},
{'name': 'ga:dimension8'},
{'name': 'ga:cityId'},
{'name': 'ga:continentId'},
{'name': 'ga:countryIsoCode'},
{'name': 'ga:regionId'},
{'name': 'ga:regionIsoCode'},
{'name': 'ga:subContinentCode'},
],
'metrics': [
{'expression': 'ga:users'},
{'expression': 'ga:bounceRate'}
]
}]
}
conn = GoogleAnalyticsQueryV4(secrets=commands.CLIENT_JSON_PATH)
df = conn.execute_query(query)
# dimension10 を変換する
df['dimension10'] = df['dimension10'].apply(lambda x: commands.convert_datetime(x))
df['users'] = df['users'].astype('int64')
df['bounceRate'] = df['bounceRate'].astype('float64')
# dbコネクト
conn = sqlite3.connect(commands.DATABASE_PATH)
# tableを作成する
df.to_sql("ga_area_codes_reports", conn, index=False, if_exists="replace")
# コネクションを閉じる
conn.close()
import sqlite3
from django.core.management.base import BaseCommand
from google2pandas import *
from ga.management import commands
class Command(BaseCommand):
help = "Collect area names."
def handle(self, *args, **options):
query = {
'reportRequests': [{
'viewId': commands.VIEW_ID,
'dateRanges': [{
'startDate': '30daysAgo',
'endDate': 'today'}
],
'dimensions': [
{'name': 'ga:dimension10'},
{'name': 'ga:dimension8'},
{'name': 'ga:continent'},
{'name': 'ga:subContinent'},
{'name': 'ga:country'},
{'name': 'ga:region'},
{'name': 'ga:metro'},
{'name': 'ga:city'},
],
'metrics': [
{'expression': 'ga:users'},
{'expression': 'ga:bounceRate'}
]
}]
}
conn = GoogleAnalyticsQueryV4(secrets=commands.CLIENT_JSON_PATH)
df = conn.execute_query(query)
# dimension10 を変換する
df['dimension10'] = df['dimension10'].apply(lambda x: commands.convert_datetime(x))
df['users'] = df['users'].astype('int64')
df['bounceRate'] = df['bounceRate'].astype('float64')
# dbコネクト
conn = sqlite3.connect(commands.DATABASE_PATH)
# tableを作成する
df.to_sql("ga_area_names_reports", conn, index=False, if_exists="replace")
# コネクションを閉じる
conn.close()
import sqlite3
from django.core.management.base import BaseCommand
from google2pandas import *
from ga.management import commands
class Command(BaseCommand):
help = "Collect domain names and service provider."
def handle(self, *args, **options):
query = {
'reportRequests': [{
'viewId': commands.VIEW_ID,
'dateRanges': [{
'startDate': '30daysAgo',
'endDate': 'today'}
],
'dimensions': [
{'name': 'ga:dimension10'},
{'name': 'ga:dimension8'},
{'name': 'ga:networkDomain'},
{'name': 'ga:networkLocation'},
],
'metrics': [
{'expression': 'ga:users'},
{'expression': 'ga:bounceRate'}
]
}]
}
conn = GoogleAnalyticsQueryV4(secrets=commands.CLIENT_JSON_PATH)
df = conn.execute_query(query)
# dimension10 を変換する
df['dimension10'] = df['dimension10'].apply(lambda x: commands.convert_datetime(x))
df['users'] = df['users'].astype('int64')
df['bounceRate'] = df['bounceRate'].astype('float64')
# dbコネクト
conn = sqlite3.connect(commands.DATABASE_PATH)
# tableを作成する
df.to_sql("ga_dn_lsp_reports", conn, index=False, if_exists="replace")
# コネクションを閉じる
conn.close()
import sqlite3
from django.core.management.base import BaseCommand
from google2pandas import *
from ga.management import commands
class Command(BaseCommand):
help = "Collect domain names and service provider."
def handle(self, *args, **options):
query = {
'reportRequests': [{
'viewId': commands.VIEW_ID,
'dateRanges': [{
'startDate': '30daysAgo',
'endDate': 'today'}
],
'dimensions': [
{'name': 'ga:dimension10'},
{'name': 'ga:dimension8'},
{'name': 'ga:networkDomain'},
{'name': 'ga:networkLocation'},
],
'metrics': [
{'expression': 'ga:users'},
{'expression': 'ga:bounceRate'}
]
}]
}
conn = GoogleAnalyticsQueryV4(secrets=commands.CLIENT_JSON_PATH)
df = conn.execute_query(query)
# dimension10 を変換する
df['dimension10'] = df['dimension10'].apply(lambda x: commands.convert_datetime(x))
df['users'] = df['users'].astype('int64')
df['bounceRate'] = df['bounceRate'].astype('float64')
# dbコネクト
conn = sqlite3.connect(commands.DATABASE_PATH)
# tableを作成する
df.to_sql("ga_dn_lsp_reports", conn, index=False, if_exists="replace")
# コネクションを閉じる
conn.close()

前提で実施すること

  • settings.py に、GA_VIEW_ID を設定する。
  • BASE_DIR 直下に、サービス アカウント キー を配置する。
  • pandas と、google2pandas を インストールしておく。

補足

  • データは全て 同一の sqlite ファイルに、登録する。
  • Google Analytics の 指標、users(ユーザー数)、bounceRate(直帰率)はSUM 等の集計関数実行のため、数値に変換する。
  • データ登録時は、pandas の index は登録しない。 DELETE ALL 、 INSERT ALL で全件削除、全件登録する。
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment