|
GCS_DIR = 'gs://YOUR_GCS_DIR' |
|
DATA_SET_ID = 'YOUR_DATA_SET_ID' |
|
TABLE_PREFIX = "ga_sessions_" |
|
GOOGLE_APPLICATION_CREDENTIALS_PATH = './CREDENTIALS.json' |
|
|
|
|
|
def main(): |
|
import argparse |
|
parser = argparse.ArgumentParser(description='load_data_to_bigquery_from_gcs') |
|
parser.add_argument('date', help='date') |
|
args = parser.parse_args() |
|
|
|
table_name = file_name = TABLE_PREFIX + args.date.replace("-", "") |
|
print("Load start [table_name]", table_name) |
|
import os |
|
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = GOOGLE_APPLICATION_CREDENTIALS_PATH |
|
|
|
from google.cloud import bigquery |
|
client = bigquery.Client() |
|
data_set_id = DATA_SET_ID |
|
data_set_ref = client.dataset(data_set_id) |
|
job_config = bigquery.LoadJobConfig() |
|
job_config.autodetect = False |
|
job_config.quote_character = '"' |
|
job_config.allow_quoted_newlines = True |
|
job_config.skip_leading_rows = 1 |
|
# The source format defaults to CSV, so the line below is optional. |
|
job_config.source_format = bigquery.SourceFormat.CSV |
|
job_config.schema = [ |
|
bigquery.SchemaField('avgSessionDuration','FLOAT','NULLABLE'), |
|
bigquery.SchemaField('bounceRate','FLOAT','NULLABLE'), |
|
bigquery.SchemaField('bounces','INTEGER','NULLABLE'), |
|
bigquery.SchemaField('dimension10', 'TIMESTAMP', 'NULLABLE'), |
|
bigquery.SchemaField('dimension8', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('hits', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('sessionDuration', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('sessionDurationBucket', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('sessions', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('uniqueDimensionCombinations', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('continent', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('country', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('metro', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('region', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('subContinent', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('visits', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('city', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('latitude', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('longitude', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('networkDomain', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('cityId', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('continentId', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('countryIsoCode', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('networkLocation', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('metroId', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('regionId', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('regionIsoCode', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('subContinentCode', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goalAbandonRateAll', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('goalAbandonsAll', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('goalCompletionLocation', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goalCompletionsAll', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('goalConversionRateAll', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('goalPreviousStep1', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goalPreviousStep2', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goalPreviousStep3', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goalStartsAll', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('goalValueAll', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('goalValuePerSession', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('goal1Starts', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal1Completions', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal1Value', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal1ConversionRate', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal1Abandons', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal1AbandonRate', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal2AbandonRate', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('goal2Abandons', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('goal2Completions', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('goal2ConversionRate', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('goal2Starts', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('goal2Value', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('goal3Starts', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal3Completions', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal3Value', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal3ConversionRate', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal3Abandons', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal3AbandonRate', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal4Starts', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal4Completions', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal4Value', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal4ConversionRate', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal4Abandons', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('goal4AbandonRate', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('browser', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('browserVersion', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('operatingSystem', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('operatingSystemVersion', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('mobileDeviceBranding', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('mobileDeviceInfo', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('mobileDeviceModel', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('mobileInputSelector', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('browserSize', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dataSource', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('deviceCategory', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('mobileDeviceMarketingName', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('flashVersion', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('javaEnabled', 'BOOLEAN', 'NULLABLE'), |
|
bigquery.SchemaField('language', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('screenColors', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('sourcePropertyDisplayName', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('sourcePropertyTrackingId', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('screenResolution', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('avgTimeOnPage', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('entranceRate', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('entrances', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('exitRate', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('exits', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('hostname', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('pagePath', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('pagePathLevel1', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('pageValue', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('pageviews', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('pageviewsPerSession', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('timeOnPage', 'FLOAT', 'NULLABLE'), |
|
bigquery.SchemaField('uniquePageviews', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('pagePathLevel2', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('pagePathLevel3', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('pagePathLevel4', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('landingPagePath', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('pageTitle', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('secondPagePath', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('exitPagePath', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('pageDepth', 'INTEGER', 'NULLABLE'), |
|
bigquery.SchemaField('previousPagePath', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('eventCategory', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('eventAction', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('eventLabel', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('totalEvents', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('uniqueEvents', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('eventValue', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('avgEventValue', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('sessionsWithEvent', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('eventsPerSessionWithEvent', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension1', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension2', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension3', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension4', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension5', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension6', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension7', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension9', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension11', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension12', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension13', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension14', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension15', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension16', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension17', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension18', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('dimension19', 'STRING', 'NULLABLE'), |
|
bigquery.SchemaField('fullPagePath', 'STRING', 'NULLABLE') |
|
] |
|
table_ref = client.dataset(data_set_id).table(table_name) |
|
try: |
|
client.delete_table(table_ref) # API request |
|
except: |
|
pass |
|
uri = GCS_DIR + table_name + '.csv' |
|
load_job = client.load_table_from_uri( |
|
uri, |
|
data_set_ref.table(table_name), |
|
job_config=job_config) # API request |
|
|
|
assert load_job.job_type == 'load' |
|
|
|
try: |
|
load_job.result() # Waits for table load to complete. |
|
except Exception as e: |
|
for error in load_job.errors: |
|
print("Error detail [message]", error.get("message")) |
|
raise e |
|
|
|
assert load_job.state == 'DONE' |
|
|
|
|
|
if __name__ == '__main__': |
|
main() |