Created
August 1, 2018 12:45
-
-
Save rutgerhofste/69bb32a8a7020517dc20c543b7715e09 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# tutorial https://cloud.google.com/bigquery/docs/bigqueryml-scientist-start" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import pandas as pd\n", | |
"from google.cloud import bigquery\n", | |
"%load_ext google.cloud.bigquery" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = \"/.google.json\"\n", | |
"os.environ[\"GOOGLE_CLOUD_PROJECT\"] = \"aqueduct30\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"client = bigquery.Client()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Step two create your dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Dataset(DatasetReference('aqueduct30', 'bqml_tutorial'))" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dataset = bigquery.Dataset(client.dataset('bqml_tutorial'))\n", | |
"dataset.location = 'US'\n", | |
"client.create_dataset(dataset)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Step three: Create your model" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Executing query with job ID: 5b59d7e8-cdf4-4733-a263-d6467e66fb4a\n", | |
"Query executing: 445.85s" | |
] | |
}, | |
{ | |
"ename": "ValueError", | |
"evalue": "Table has no schema: call 'client.get_table()'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-8-a00c87c72132>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_cell_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'bigquery'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m''\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'CREATE OR REPLACE MODEL `bqml_tutorial.sample_model`\\nOPTIONS(model_type=\\'logistic_reg\\') AS\\nSELECT\\n IF(totals.transactions IS NULL, 0, 1) AS label,\\n IFNULL(device.operatingSystem, \"\") AS os,\\n device.isMobile AS is_mobile,\\n IFNULL(geoNetwork.country, \"\") AS country,\\n IFNULL(totals.pageviews, 0) AS pageviews\\nFROM\\n `bigquery-public-data.google_analytics_sample.ga_sessions_*`\\nWHERE\\n _TABLE_SUFFIX BETWEEN \\'20160801\\' AND \\'20170630\\''\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m/opt/anaconda3/envs/python35/lib/python3.5/site-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36mrun_cell_magic\u001b[0;34m(self, magic_name, line, cell)\u001b[0m\n\u001b[1;32m 2165\u001b[0m \u001b[0mmagic_arg_s\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvar_expand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstack_depth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2166\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuiltin_trap\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2167\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmagic_arg_s\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2168\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/anaconda3/envs/python35/lib/python3.5/site-packages/google/cloud/bigquery/magics.py\u001b[0m in \u001b[0;36m_cell_magic\u001b[0;34m(line, query)\u001b[0m\n\u001b[1;32m 270\u001b[0m \u001b[0mjob_config\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbigquery\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mQueryJobConfig\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 271\u001b[0m \u001b[0mjob_config\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_legacy_sql\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_legacy_sql\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 272\u001b[0;31m \u001b[0mquery_job\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_run_query\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquery\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob_config\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 273\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 274\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/anaconda3/envs/python35/lib/python3.5/site-packages/google/cloud/bigquery/magics.py\u001b[0m in \u001b[0;36m_run_query\u001b[0;34m(client, query, job_config)\u001b[0m\n\u001b[1;32m 220\u001b[0m time.time() - start_time), end='')\n\u001b[1;32m 221\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 222\u001b[0;31m \u001b[0mquery_job\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 223\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mfutures\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTimeoutError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/anaconda3/envs/python35/lib/python3.5/site-packages/google/cloud/bigquery/job.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout, retry)\u001b[0m\n\u001b[1;32m 2508\u001b[0m \u001b[0mdest_table_ref\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdestination\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2509\u001b[0m \u001b[0mdest_table\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdest_table_ref\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mschema\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2510\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlist_rows\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdest_table\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretry\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mretry\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2511\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2512\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mto_dataframe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/opt/anaconda3/envs/python35/lib/python3.5/site-packages/google/cloud/bigquery/client.py\u001b[0m in \u001b[0;36mlist_rows\u001b[0;34m(self, table, selected_fields, max_results, page_token, start_index, page_size, retry)\u001b[0m\n\u001b[1;32m 1403\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTable\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1404\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1405\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_TABLE_HAS_NO_SCHEMA\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1406\u001b[0m \u001b[0mschema\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1407\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mValueError\u001b[0m: Table has no schema: call 'client.get_table()'" | |
] | |
} | |
], | |
"source": [ | |
"%%bigquery\n", | |
"CREATE OR REPLACE MODEL `bqml_tutorial.sample_model`\n", | |
"OPTIONS(model_type='logistic_reg') AS\n", | |
"SELECT\n", | |
" IF(totals.transactions IS NULL, 0, 1) AS label,\n", | |
" IFNULL(device.operatingSystem, \"\") AS os,\n", | |
" device.isMobile AS is_mobile,\n", | |
" IFNULL(geoNetwork.country, \"\") AS country,\n", | |
" IFNULL(totals.pageviews, 0) AS pageviews\n", | |
"FROM\n", | |
" `bigquery-public-data.google_analytics_sample.ga_sessions_*`\n", | |
"WHERE\n", | |
" _TABLE_SUFFIX BETWEEN '20160801' AND '20170630'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 35", | |
"language": "python", | |
"name": "python35" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment