Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save rutgerhofste/69bb32a8a7020517dc20c543b7715e09 to your computer and use it in GitHub Desktop.
Save rutgerhofste/69bb32a8a7020517dc20c543b7715e09 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# tutorial https://cloud.google.com/bigquery/docs/bigqueryml-scientist-start"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"from google.cloud import bigquery\n",
"%load_ext google.cloud.bigquery"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = \"/.google.json\"\n",
"os.environ[\"GOOGLE_CLOUD_PROJECT\"] = \"aqueduct30\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"client = bigquery.Client()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step two create your dataset"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Dataset(DatasetReference('aqueduct30', 'bqml_tutorial'))"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset = bigquery.Dataset(client.dataset('bqml_tutorial'))\n",
"dataset.location = 'US'\n",
"client.create_dataset(dataset)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Step three: Create your model"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Executing query with job ID: 5b59d7e8-cdf4-4733-a263-d6467e66fb4a\n",
"Query executing: 445.85s"
]
},
{
"ename": "ValueError",
"evalue": "Table has no schema: call 'client.get_table()'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-a00c87c72132>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_cell_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'bigquery'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m''\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'CREATE OR REPLACE MODEL `bqml_tutorial.sample_model`\\nOPTIONS(model_type=\\'logistic_reg\\') AS\\nSELECT\\n IF(totals.transactions IS NULL, 0, 1) AS label,\\n IFNULL(device.operatingSystem, \"\") AS os,\\n device.isMobile AS is_mobile,\\n IFNULL(geoNetwork.country, \"\") AS country,\\n IFNULL(totals.pageviews, 0) AS pageviews\\nFROM\\n `bigquery-public-data.google_analytics_sample.ga_sessions_*`\\nWHERE\\n _TABLE_SUFFIX BETWEEN \\'20160801\\' AND \\'20170630\\''\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/opt/anaconda3/envs/python35/lib/python3.5/site-packages/IPython/core/interactiveshell.py\u001b[0m in \u001b[0;36mrun_cell_magic\u001b[0;34m(self, magic_name, line, cell)\u001b[0m\n\u001b[1;32m 2165\u001b[0m \u001b[0mmagic_arg_s\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvar_expand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstack_depth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2166\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuiltin_trap\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2167\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmagic_arg_s\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2168\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/anaconda3/envs/python35/lib/python3.5/site-packages/google/cloud/bigquery/magics.py\u001b[0m in \u001b[0;36m_cell_magic\u001b[0;34m(line, query)\u001b[0m\n\u001b[1;32m 270\u001b[0m \u001b[0mjob_config\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbigquery\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mQueryJobConfig\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 271\u001b[0m \u001b[0mjob_config\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_legacy_sql\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_legacy_sql\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 272\u001b[0;31m \u001b[0mquery_job\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_run_query\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquery\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob_config\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 273\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 274\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/anaconda3/envs/python35/lib/python3.5/site-packages/google/cloud/bigquery/magics.py\u001b[0m in \u001b[0;36m_run_query\u001b[0;34m(client, query, job_config)\u001b[0m\n\u001b[1;32m 220\u001b[0m time.time() - start_time), end='')\n\u001b[1;32m 221\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 222\u001b[0;31m \u001b[0mquery_job\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 223\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mfutures\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTimeoutError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/anaconda3/envs/python35/lib/python3.5/site-packages/google/cloud/bigquery/job.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout, retry)\u001b[0m\n\u001b[1;32m 2508\u001b[0m \u001b[0mdest_table_ref\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdestination\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2509\u001b[0m \u001b[0mdest_table\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdest_table_ref\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mschema\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2510\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlist_rows\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdest_table\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretry\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mretry\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2511\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2512\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mto_dataframe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/anaconda3/envs/python35/lib/python3.5/site-packages/google/cloud/bigquery/client.py\u001b[0m in \u001b[0;36mlist_rows\u001b[0;34m(self, table, selected_fields, max_results, page_token, start_index, page_size, retry)\u001b[0m\n\u001b[1;32m 1403\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTable\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1404\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1405\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_TABLE_HAS_NO_SCHEMA\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1406\u001b[0m \u001b[0mschema\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mschema\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1407\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Table has no schema: call 'client.get_table()'"
]
}
],
"source": [
"%%bigquery\n",
"CREATE OR REPLACE MODEL `bqml_tutorial.sample_model`\n",
"OPTIONS(model_type='logistic_reg') AS\n",
"SELECT\n",
" IF(totals.transactions IS NULL, 0, 1) AS label,\n",
" IFNULL(device.operatingSystem, \"\") AS os,\n",
" device.isMobile AS is_mobile,\n",
" IFNULL(geoNetwork.country, \"\") AS country,\n",
" IFNULL(totals.pageviews, 0) AS pageviews\n",
"FROM\n",
" `bigquery-public-data.google_analytics_sample.ga_sessions_*`\n",
"WHERE\n",
" _TABLE_SUFFIX BETWEEN '20160801' AND '20170630'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 35",
"language": "python",
"name": "python35"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment