Skip to content

Instantly share code, notes, and snippets.

@mwacc
Created September 5, 2018 06:06
Show Gist options
  • Save mwacc/37ec80810120cc78d2ae869a787ce761 to your computer and use it in GitHub Desktop.
Save mwacc/37ec80810120cc78d2ae869a787ce761 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
{
"cells": [
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"import google.datalab.bigquery as bq\n",
"import seaborn as sns\n",
"import pandas as pd\n",
"import numpy as np\n",
"import shutil"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time is: 1.828289 sec\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>f0_</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1108779463</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" f0_\n",
"0 1108779463"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = \"\"\"\n",
"SELECT\n",
" count(1)\n",
"FROM\n",
" `nyc-tlc.yellow.trips` \n",
"\"\"\"\n",
"import time\n",
"start_at = time.time()\n",
"res = bq.Query(q).execute().result().to_dataframe()\n",
"print \"Time is: %f sec\" % (time.time() - start_at)\n",
"res[:20]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time is: 1.826464 sec\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>month</th>\n",
" <th>trips</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1973-05-01</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2009-01-01</td>\n",
" <td>14085510</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2009-02-01</td>\n",
" <td>13379753</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2009-03-01</td>\n",
" <td>14391536</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2009-04-01</td>\n",
" <td>14292746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2009-05-01</td>\n",
" <td>14799079</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2009-06-01</td>\n",
" <td>14182830</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2009-07-01</td>\n",
" <td>13624285</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2009-08-01</td>\n",
" <td>13689765</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2009-09-01</td>\n",
" <td>13983818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2009-10-01</td>\n",
" <td>15600529</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2009-11-01</td>\n",
" <td>14280039</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>2009-12-01</td>\n",
" <td>14584019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>2010-01-01</td>\n",
" <td>14863988</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>2010-02-01</td>\n",
" <td>11145405</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>2010-03-01</td>\n",
" <td>12882425</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>2010-04-01</td>\n",
" <td>15141479</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>2010-05-01</td>\n",
" <td>15485628</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>2010-06-01</td>\n",
" <td>14822752</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>2010-07-01</td>\n",
" <td>14423765</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" month trips\n",
"0 1973-05-01 1\n",
"1 2009-01-01 14085510\n",
"2 2009-02-01 13379753\n",
"3 2009-03-01 14391536\n",
"4 2009-04-01 14292746\n",
"5 2009-05-01 14799079\n",
"6 2009-06-01 14182830\n",
"7 2009-07-01 13624285\n",
"8 2009-08-01 13689765\n",
"9 2009-09-01 13983818\n",
"10 2009-10-01 15600529\n",
"11 2009-11-01 14280039\n",
"12 2009-12-01 14584019\n",
"13 2010-01-01 14863988\n",
"14 2010-02-01 11145405\n",
"15 2010-03-01 12882425\n",
"16 2010-04-01 15141479\n",
"17 2010-05-01 15485628\n",
"18 2010-06-01 14822752\n",
"19 2010-07-01 14423765"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = \"\"\"\n",
"SELECT\n",
" DATE_TRUNC(EXTRACT(DATE FROM dropoff_datetime), MONTH) AS month,\n",
" COUNT(*) trips \n",
"FROM\n",
" `nyc-tlc.yellow.trips` \n",
"GROUP BY\n",
" month \n",
"ORDER BY\n",
" month \n",
"\"\"\"\n",
"import time\n",
"start_at = time.time()\n",
"res = bq.Query(q).execute().result().to_dataframe()\n",
"print \"Time is: %f sec\" % (time.time() - start_at)\n",
"res[:20]"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time is: 2.135039 sec\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Hour</th>\n",
" <th>f0_</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>16.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>17.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>17.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5</td>\n",
" <td>22.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>6</td>\n",
" <td>18.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>7</td>\n",
" <td>14.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>8</td>\n",
" <td>12.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>9</td>\n",
" <td>11.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>10</td>\n",
" <td>12.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>11</td>\n",
" <td>11.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>12</td>\n",
" <td>11.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>13</td>\n",
" <td>11.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>14</td>\n",
" <td>11.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>15</td>\n",
" <td>11.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>16</td>\n",
" <td>12.4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>17</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>18</td>\n",
" <td>11.8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>19</td>\n",
" <td>12.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Hour f0_\n",
"0 0 16.0\n",
"1 1 16.7\n",
"2 2 17.2\n",
"3 3 17.8\n",
"4 4 20.0\n",
"5 5 22.2\n",
"6 6 18.5\n",
"7 7 14.7\n",
"8 8 12.2\n",
"9 9 11.9\n",
"10 10 12.1\n",
"11 11 11.9\n",
"12 12 11.7\n",
"13 13 11.9\n",
"14 14 11.7\n",
"15 15 11.7\n",
"16 16 12.4\n",
"17 17 12.0\n",
"18 18 11.8\n",
"19 19 12.5"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = \"\"\"\n",
"SELECT \n",
" extract(hour FROM pickup_datetime) AS Hour, \n",
" ROUND(AVG(trip_distance / TIMESTAMP_DIFF(dropoff_datetime, pickup_datetime, SECOND)) * 3600,1)\n",
"FROM `nyc-tlc.yellow.trips` \n",
"WHERE dropoff_datetime > pickup_datetime\n",
" AND fare_amount/trip_distance \n",
" BETWEEN 2\n",
" AND 10\n",
" AND trip_distance > 0\n",
"GROUP BY Hour\n",
"ORDER BY Hour\n",
"\n",
"\"\"\"\n",
"import time\n",
"start_at = time.time()\n",
"res = bq.Query(q).execute().result().to_dataframe()\n",
"print \"Time is: %f sec\" % (time.time() - start_at)\n",
"res[:20]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Time is: 2.109586 sec\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>f0_</th>\n",
" <th>f1_</th>\n",
" <th>f2_</th>\n",
" <th>f3_</th>\n",
" <th>cnt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86530</td>\n",
" <td>40.77069</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86532</td>\n",
" <td>40.77070</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86494</td>\n",
" <td>40.77047</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86532</td>\n",
" <td>40.77076</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86494</td>\n",
" <td>40.77043</td>\n",
" <td>18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86179</td>\n",
" <td>40.76841</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86513</td>\n",
" <td>40.77060</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86182</td>\n",
" <td>40.76844</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86163</td>\n",
" <td>40.76830</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.87091</td>\n",
" <td>40.77408</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86539</td>\n",
" <td>40.77077</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86172</td>\n",
" <td>40.76837</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86172</td>\n",
" <td>40.76834</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86182</td>\n",
" <td>40.76841</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86530</td>\n",
" <td>40.77070</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86182</td>\n",
" <td>40.76842</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86165</td>\n",
" <td>40.76830</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.87283</td>\n",
" <td>40.77431</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86156</td>\n",
" <td>40.76825</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>-73.13739</td>\n",
" <td>41.36614</td>\n",
" <td>-73.86179</td>\n",
" <td>40.76842</td>\n",
" <td>14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" f0_ f1_ f2_ f3_ cnt\n",
"0 -73.13739 41.36614 -73.86530 40.77069 21\n",
"1 -73.13739 41.36614 -73.86532 40.77070 19\n",
"2 -73.13739 41.36614 -73.86494 40.77047 18\n",
"3 -73.13739 41.36614 -73.86532 40.77076 18\n",
"4 -73.13739 41.36614 -73.86494 40.77043 18\n",
"5 -73.13739 41.36614 -73.86179 40.76841 17\n",
"6 -73.13739 41.36614 -73.86513 40.77060 17\n",
"7 -73.13739 41.36614 -73.86182 40.76844 17\n",
"8 -73.13739 41.36614 -73.86163 40.76830 17\n",
"9 -73.13739 41.36614 -73.87091 40.77408 16\n",
"10 -73.13739 41.36614 -73.86539 40.77077 16\n",
"11 -73.13739 41.36614 -73.86172 40.76837 16\n",
"12 -73.13739 41.36614 -73.86172 40.76834 15\n",
"13 -73.13739 41.36614 -73.86182 40.76841 15\n",
"14 -73.13739 41.36614 -73.86530 40.77070 15\n",
"15 -73.13739 41.36614 -73.86182 40.76842 15\n",
"16 -73.13739 41.36614 -73.86165 40.76830 15\n",
"17 -73.13739 41.36614 -73.87283 40.77431 14\n",
"18 -73.13739 41.36614 -73.86156 40.76825 14\n",
"19 -73.13739 41.36614 -73.86179 40.76842 14"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q = \"\"\"\n",
"SELECT\n",
" round(pickup_longitude, 5), \n",
" round(pickup_latitude, 5), \n",
" round(dropoff_longitude,5), \n",
" round(dropoff_latitude,5),\n",
" count(1) as cnt\n",
"FROM\n",
" `nyc-tlc.yellow.trips` \n",
"where\n",
" trip_distance > 10\n",
" AND fare_amount >= 2.5\n",
" AND pickup_longitude > -78\n",
" AND pickup_longitude < -70\n",
" AND dropoff_longitude > -78\n",
" AND dropoff_longitude < -70\n",
" AND pickup_latitude > 37\n",
" AND pickup_latitude < 45\n",
" AND dropoff_latitude > 37\n",
" AND dropoff_latitude < 45\n",
" AND passenger_count > 0 \n",
" AND round(pickup_longitude, 5) != round(dropoff_longitude, 5)\n",
" AND round(pickup_latitude, 5) != round(dropoff_latitude, 5)\n",
"group by round(pickup_longitude, 5), round(pickup_latitude, 5), round(dropoff_longitude,5), round(dropoff_latitude,5)\n",
"order by cnt DESC\n",
"LIMIT 20\n",
"\"\"\"\n",
"import time\n",
"start_at = time.time()\n",
"res = bq.Query(q).execute().result().to_dataframe()\n",
"print \"Time is: %f sec\" % (time.time() - start_at)\n",
"res[:20]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.15"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment