Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save xhluca/80163851457e559e06ea71003b26ef32 to your computer and use it in GitHub Desktop.
Save xhluca/80163851457e559e06ea71003b26ef32 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window.Plotly) {{require(['plotly'],function(plotly) {window.Plotly=plotly;});}}</script>"
],
"text/vnd.plotly.v1+html": [
"<script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window.Plotly) {{require(['plotly'],function(plotly) {window.Plotly=plotly;});}}</script>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import time\n",
"\n",
"import pandas as pd\n",
"import plotly.offline as py # Use \"import plotly.plotly\" for online graphs\n",
"import plotly.graph_objs as go\n",
"import numpy as np\n",
"\n",
"py.init_notebook_mode(connected=True) # Plots inside Jupyter Notebook"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3.0.0rc10\n"
]
}
],
"source": [
"from plotly import __version__\n",
"\n",
"# plotly.offline needs plotly >= 1.9.0; the go.FigureWidget cells further down need >= 3.0.0.\n",
"print(__version__)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading Data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(4740357, 6)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>start_date</th>\n",
" <th>start_station_code</th>\n",
" <th>end_date</th>\n",
" <th>end_station_code</th>\n",
" <th>duration_sec</th>\n",
" <th>is_member</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2017-04-15 00:00</td>\n",
" <td>7060</td>\n",
" <td>2017-04-15 00:31</td>\n",
" <td>7060</td>\n",
" <td>1841</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2017-04-15 00:01</td>\n",
" <td>6173</td>\n",
" <td>2017-04-15 00:10</td>\n",
" <td>6173</td>\n",
" <td>553</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2017-04-15 00:01</td>\n",
" <td>6203</td>\n",
" <td>2017-04-15 00:04</td>\n",
" <td>6204</td>\n",
" <td>195</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2017-04-15 00:01</td>\n",
" <td>6104</td>\n",
" <td>2017-04-15 00:06</td>\n",
" <td>6114</td>\n",
" <td>285</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2017-04-15 00:01</td>\n",
" <td>6174</td>\n",
" <td>2017-04-15 00:11</td>\n",
" <td>6174</td>\n",
" <td>569</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" start_date start_station_code end_date end_station_code \\\n",
"0 2017-04-15 00:00 7060 2017-04-15 00:31 7060 \n",
"1 2017-04-15 00:01 6173 2017-04-15 00:10 6173 \n",
"2 2017-04-15 00:01 6203 2017-04-15 00:04 6204 \n",
"3 2017-04-15 00:01 6104 2017-04-15 00:06 6114 \n",
"4 2017-04-15 00:01 6174 2017-04-15 00:11 6174 \n",
"\n",
" duration_sec is_member \n",
"0 1841 1 \n",
"1 553 1 \n",
"2 195 1 \n",
"3 285 1 \n",
"4 569 1 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the 2017 Bixi trip files, one CSV per month (months 04 through 11).\n",
"df_ls = [pd.read_csv(f'data/Bixi/2017/OD_2017-{n:02d}.csv') for n in range(4, 12)]\n",
"\n",
"# ignore_index=True gives the combined frame one continuous 0..N-1 index\n",
"# instead of repeating each monthly file's own row labels.\n",
"df = pd.concat(df_ls, ignore_index=True)\n",
"\n",
"print(df.shape)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def trace(x, y):\n",
"    \"\"\"Return a go.Scattergl trace that plots y against x as size-2 circle markers.\"\"\"\n",
"    marker_style = dict(size=2, symbol='circle')\n",
"    return go.Scattergl(x=x, y=y, mode='markers', marker=marker_style)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plotly 2.7"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The outputs of the plotting cells in this section were cleared because they make the saved notebook file overwhelmingly large."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting 500,000 points"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fixed seed, so every run draws the same 500,000 rows.\n",
"df_small = df.sample(n=500_000, random_state=1)\n",
"x, y = df_small['start_station_code'], df_small['duration_sec']\n",
"\n",
"# Start the clock here; t1 is read again by the next code cell.\n",
"t1 = time.time()\n",
"fig = go.Figure(data=[trace(x, y)])\n",
"\n",
"py.iplot(fig, filename='simple-scatter')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"7.0769500732421875\n"
]
}
],
"source": [
"# Seconds elapsed since t1 was recorded in the preceding code cell.\n",
"t2 = time.time()\n",
"print(t2 - t1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting 1,000,000 points"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fixed seed, so every run draws the same 1,000,000 rows.\n",
"df_small = df.sample(n=1_000_000, random_state=1)\n",
"x, y = df_small['start_station_code'], df_small['duration_sec']\n",
"\n",
"# Start the clock here; t3 is read again by the next code cell.\n",
"t3 = time.time()\n",
"fig = go.Figure(data=[trace(x, y)])\n",
"\n",
"py.iplot(fig, filename='simple-scatter')"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"15.005493879318237\n"
]
}
],
"source": [
"# Seconds elapsed since t3 was recorded in the preceding code cell.\n",
"t4 = time.time()\n",
"print(t4 - t3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting 2,000,000 points"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fixed seed, so every run draws the same 2,000,000 rows.\n",
"df_small = df.sample(n=2_000_000, random_state=1)\n",
"x, y = df_small['start_station_code'], df_small['duration_sec']\n",
"\n",
"# Start the clock here; t5 is read again by the next code cell.\n",
"t5 = time.time()\n",
"fig = go.Figure(data=[trace(x, y)])\n",
"\n",
"py.iplot(fig, filename='simple-scatter')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"29.524930715560913\n"
]
}
],
"source": [
"# Seconds elapsed since t5 was recorded in the preceding code cell.\n",
"t6 = time.time()\n",
"print(t6 - t5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting 500,000 points with `go.FigureWidget`"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.1366443634033203\n"
]
}
],
"source": [
"# Fixed seed, so every run draws the same 500,000 rows.\n",
"df_small = df.sample(n=500_000, random_state=1)\n",
"x, y = df_small['start_station_code'], df_small['duration_sec']\n",
"\n",
"t1 = time.time()\n",
"# The widget is built but never shown (it is neither assigned nor the cell's\n",
"# last expression), so the interval covers construction only.\n",
"go.FigureWidget(data=[trace(x, y)])\n",
"t2 = time.time()\n",
"\n",
"print(t2 - t1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting 1,000,000 points with `go.FigureWidget`"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.218580484390259\n"
]
}
],
"source": [
"# Fixed seed, so every run draws the same 1,000,000 rows.\n",
"df_small = df.sample(n=1_000_000, random_state=1)\n",
"x, y = df_small['start_station_code'], df_small['duration_sec']\n",
"\n",
"t1 = time.time()\n",
"# The widget is built but never shown (it is neither assigned nor the cell's\n",
"# last expression), so the interval covers construction only.\n",
"go.FigureWidget(data=[trace(x, y)])\n",
"t2 = time.time()\n",
"\n",
"print(t2 - t1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting 2,000,000 points with `go.FigureWidget`"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4.26620626449585\n"
]
}
],
"source": [
"# Fixed seed, so every run draws the same 2,000,000 rows.\n",
"df_small = df.sample(n=2_000_000, random_state=1)\n",
"x, y = df_small['start_station_code'], df_small['duration_sec']\n",
"\n",
"t1 = time.time()\n",
"# The widget is built but never shown (it is neither assigned nor the cell's\n",
"# last expression), so the interval covers construction only.\n",
"go.FigureWidget(data=[trace(x, y)])\n",
"t2 = time.time()\n",
"\n",
"print(t2 - t1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting the entire dataset with `go.FigureWidget` (about 4.7 million data points)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9.956984996795654\n"
]
}
],
"source": [
"# No sampling here: every row of df goes into the trace.\n",
"x, y = df['start_station_code'], df['duration_sec']\n",
"\n",
"t1 = time.time()\n",
"# The widget is built but never shown (it is neither assigned nor the cell's\n",
"# last expression), so the interval covers construction only.\n",
"go.FigureWidget(data=[trace(x, y)])\n",
"t2 = time.time()\n",
"\n",
"print(t2 - t1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment