Skip to content

Instantly share code, notes, and snippets.

@ronaldluc
Last active April 3, 2019 09:19
Show Gist options
  • Save ronaldluc/76967d7a61d3c82a09668142863c4ddd to your computer and use it in GitHub Desktop.
Save ronaldluc/76967d7a61d3c82a09668142863c4ddd to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"%pylab inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import dask.dataframe as dd\n",
"from dask.distributed import Client"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# What data will we use?"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"./data/yellow_tripdata_2015-05.csv - size: 1.92GB\n",
"./data/yellow_tripdata_2015-11.csv - size: 1.65GB\n",
"./data/yellow_tripdata_2015-10.csv - size: 1.80GB\n",
"./data/yellow_tripdata_2015-04.csv - size: 1.91GB\n",
"./data/yellow_tripdata_2015-12.csv - size: 1.67GB\n",
"./data/yellow_tripdata_2015-06.csv - size: 1.80GB\n",
"./data/yellow_tripdata_2015-07.csv - size: 1.69GB\n",
"./data/yellow_tripdata_2015-03.csv - size: 1.94GB\n",
"./data/yellow_tripdata_2015-02.csv - size: 1.81GB\n",
"./data/yellow_tripdata_2015-01.csv - size: 1.85GB\n",
"./data/yellow_tripdata_2015-09.csv - size: 1.64GB\n",
"./data/yellow_tripdata_2015-08.csv - size: 1.63GB\n"
]
}
],
"source": [
"# Show every file found in ./data/ together with its size.\n",
"for entry in os.listdir('./data/'):\n",
"    path = './data/' + entry\n",
"    size_gb = os.path.getsize(path)/1024/1024/1024\n",
"    print(path + ' - size: %1.2f' % size_gb + 'GB')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using pandas"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 13min 24s, sys: 3min 35s, total: 17min\n",
"Wall time: 15min 3s\n"
]
}
],
"source": [
"%%time\n",
"# Columns parsed as datetime64[ns] while the CSVs are read.\n",
"date_cols = ['tpep_pickup_datetime', 'tpep_dropoff_datetime']\n",
"\n",
"# Read the 12 monthly files and stack them into one frame.\n",
"# * The list of monthly frames is passed straight to concat, so `df` is only\n",
"#   ever bound to the final DataFrame (never to a list).\n",
"# * ignore_index=True gives the result a single unique 0..n-1 row index\n",
"#   instead of repeating every month's own 0..k-1 labels.\n",
"df = pd.concat(\n",
"    [pd.read_csv('data/yellow_tripdata_2015-%02d.csv' % month,\n",
"                 date_parser=pd.to_datetime,\n",
"                 parse_dates=date_cols)\n",
"     for month in range(1, 13)],\n",
"    sort=False, ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def calculate_distance(df, radius_km=6373):\n",
"    \"\"\"Haversine (great-circle) distance from pickup to dropoff point.\n",
"\n",
"    Formula: https://www.movable-type.co.uk/scripts/latlong.html\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : DataFrame-like\n",
"        Must expose ``pickup_longitude``, ``pickup_latitude``,\n",
"        ``dropoff_longitude`` and ``dropoff_latitude``. The values are fed\n",
"        to ``np.radians``, i.e. they are treated as decimal degrees.\n",
"    radius_km : float, optional\n",
"        Radius of the sphere the central angle is scaled by. The default\n",
"        (6373, roughly the Earth's radius in km) is the value this notebook\n",
"        has always used, so the result is in kilometres.\n",
"\n",
"    Returns\n",
"    -------\n",
"    Element-wise distance, in the same units as ``radius_km``.\n",
"    \"\"\"\n",
"    lon1 = np.radians(df.pickup_longitude)\n",
"    lat1 = np.radians(df.pickup_latitude)\n",
"    lon2 = np.radians(df.dropoff_longitude)\n",
"    lat2 = np.radians(df.dropoff_latitude)\n",
"    dlon = lon2 - lon1\n",
"    dlat = lat2 - lat1\n",
"    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2\n",
"    # Floating-point rounding can leave `a` a hair above 1 for (near-)antipodal\n",
"    # points; without the clamp sqrt(1 - a) would turn those rows into NaN.\n",
"    a = a.clip(0, 1)\n",
"    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))\n",
"    return radius_km * c"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 2min 1s, sys: 4min 11s, total: 6min 12s\n",
"Wall time: 4min 4s\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAENCAYAAAAIbA6TAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAFnFJREFUeJzt3X+0XWV95/H3xwAqyiCYiEqAUEsVVER7J9rBKipgtC3g6KwSHYsdbWZUxq46nVlY1xIG18zQ2lnOTMUfqcZfrWBF0VRBQClSoWguEgg/NUZGMhnlSqxadcEEvvPH2VkcLvfmnpy7b37wvF9rnZWzn+fZ+/vcm+Rz9tlnn71TVUiS2vGo3T0BSdKuZfBLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGrPP7p7ATBYvXlzLli3b3dOQpL3G9ddf/6OqWjLK2D0y+JctW8bk5OTunoYk7TWS/O9Rx3qoR5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktSYPfILXDuy7KwvjbXenef9Vs8zkaS9k3v8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMbM+QWuJGuA3wburqpnzdD/H4HXDW3vaGBJVW1NcifwM+B+YFtVTfQ1cUmPbE/+u/VjrfeDlxw31nrjfDl0b/1i6Ch7/B8DVszWWVXvqarjquo44B3A16pq69CQl3T9hr4k7QHmDP6quhrYOte4zkrggnnNSJK0oHo7xp9kfwbvDD471FzA5UmuT7JqjvVXJZlMMjk1NdXXtCRJ0/R5kbbfAa6Zdpjn+KrakuRJwBVJbu/eQTxMVa0GVgNMTExUj/NSo/777/72WOv9h09/seeZSA+1uy822Wfwn860wzxVtaX78+4kFwPLgRmDX4985/+7K8da760ffGnPM9E4vnrl08Za72Uv/W7PM9F89XKoJ8mBwIuBLwy1PS7JAdufAycDN/dRT5I0vlFO57wAOAFYnGQzcDawL0BVfbAb9irg8qr6+dCqhwAXJ9le51NV9eX+pi5JGsecwV9VK0cY8zEGp30Ot20CnjPuxCRJC2OvuwPXI9mzP/7ssdbbcMaGsda77RlHj7Xe0bffNtZ6kvYMBr/Uk81n/f1Y6y097zd7nom0Y16rR5Ia4x7/jpxz4Jjr/aTfeUgzOOecc3bpenrkcI9fkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4JekxswZ/EnWJLk7yc2z9J+Q5CdJ1nePdw31rUhyR5KNSc7qc+KSpPGMssf/MWDFHGP+vqqO6x7nAiRZBJwPvAI4BliZ5Jj5TFaSNH9zBn9VXQ1sHWPby4GNVbWpqu4DLgROHWM7kqQe9XWM/zeS3Jjk0iTP7NoOBe4aGrO5a5tRklVJJpNMTk1N9TQtSdJ0fQT/t4Ajquo5wF8An+/aM8PYmm0jVbW6qiaqamLJkiU9TEuSNJN5B39V/bSq/ql7fgmwb5LFDPbwDxsauhTYMt96kqT5mXfwJ3lyknTPl3fbvAdYBxyV5Mgk+wGnA2vnW0+SND/7zDUgyQXACcDiJJuBs4F9Aarqg8BrgDcn2Qb8Eji9qgrYluRM4DJgEbCmqm5ZkJ9CkjSyOYO/qlbO0f8+4H2z9F0CXDLe1CRJC8Fv7kpSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNmTP4k6xJcneSm2fpf12Sm7rHtUmeM9R3Z5INSdYnmexz4pKk8Yyyx/8xYMUO+r8HvLiqjgXeDaye1v+SqjquqibGm6IkqU/7zDWgqq5OsmwH/dcOLV4HLJ3/tCRJC6
XvY/xvBC4dWi7g8iTXJ1m1oxWTrEoymWRyamqq52lJkrabc49/VElewiD4XzjUfHxVbUnyJOCKJLdX1dUzrV9Vq+kOE01MTFRf85IkPVQve/xJjgU+DJxaVfdsb6+qLd2fdwMXA8v7qCdJGt+8gz/J4cDngNdX1beH2h+X5IDtz4GTgRnPDJIk7TpzHupJcgFwArA4yWbgbGBfgKr6IPAu4InA+5MAbOvO4DkEuLhr2wf4VFV9eQF+BknSThjlrJ6Vc/S/CXjTDO2bgOc8fA1J0u7kN3clqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDVmpOBPsibJ3UlmvFl6Bv5Xko1JbkryvKG+M5J8p3uc0dfEJUnjGXWP/2PAih30vwI4qnusAj4AkORgBjdnfz6wHDg7yUHjTlaSNH8jBX9VXQ1s3cGQU4FP1MB1wBOSPAV4OXBFVW2tqh8DV7DjFxBJ0gLr6xj/ocBdQ8ubu7bZ2h8myaokk0kmp6amepqWJGm6voI/M7TVDtof3li1uqomqmpiyZIlPU1LkjRdX8G/GThsaHkpsGUH7ZKk3aSv4F8L/F53ds8LgJ9U1f8FLgNOTnJQ96HuyV2bJGk32WeUQUkuAE4AFifZzOBMnX0BquqDwCXAK4GNwC+A3+/6tiZ5N7Cu29S5VbWjD4klSQtspOCvqpVz9Bfw1ln61gBrdn5qkqSF4Dd3JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMaMFPxJViS5I8nGJGfN0P/eJOu7x7eT/ONQ3/1DfWv7nLwkaefNebP1JIuA84GTgM3AuiRrq+rW7WOq6o+Gxv974LlDm/hlVR3X35QlSfMxyh7/cmBjVW2qqvuAC4FTdzB+JXBBH5OTJPVvlOA/FLhraHlz1/YwSY4AjgSuHGp+TJLJJNclOW22IklWdeMmp6amRpiWJGkcowR/ZmirWcaeDlxUVfcPtR1eVRPAa4H/keRpM61YVauraqKqJpYsWTLCtCRJ4xgl+DcDhw0tLwW2zDL2dKYd5qmqLd2fm4CreOjxf0nSLjZK8K8DjkpyZJL9GIT7w87OSfJ04CDgH4baDkry6O75YuB44Nbp60qSdp05z+qpqm1JzgQuAxYBa6rqliTnApNVtf1FYCVwYVUNHwY6GvhQkgcYvMicN3w2kCRp15sz+AGq6hLgkmlt75q2fM4M610LPHse85Mk9cxv7kpSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNGSn4k6xIckeSjUnOmqH/DUmmkqzvHm8a6jsjyXe6xxl9Tl6StPPmvNl6kkXA+cBJwGZgXZK1VXXrtKGfrqozp617MHA2MAEUcH237o97mb0kaaeNsse/HNhYVZuq6j7gQuDUEbf/cuCKqtrahf0VwIrxpipJ6sMowX8ocNfQ8uaubbpXJ7kpyUVJDtvJdUmyKslkksmpqakRpiVJGscowZ8Z2mra8t8Cy6rqWOArwMd3Yt1BY9XqqpqoqoklS5aMMC1J0jhGCf7NwGFDy0uBLcMDquqeqrq3W/xL4NdHXVeStGuNEvzrgKOSHJlkP+B0YO3wgCRPGVo8Bbite34ZcHKSg5IcBJzctUmSdpM5z+qpqm1JzmQQ2IuANVV1S5JzgcmqWgu8LckpwDZgK/CGbt2tSd7N4MUD4Nyq2roAP4ckaURzBj9AVV0CXDKt7V1Dz98BvGOWddcAa+YxR0lSj/zmriQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjT
H4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktSYkYI/yYokdyTZmOSsGfrfnuTWJDcl+WqSI4b67k+yvnus7XPykqSdN+fN1pMsAs4HTgI2A+uSrK2qW4eG3QBMVNUvkrwZ+DPgd7u+X1bVcT3PW5I0plH2+JcDG6tqU1XdB1wInDo8oKr+rqp+0S1eByztd5qSpL6MEvyHAncNLW/u2mbzRuDSoeXHJJlMcl2S02ZbKcmqbtzk1NTUCNOSJI1jzkM9QGZoqxkHJv8amABePNR8eFVtSfIrwJVJNlTVdx+2warVwGqAiYmJGbcvSZq/Ufb4NwOHDS0vBbZMH5TkROCdwClVde/29qra0v25CbgKeO485itJmqdRgn8dcFSSI5PsB5wOPOTsnCTPBT7EIPTvHmo/KMmju+eLgeOB4Q+FJUm72JyHeqpqW5IzgcuARcCaqrolybnAZFWtBd4DPB74TBKA71fVKcDRwIeSPMDgRea8aWcDSZJ2sVGO8VNVlwCXTGt719DzE2dZ71rg2fOZoCSpX35zV5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWrMSMGfZEWSO5JsTHLWDP2PTvLprv8bSZYN9b2ja78jycv7m7okaRxzBn+SRcD5wCuAY4CVSY6ZNuyNwI+r6leB9wJ/2q17DHA68ExgBfD+bnuSpN1klD3+5cDGqtpUVfcBFwKnThtzKvDx7vlFwMuSpGu/sKrurarvARu77UmSdpNU1Y4HJK8BVlTVm7rl1wPPr6ozh8bc3I3Z3C1/F3g+cA5wXVX9Vdf+EeDSqrpohjqrgFXd4tOBO8b4eRYDPxpjvXHsylrWs5712qk3bq0jqmrJKAP3GWFMZmib/mox25hR1h00Vq0GVo8wn1klmayqiflsY0+sZT3rWa+derui1iiHejYDhw0tLwW2zDYmyT7AgcDWEdeVJO1CowT/OuCoJEcm2Y/Bh7Vrp41ZC5zRPX8NcGUNjiGtBU7vzvo5EjgK+GY/U5ckjWPOQz1VtS3JmcBlwCJgTVXdkuRcYLKq1gIfAT6ZZCODPf3Tu3VvSfI3wK3ANuCtVXX/Av0sMM9DRXtwLetZz3rt1FvwWnN+uCtJemTxm7uS1BiDX5IaY/BLUmMM/j1UkuVJ/nn3/Jgkb0/yyl1Y/xO7qpbGl2S/JL+X5MRu+bVJ3pfkrUn23d3z057JD3dHlOQZwKHAN6rqn4baV1TVl3uudTaDayPtA1zB4FvQVwEnApdV1X/pud7003MDvAS4EqCqTumz3gz1X8jgUh43V9XlC7D95wO3VdVPkzwWOAt4HoOzzf5rVf2k53pvAy6uqrv63O4stf6awb+T/YF/BB4PfA54GYP/32fsYPVxaz4NeBWD7+hsA74DXND371EL5xEZ/El+v6o+2uP23ga8FbgNOA74w6r6Qtf3rap6Xl+1um1u6Oo8GvgBsHQotL5RVcf2XO9bDELwwzz4jesLePC03K/1XO+bVbW8e/4HDH63FwMnA39bVef1XO8W4DndqcmrgV/QXVOqa/+XPdf7CfBz4LsMfo+fqaqpPmsM1bqpqo7tvjj5f4CnVtX93bWyblyAfytvA34H+BrwSmA98GMGLwRvqaqr+qynBVJVj7gH8P2et7cBeHz3fBkwySD8AW5YgPnfMNPzbnn9AtR7FPBHDN5dHNe1bVrAv5/hn28dsKR7/jhgwwLUu23o+bd2we/zhu53ejKD77hMAV9m8CXHA3qudTOwH3AQ8DPg4K79McM/d4/1NgCLuuf7A1d1zw9fiP8L3bYPBM4Dbgfu6R63dW1PWIiaO5jLpQuwzX8G/Dfgk8Brp/W9fyF+jlGu1bNHSnLTbF3AIT2XW1Td4Z2qujPJCcBFSY5g5u
sRzdd9Sfavql8Av769McmBwAN9F6uqB4D3JvlM9+cPGe06TuN6VJKDGIRjqtsbrqqfJ9m2APVuHnoXeGOSiaqaTPJrwP9bgHrV/U4vBy7vjrW/AlgJ/Dkw0oW0RvQRBoG4CHgn8Jkkm4AXMLiS7kLYB7ifwTvSAwCq6vsL+JnC3zA47HhCVf0AIMmTGbyQfgY4qc9iSWZ7Bx8G78T79lEGh8s+C/ybJK9m8AJwL4O/x97ttYd6unB6OYO3mQ/pAq6tqqf2WOtK4O1VtX6obR9gDfC6qur1HgNJHt39pU9vXww8pao29Flvhjq/BRxfVX+yQNu/k8ELWBgcWvoXVfWDJI8Hvl5Vvf7n6l4w/yfwmwyuevg84K7u8baqurHnejdU1XNn6XtsVf2y53pPBaiqLUmewOCzoO9XVe+XR0nyhwzuv3Ed8CLgT6vqo0mWAJ+tqhctQM07qurpO9s3j3r3MziUNdNO3Quq6rE911s//G8+yTsZHEY7Bbiiej6UDHt38H8E+GhVfX2Gvk9V1Wt7rLUU2LZ9b2Na3/FVdU1ftVqWZH/gkBrcu2Ehtn8A8CsM9lg3V9UPF6jOr1XVtxdi23uCJM8EjmbwYfztu6De5cBXgI9v/ztLcgjwBuCkqjqx53o3A6+qqu/M0HdXVR02w2rzqXcb8MzuXeL2tjOA/8TgEPMRfdaDvTj4JbWhOyx4FoMbOz2pa/4hg4tAnldV09/1z7feaxh81vSwe4IkOa2qPt9zvT8DLq+qr0xrXwH8RVUd1Wc9MPgl7cX6PoOvlXoGv6S9VpLvV9Xh1ts5e+1ZPZLasIvP4HvE1wODX9Ke7xB2cAaf9XaewS9pT/dFBme3rJ/ekeQq6+08j/FLUmO8OqckNcbgl6TGGPyS1BiDX3u1JB9OcswO+s9J8se7ck7zleQJSd4ytHxCki/uzjnpkcXg116tqt5UVbfu7nn07AnAW+YcJY3J4NdeIcmyJLcn+XiSm5JclGT/JFclmejGrEjyrSQ3JvnqDNv4gySXJnnstPUWd1cMJckbknwhyZeT3NHdDW2uOX04yc1J/jrJiUmuSfKdJNtvNnNwks93874uybFd+zlJ1nRz2dTd5AQG15l/WpL1Sd7TtT2++5lv7+osxOXA1QjP49fe5OnAG6vqmiRrGNor7i4L/JfAi6rqe0kOHl4xyZkMboxyWlXdO0duLgeexeBOXeuSfKmqJmcZ+6vAvwJWMbipzGuBFzK4pO6fAKcB/5nBTUpOS/JS4BM8eF33ZzC4zeUBwB1JPsDggmTP2n6p3gzu//Bc4JnAFuAa4HjgYVemlUbhHr/2JncNXQL7rxgE7HYvAK7efknnqto61Pd6BjdCefVM9zmYwRVVdU933fzPTasz3feqakN3Sd1bgK/W4MsxGxjcrY1u/U9287oSeGJ3jwCAL1XVvVX1I+BuZv+K/jeranNXZ/3QtqWdZvBrbzL924bDy5mhf7ubGQTl0qG2bTz47/8xO1FnuuEXkgeGlh/gwXfUM7292L7N4fXvZ/Z34aOOk+Zk8GtvcniS3+ier+Shhzr+AXhxkiNhcFx9qO8G4N8Ca7ffrQq4kwdva/maaXVO6o7LP5bBoZr53mjnauB13bxOAH5UVT/dwfif0d3SUFoIBr/2JrcBZ3RXMzwY+MD2ju6+vauAzyW5Efj08Irdndr+GPhSdwvLPwfenORaYPG0Ol9ncGhmPYPbCc52fH9U5wAT3bzPY3Cv2FlV1T3ANd0Hxu/Z0VhpHF6rR3uFJMuAL1bVsxa4zhuAiao6cyHrSLuTe/yS1Bj3+KU5JHki8LDvBQAv6w7LSHsVg1+SGuOhHklqjMEvSY0x+CWpMQa/JDXm/wPh2dJk4ZjmAgAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"%%time\n",
"KM_PER_MILE = 1.609344\n",
"\n",
"# Straight-line pickup -> dropoff distance. calculate_distance scales the\n",
"# central angle by a radius given in km, so this column is in km.\n",
"df['distance'] = calculate_distance(df)\n",
"# Convert km -> miles before dividing so the column matches its name.\n",
"# Trips with zero straight-line distance end up as inf/NaN here.\n",
"df['fare_per_mile'] = df.fare_amount/(df.distance/KM_PER_MILE)\n",
"df['trip_distance_km'] = df.trip_distance*KM_PER_MILE\n",
"df['time_in_cab'] = df.tpep_dropoff_datetime - df.tpep_pickup_datetime\n",
"df['pickup_hour'] = df.tpep_pickup_datetime.dt.hour\n",
"df['pickup_dow'] = df.tpep_pickup_datetime.dt.dayofweek\n",
"df['pickup_month'] = df.tpep_pickup_datetime.dt.month\n",
"\n",
"# Mean tip per pickup month, as a bar chart.\n",
"df.groupby(df.pickup_month).tip_amount.mean().sort_index().plot.bar()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Using Dask"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Client</h3>\n",
"<ul>\n",
" <li><b>Scheduler: </b>inproc://192.168.3.176/24483/1\n",
" <li><b>Dashboard: </b><a href='http://localhost:8787/status' target='_blank'>http://localhost:8787/status</a>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3>Cluster</h3>\n",
"<ul>\n",
" <li><b>Workers: </b>1</li>\n",
" <li><b>Cores: </b>12</li>\n",
" <li><b>Memory: </b>17.18 GB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: scheduler='inproc://192.168.3.176/24483/1' processes=1 cores=12>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# processes=False keeps the scheduler and workers inside this process\n",
"# (threads) instead of spawning worker subprocesses.\n",
"client = Client(processes=False)\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 29min 54s, sys: 54min 14s, total: 1h 24min 9s\n",
"Wall time: 10min 14s\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAENCAYAAAAIbA6TAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAFnJJREFUeJzt3X+0XWV95/H3xwAqyiCYiEqAUEsVVER7J9rBKipgtC3g6KwSHYsdbWZUxq46nVlY1xIG18zQ2lnOTMUfqcZfrWBF0VRBQClSoWguEgg/NUZGMhnlSqxadcEEvvPH2VkcLvfmnpy7b37wvF9rnZWzn+fZ+/vcm+Rz9tlnn71TVUiS2vGo3T0BSdKuZfBLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGrPP7p7ATBYvXlzLli3b3dOQpL3G9ddf/6OqWjLK2D0y+JctW8bk5OTunoYk7TWS/O9Rx3qoR5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktSYPfILXDuy7KwvjbXenef9Vs8zkaS9k3v8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1Zq87j19SG578d+vHWu8HLzmu55k88swZ/EnWAL8N3F1Vz5qh/z8Crxva3tHAkqramuRO4GfA/cC2qproa+KS1Kdxvhy6t34xdJRDPR8DVszWWVXvqarjquo44B3A16pq69CQl3T9hr4k7QHmDP6quhrYOte4zkrggnnNSJK0oHr7cDfJ/gzeGXx2qLmAy5Ncn2TVHOuvSjKZZHJqaqqvaUmSpunzw93fAa6Zdpjn+KrakuRJwBVJbu/eQTxMVa0GVgNMTExUj/NSo/777/72WOv9h09/seeZSA+1uy822Wfwn860wzxVtaX78+4kFwPLgRmDX4985/+7K8da760ffGnPM9E4vnrl08Za72Uv/W7PM9F89XKoJ8mBwIuBLwy1PS7JAdufAycDN/dRT5I0vlFO57wAOAFYnGQzcDawL0BVfbAb9irg8qr6+dCqhwAXJ9le51NV9eX+pi5JGsecwV9VK0cY8zEGp30Ot20CnjPuxCRJC8Nv7u5Bnv3xZ4+13oYzNoy13m3POHqs9Y6+/bax1pO0ZzD4pZ5sPuvvx1pv6Xm/2fNMpB3zIm2S1Bj3+HfknAPHXO8n/c5DmsE555yzS9fTI4d7/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDVmzuBPsibJ3UlunqX/hCQ/SbK+e7xrqG9FkjuSbExyVp8TlySNZ5Q9/o8BK+YY8/dVdVz3OBcgySLgfOAVwDHAyiTHzGeykqT5mzP4q+pqYOsY214ObKyqTVV1H3AhcOoY25Ek9aivY/y/keTGJJcmeWbXdihw19CYzV3bjJKsSjKZZHJqaqqnaUmSpusj+L8FHFFVzwH+Avh8154ZxtZsG6mq1VU1UVUTS5Ys6WFakqSZzDv4q+qnVfVP3fNLgH2TLGawh3/Y0NClwJb51pMkzc+8gz/Jk5Oke7682+Y9wDrgqCRHJtkPOB1YO996kqT52WeuAUkuAE4AFifZDJwN7AtQVR8EXgO8Ock24JfA6VVVwLYkZwKXAYuANVV1y4L8FJKkkc0Z/FW1co7+9wHvm6XvEuCS8aYmSVoIfnNXkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JasycwZ9kTZK7k9w8S//rktzUPa5N8pyhvjuTbEiyPslknxOXJI1nlD3+jwErdtD/PeDFVXUs8G5g9bT+l1TVcVU1Md4UJUl92meuAVV1dZJlO+i/dmjxOmDp/KclSV
oofR/jfyNw6dByAZcnuT7Jqh2tmGRVkskkk1NTUz1PS5K03Zx7/KNK8hIGwf/Coebjq2pLkicBVyS5vaqunmn9qlpNd5hoYmKi+pqXJOmhetnjT3Is8GHg1Kq6Z3t7VW3p/rwbuBhY3kc9SdL45h38SQ4HPge8vqq+PdT+uCQHbH8OnAzMeGaQJGnXmfNQT5ILgBOAxUk2A2cD+wJU1QeBdwFPBN6fBGBbdwbPIcDFXds+wKeq6ssL8DNIknbCKGf1rJyj/03Am2Zo3wQ85+FrSJJ2J7+5K0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMSMFf5I1Se5OMuPN0jPwv5JsTHJTkucN9Z2R5Dvd44y+Ji5JGs+oe/wfA1bsoP8VwFHdYxXwAYAkBzO4OfvzgeXA2UkOGneykqT5Gyn4q+pqYOsOhpwKfKIGrgOekOQpwMuBK6pqa1X9GLiCHb+ASJIWWF/H+A8F7hpa3ty1zdb+MElWJZlMMjk1NdXTtCRJ0/UV/JmhrXbQ/vDGqtVVNVFVE0uWLOlpWpKk6foK/s3AYUPLS4EtO2iXJO0mfQX/WuD3urN7XgD8pKr+L3AZcHKSg7oPdU/u2iRJu8k+owxKcgFwArA4yWYGZ+rsC1BVHwQuAV4JbAR+Afx+17c1ybuBdd2mzq2qHX1ILElaYCMFf1WtnKO/gLfO0rcGWLPzU5MkLQS/uStJjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1ZqTgT7IiyR1JNiY5a4b+9yZZ3z2+neQfh/ruH+pb2+fkJUk7b86brSdZBJwPnARsBtYlWVtVt24fU1V/NDT+3wPPHdrEL6vquP6mLEmaj1H2+JcDG6tqU1XdB1wInLqD8SuBC/qYnCSpf6ME/6HAXUPLm7u2h0lyBHAkcOVQ82OSTCa5LslpsxVJsqobNzk1NTXCtCRJ4xgl+DNDW80y9nTgoqq6f6jt8KqaAF4L/I8kT5tpxapaXVUTVTWxZMmSEaYlSRrHKMG/GThsaHkpsGWWsacz7TBPVW3p/twEXMVDj/9LknaxUYJ/HXBUkiOT7Mcg3B92dk6SpwMHAf8w1HZQkkd3zxcDxwO3Tl9XkrTrzHlWT1VtS3ImcBmwCFhTVbckOReYrKrtLwIrgQuravgw0NHAh5I8wOBF5rzhs4EkSbvenMEPUFWXAJdMa3vXtOVzZljvWuDZ85ifJKlnfnNXkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JasxIwZ9kRZI7kmxMctYM/W9IMpVkffd401DfGUm+0z3O6HPykqSdN+fN1pMsAs4HTgI2A+uSrK2qW6cN/XRVnTlt3YOBs4EJoIDru3V/3MvsJUk7bZQ9/uXAxqraVFX3ARcCp464/ZcDV1TV1i7srwBWjDdVSVIfRgn+Q4G7hpY3d23TvTrJTUkuSnLYTq5LklVJJpNMTk1NjTAtSdI4Rgn+zNBW05b/FlhWVccCXwE+vhPrDhqrVlfVRFVNLFmyZIRpSZLGMUrwbwYOG1peCmwZHlBV91TVvd3iXwK/Puq6kqRda5TgXwccleTIJPsBpwNrhwckecrQ4inAbd3zy4CTkxyU5CDg5K5NkrSbzHlWT1VtS3Img8BeBKypqluSnAtMVtVa4G1JTgG2AVuBN3Trbk3ybgYvHgDnVtXWBfg5JEkjmjP4AarqEuCSaW3vGnr+DuAds6y7BlgzjzlKknrkN3clqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SW
qMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4JekxowU/ElWJLkjycYkZ83Q//Yktya5KclXkxwx1Hd/kvXdY22fk5ck7bw5b7aeZBFwPnASsBlYl2RtVd06NOwGYKKqfpHkzcCfAb/b9f2yqo7red6SpDGNsse/HNhYVZuq6j7gQuDU4QFV9XdV9Ytu8Tpgab/TlCT1ZZTgPxS4a2h5c9c2mzcClw4tPybJZJLrkpw220pJVnXjJqempkaYliRpHHMe6gEyQ1vNODD518AE8OKh5sOrakuSXwGuTLKhqr77sA1WrQZWA0xMTMy4fUnS/I2yx78ZOGxoeSmwZfqgJCcC7wROqap7t7dX1Zbuz03AVcBz5zFfSdI8jRL864CjkhyZZD/gdOAhZ+ckeS7wIQahf/dQ+0FJHt09XwwcDwx/KCxJ2sXmPNRTVduSnAlcBiwC1lTVLUnOBSarai3wHuDxwGeSAHy/qk4BjgY+lOQBBi8y5007G0iStIuNcoyfqroEuGRa27uGnp84y3rXAs+ezwQlSf3ym7uS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSY0YK/iQrktyRZGOSs2bof3SST3f930iybKjvHV37HUle3t/UJUnjmDP4kywCzgdeARwDrExyzLRhbwR+XFW/CrwX+NNu3WOA04FnAiuA93fbkyTtJqPs8S8HNlbVpqq6D7gQOHXamFOBj3fPLwJeliRd+4VVdW9VfQ/Y2G1PkrSbpKp2PCB5DbCiqt7ULb8eeH5VnTk05uZuzOZu+bvA84FzgOuq6q+69o8Al1bVRTPUWQWs6hafDtwxxs+zGPjRGOuNY1fWsp71rNdOvXFrHVFVS0YZuM8IYzJD2/RXi9nGjLLuoLFqNbB6hPnMKslkVU3MZxt7Yi3rWc967dTbFbVGOdSzGThsaHkpsGW2MUn2AQ4Eto64riRpFxol+NcBRyU5Msl+DD6sXTttzFrgjO75a4Ara3AMaS1wenfWz5HAUcA3+5m6JGkccx7qqaptSc4ELgMWAWuq6pYk5wKTVbUW+AjwySQbGezpn96te0uSvwFuBbYBb62q+xfoZ4F5Hirag2tZz3rWa6fegtea88NdSdIji9/claTGGPyS1BiDX5IaY/DvoZIsT/LPu+fHJHl7klfuwvqf2FW1NL4k+yX5vSQndsuvTfK+JG9Nsu/unp/2TH64O6IkzwAOBb5RVf801L6iqr7cc62zGVwbaR/gCgbfgr4KOBG4rKr+S8/1pp+eG+AlwJUAVXVKn/VmqP9CBpfyuLmqLl+A7T8fuK2qfprkscBZwPMYnG32X6vqJz3XextwcVXd1ed2Z6n11wz+newP/CPweOBzwMsY/P8+Ywerj1vzacCrGHxHZxvwHeCCvn+PWjiPyOBP8vtV9dEet/c24K3AbcBxwB9W1Re6vm9V1fP6qtVtc0NX59HAD4ClQ6H1jao6tud632IQgh/mwW9cX8CDp+V+red636yq5d3zP2Dwu70YOBn426o6r+d6twDP6U5NXg38gu6aUl37v+y53k+AnwPfZfB7/ExVTfVZY6jWTVV1bPfFyf8DPLWq7u+ulXXjAvxbeRvwO8DXgFcC64EfM3gheEtVXdVnPS2QqnrEPYDv97y9DcDju+fLgEkG4Q9wwwLM/4aZnnfL6xeg3qOAP2Lw7uK4rm3TAv79DP9864Al3fPHARsWoN5tQ8+/tQt+nzd0v9OTGXzHZQr4MoMvOR7Qc62bgf2Ag4CfAQd37Y8Z/rl7rLcBWNQ93x+4qnt++EL8X+i2fSBwHnA7cE/3uK1re8JC1NzBXC5dgG3+M+C/AZ8EXjut7/0L8XOMcq2ePVKSm2brAg7pudyi6g7vVNWdSU4ALkpyBD
Nfj2i+7kuyf1X9Avj17Y1JDgQe6LtYVT0AvDfJZ7o/f8ho13Ea16OSHMQgHFPd3nBV/TzJtgWod/PQu8Abk0xU1WSSXwP+3wLUq+53ejlweXes/RXASuDPgZEupDWijzAIxEXAO4HPJNkEvIDBlXQXwj7A/QzekR4AUFXfX8DPFP6GwWHHE6rqBwBJnszghfQzwEl9Fksy2zv4MHgn3rePMjhc9lng3yR5NYMXgHsZ/D32bq891NOF08sZvM18SBdwbVU9tcdaVwJvr6r1Q237AGuA11VVr/cYSPLo7i99evti4ClVtaHPejPU+S3g+Kr6kwXa/p0MXsDC4NDSv6iqHyR5PPD1qur1P1f3gvk/gd9kcNXD5wF3dY+3VdWNPde7oaqeO0vfY6vqlz3XeypAVW1J8gQGnwV9v6p6vzxKkj9kcP+N64AXAX9aVR9NsgT4bFW9aAFq3lFVT9/ZvnnUu5/BoayZdupeUFWP7bne+uF/80neyeAw2inAFdXzoWTYu4P/I8BHq+rrM/R9qqpe22OtpcC27Xsb0/qOr6pr+qrVsiT7A4fU4N4NC7H9A4BfYbDHurmqfrhAdX6tqr69ENveEyR5JnA0gw/jb98F9S4HvgJ8fPvfWZJDgDcAJ1XViT3Xuxl4VVV9Z4a+u6rqsBlWm0+924Bndu8St7edAfwnBoeYj+izHuzFwS+pDd1hwbMY3NjpSV3zDxlcBPK8qpr+rn++9V7D4LOmh90TJMlpVfX5nuv9GXB5VX1lWvsK4C+q6qg+64HBL2kv1vcZfK3UM/gl7bWSfL+qDrfeztlrz+qR1IZdfAbfI74eGPyS9nyHsIMz+Ky38wx+SXu6LzI4u2X99I4kV1lv53mMX5Ia49U5JakxBr8kNcbgl6TGGPzaqyX5cJJjdtB/TpI/3pVzmq8kT0jylqHlE5J8cXfOSY8sBr/2alX1pqq6dXfPo2dPAN4y5yhpTAa/9gpJliW5PcnHk9yU5KIk+ye5KslEN2ZFkm8luTHJV2fYxh8kuTTJY6ett7i7YihJ3pDkC0m+nOSO7m5oc83pw0luTvLXSU5Mck2S7yTZfrOZg5N8vpv3dUmO7drPSbKmm8um7iYnMLjO/NOSrE/ynq7t8d3PfHtXZyEuB65GeB6/9iZPB95YVdckWcPQXnF3WeC/BF5UVd9LcvDwiknOZHBjlNOq6t45cnM58CwGd+pal+RLVTU5y9hfBf4VsIrBTWVeC7yQwSV1/wQ4DfjPDG5SclqSlwKf4MHruj+DwW0uDwDuSPIBBhcke9b2S/VmcP+H5wLPBLYA1wDHAw+7Mq00Cvf4tTe5a+gS2H/FIGC3ewFw9fZLOlfV1qG+1zO4EcqrZ7rPwQyuqKp7uuvmf25anem+V1Ubukvq3gJ8tQZfjtnA4G5tdOt/spvXlcATu3sEAHypqu6tqh8BdzP7V/S/WVWbuzrrh7Yt7TSDX3uT6d82HF7ODP3b3cwgKJcOtW3jwX//j9mJOtMNv5A8MLT8AA++o57p7cX2bQ6vfz+zvwsfdZw0J4Nfe5PDk/xG93wlDz3U8Q/Ai5McCYPj6kN9NwD/Fli7/W5VwJ08eFvL10yrc1J3XP6xDA7VzPdGO1cDr+vmdQLwo6r66Q7G/4zulobSQjD4tTe5DTiju5rhwcAHtnd09+1dBXwuyY3Ap4dX7O7U9sfAl7pbWP458OYk1wKLp9X5OoNDM+sZ3E5wtuP7ozoHmOjmfR6De8XOqqruAa7pPjB+z47GSuPwWj3aKyRZBnyxqp61wHXeAExU1ZkLWUfandzjl6TGuMcvzSHJE4GHfS8AeFl3WEbaqxj8ktQYD/VIUmMMfklqjMEvSY0x+CWpMf8fDW3SZrvAjwUAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"%%time\n",
"KM_PER_MILE = 1.609344\n",
"\n",
"# blocksize is a byte count, so pass it as an int (about 130 MB per partition).\n",
"ddf = dd.read_csv('data/yellow_tripdata_2015-*.csv', blocksize=int(13e7), assume_missing=True)\n",
"# calculate_distance scales by a radius given in km, so `distance` is in km.\n",
"ddf['distance'] = ddf.map_partitions(calculate_distance)\n",
"# Convert km -> miles before dividing so the column matches its name.\n",
"ddf['fare_per_mile'] = ddf.fare_amount/(ddf.distance/KM_PER_MILE)\n",
"ddf['trip_distance_km'] = ddf.trip_distance*KM_PER_MILE\n",
"# No errors='ignore': a value that cannot be parsed should fail here rather\n",
"# than leave a string column behind that breaks the .dt accessors below.\n",
"ddf['tpep_pickup_datetime'] = dd.to_datetime(ddf.tpep_pickup_datetime)\n",
"ddf['tpep_dropoff_datetime'] = dd.to_datetime(ddf.tpep_dropoff_datetime)\n",
"ddf['time_in_cab'] = ddf.tpep_dropoff_datetime - ddf.tpep_pickup_datetime\n",
"ddf['pickup_hour'] = ddf.tpep_pickup_datetime.dt.hour\n",
"ddf['pickup_dow'] = ddf.tpep_pickup_datetime.dt.dayofweek\n",
"ddf['pickup_month'] = ddf.tpep_pickup_datetime.dt.month\n",
"\n",
"# Materialise the derived columns on the cluster, then plot the mean tip\n",
"# per pickup month.\n",
"ddf = client.persist(ddf)\n",
"ddf.groupby(ddf.pickup_month).tip_amount.mean().compute().sort_index().plot.bar()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment