Skip to content

Instantly share code, notes, and snippets.

@chriddyp
Created January 22, 2015 06:13
Show Gist options
  • Save chriddyp/49b0a5284f02b69dcc14 to your computer and use it in GitHub Desktop.
Save chriddyp/49b0a5284f02b69dcc14 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:28230294872ccb59039f6518806b5098322a8d4c6f04dd222a79e00e54056380"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from IPython.html import widgets \n",
"from IPython.display import display, clear_output\n",
"\n",
"import plotly.plotly as py\n",
"from plotly.graph_objs import *\n",
"import plotly\n",
"from plotly.widgets import GraphWidget\n",
"\n",
"import pandas as pd"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = pd.read_csv('311_150k.csv', infer_datetime_format=True)\n",
"df = df\n",
"df.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unique Key</th>\n",
" <th>Created Date</th>\n",
" <th>Closed Date</th>\n",
" <th>Agency</th>\n",
" <th>Agency Name</th>\n",
" <th>Complaint Type</th>\n",
" <th>Descriptor</th>\n",
" <th>Location Type</th>\n",
" <th>Incident Zip</th>\n",
" <th>Incident Address</th>\n",
" <th>...</th>\n",
" <th>Bridge Highway Name</th>\n",
" <th>Bridge Highway Direction</th>\n",
" <th>Road Ramp</th>\n",
" <th>Bridge Highway Segment</th>\n",
" <th>Garage Lot Name</th>\n",
" <th>Ferry Direction</th>\n",
" <th>Ferry Terminal Name</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" <th>Location</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 29300358</td>\n",
" <td> 11/16/2014 11:46:00 PM</td>\n",
" <td> 11/16/2014 11:46:00 PM</td>\n",
" <td> DSNY</td>\n",
" <td> BCC - Queens East</td>\n",
" <td> Derelict Vehicles</td>\n",
" <td> 14 Derelict Vehicles</td>\n",
" <td> Street</td>\n",
" <td> 11432</td>\n",
" <td> 80-25 PARSONS BOULEVARD</td>\n",
" <td>...</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> 40.719411</td>\n",
" <td>-73.808882</td>\n",
" <td> (40.719410639341916, -73.80888158860446)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 29299837</td>\n",
" <td> 11/16/2014 02:24:35 AM</td>\n",
" <td> 11/16/2014 02:24:35 AM</td>\n",
" <td> DOB</td>\n",
" <td> Department of Buildings</td>\n",
" <td> Building/Use</td>\n",
" <td> Illegal Conversion Of Residential Building/Space</td>\n",
" <td> NaN</td>\n",
" <td> 10465</td>\n",
" <td> 938 HUNTINGTON AVENUE</td>\n",
" <td>...</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> 40.827862</td>\n",
" <td>-73.830641</td>\n",
" <td> (40.827862046105416, -73.83064067165407)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 29297857</td>\n",
" <td> 11/16/2014 02:17:12 AM</td>\n",
" <td> 11/16/2014 02:50:48 AM</td>\n",
" <td> NYPD</td>\n",
" <td> New York City Police Department</td>\n",
" <td> Illegal Parking</td>\n",
" <td> Blocked Sidewalk</td>\n",
" <td> Street/Sidewalk</td>\n",
" <td> 11201</td>\n",
" <td> 229 DUFFIELD STREET</td>\n",
" <td>...</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> 40.691248</td>\n",
" <td>-73.984375</td>\n",
" <td> (40.69124772858873, -73.98437529459297)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 29294647</td>\n",
" <td> 11/16/2014 02:15:13 AM</td>\n",
" <td> NaN</td>\n",
" <td> NYPD</td>\n",
" <td> New York City Police Department</td>\n",
" <td> Noise - Street/Sidewalk</td>\n",
" <td> Loud Music/Party</td>\n",
" <td> Street/Sidewalk</td>\n",
" <td> 10040</td>\n",
" <td> 128 NAGLE AVENUE</td>\n",
" <td>...</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> 40.861248</td>\n",
" <td>-73.926308</td>\n",
" <td> (40.861247930170535, -73.92630783362215)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 29300211</td>\n",
" <td> 11/16/2014 02:14:01 AM</td>\n",
" <td> NaN</td>\n",
" <td> NYPD</td>\n",
" <td> New York City Police Department</td>\n",
" <td> Illegal Parking</td>\n",
" <td> Commercial Overnight Parking</td>\n",
" <td> Street/Sidewalk</td>\n",
" <td> 10306</td>\n",
" <td> 625 LINCOLN AVENUE</td>\n",
" <td>...</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> 40.570565</td>\n",
" <td>-74.092229</td>\n",
" <td> (40.57056460126485, -74.09222907551542)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows \u00d7 52 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 20,
"text": [
" Unique Key Created Date Closed Date Agency \\\n",
"0 29300358 11/16/2014 11:46:00 PM 11/16/2014 11:46:00 PM DSNY \n",
"1 29299837 11/16/2014 02:24:35 AM 11/16/2014 02:24:35 AM DOB \n",
"2 29297857 11/16/2014 02:17:12 AM 11/16/2014 02:50:48 AM NYPD \n",
"3 29294647 11/16/2014 02:15:13 AM NaN NYPD \n",
"4 29300211 11/16/2014 02:14:01 AM NaN NYPD \n",
"\n",
" Agency Name Complaint Type \\\n",
"0 BCC - Queens East Derelict Vehicles \n",
"1 Department of Buildings Building/Use \n",
"2 New York City Police Department Illegal Parking \n",
"3 New York City Police Department Noise - Street/Sidewalk \n",
"4 New York City Police Department Illegal Parking \n",
"\n",
" Descriptor Location Type \\\n",
"0 14 Derelict Vehicles Street \n",
"1 Illegal Conversion Of Residential Building/Space NaN \n",
"2 Blocked Sidewalk Street/Sidewalk \n",
"3 Loud Music/Party Street/Sidewalk \n",
"4 Commercial Overnight Parking Street/Sidewalk \n",
"\n",
" Incident Zip Incident Address \\\n",
"0 11432 80-25 PARSONS BOULEVARD \n",
"1 10465 938 HUNTINGTON AVENUE \n",
"2 11201 229 DUFFIELD STREET \n",
"3 10040 128 NAGLE AVENUE \n",
"4 10306 625 LINCOLN AVENUE \n",
"\n",
" ... Bridge Highway Name \\\n",
"0 ... NaN \n",
"1 ... NaN \n",
"2 ... NaN \n",
"3 ... NaN \n",
"4 ... NaN \n",
"\n",
" Bridge Highway Direction Road Ramp Bridge Highway Segment Garage Lot Name \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" Ferry Direction Ferry Terminal Name Latitude Longitude \\\n",
"0 NaN NaN 40.719411 -73.808882 \n",
"1 NaN NaN 40.827862 -73.830641 \n",
"2 NaN NaN 40.691248 -73.984375 \n",
"3 NaN NaN 40.861248 -73.926308 \n",
"4 NaN NaN 40.570565 -74.092229 \n",
"\n",
" Location \n",
"0 (40.719410639341916, -73.80888158860446) \n",
"1 (40.827862046105416, -73.83064067165407) \n",
"2 (40.69124772858873, -73.98437529459297) \n",
"3 (40.861247930170535, -73.92630783362215) \n",
"4 (40.57056460126485, -74.09222907551542) \n",
"\n",
"[5 rows x 52 columns]"
]
}
],
"prompt_number": 20
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Simple bar chart"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df['Complaint Type'].value_counts()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 21,
"text": [
"HEAT/HOT WATER 32202\n",
"Street Light Condition 7558\n",
"Blocked Driveway 6997\n",
"UNSANITARY CONDITION 6174\n",
"PAINT/PLASTER 5388\n",
"Illegal Parking 5381\n",
"Street Condition 4847\n",
"Noise 4615\n",
"PLUMBING 4284\n",
"Water System 3323\n",
"Noise - Commercial 3206\n",
"DOOR/WINDOW 3194\n",
"Traffic Signal Condition 2766\n",
"WATER LEAK 2501\n",
"Dirty Conditions 2283\n",
"...\n",
"Lifeguard 2\n",
"Illegal Animal Sold 2\n",
"Internal Code 2\n",
"Special Natural Area District (SNAD) 2\n",
"Fire Alarm - Replacement 2\n",
"Highway Sign - Dangling 2\n",
"Public Toilet 2\n",
"Radioactive Material 1\n",
"Calorie Labeling 1\n",
"DHS Income Savings Requirement 1\n",
"Window Guard 1\n",
"Poison Ivy 1\n",
"Bottled Water 1\n",
"Illegal Fireworks 1\n",
"DWD 1\n",
"Length: 183, dtype: int64"
]
}
],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"popup = widgets.PopupWidget(\n",
" description='Scroll up to see the embedded plotly graph update',\n",
" children=[widgets.HTMLWidget(value=json.dumps(info,indent=4).replace('\\n', '<br>').replace(' ', '&nbsp'))]\n",
")\n",
"popup.set_css('display', 'none', selector='.btn')\n",
"return popup\n"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c = df['Complaint Type'].value_counts()\n",
"url = py.plot([Bar(x=c.index, y=c.values)], filename='311 most common complaints', auto_open=False)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"graph = GraphWidget(url)\n",
"\n",
"popup = widgets.PopupWidget(\n",
" children=[graph]\n",
")\n",
"popup.set_css('display', 'none', selector='.btn')\n",
"\n",
"popup"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 39
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Update the graph with the most common counts of a different column\n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = df['Agency Name'].value_counts()\n",
"graph.restyle({'x': [a.index], 'y': [a.values]})"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 23
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Most common >>insert column here<<"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"column_headers_dropdown = widgets.DropdownWidget()\n",
"column_headers_dropdown.values = {column: column for column in df.columns}\n",
"column_headers_dropdown"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"text": [
"'Complaint Type'"
]
}
],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def on_dropdown_selection(_, old_selection, new_selection):\n",
" clear_output()\n",
" display(new_selection)\n",
" \n",
" vc = df[new_selection].value_counts()\n",
"\n",
" graph.restyle({'x': [vc.index], 'y': [vc.values]})\n",
" \n",
"column_headers_dropdown.on_trait_change(on_dropdown_selection, 'value')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 25
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Search complaints"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Simple dataframe filter"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"idx = df['Complaint Type'].str.contains('Tree').fillna(False)\n",
"df[idx]['Complaint Type'].value_counts()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 35,
"text": [
"Damaged Tree 2072\n",
"Overgrown Tree/Branches 1171\n",
"Dead Tree 668\n",
"Illegal Tree Damage 150\n",
"dtype: int64"
]
}
],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"t = widgets.TextWidget()\n",
"t"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"text": [
"u''"
]
}
],
"prompt_number": 46
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def on_text_input(_, old_text, new_text):\n",
" clear_output()\n",
" display(new_text)\n",
" \n",
" idx = df['Complaint Type'].str.contains(new_text).fillna(False)\n",
" vc = df[idx][column_headers_dropdown.value].value_counts()\n",
" \n",
" graph.restyle({'x': [vc.index], 'y': [vc.values]}) \n",
" \n",
"t.on_trait_change(on_text_input, 'value', remove=False)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 47
}
],
"metadata": {}
}
]
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment