Skip to content

Instantly share code, notes, and snippets.

@rasmi
Created September 18, 2016 17:15
Show Gist options
  • Save rasmi/83347a5e245bb326bc959ed94cfc22e5 to your computer and use it in GitHub Desktop.
Save rasmi/83347a5e245bb326bc959ed94cfc22e5 to your computer and use it in GitHub Desktop.
Analyzing NYC Heating Code violations to predict future occurrences. Project by Rasmi Elasmar at HackMIT 2016.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import pandas as pd\n",
"import numpy as np\n",
"import json\n",
"from datetime import datetime\n",
"from sklearn import linear_model\n",
"import matplotlib.pyplot as plt\n",
"import bokeh\n",
"from bokeh.plotting import figure, show\n",
"from bokeh.io import output_notebook\n",
"from bokeh.models import (\n",
" GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"heating_data = pd.read_json('heating_reduced_elapsed_noNA.json')"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"parse_date = lambda date_string: datetime.strptime(str(date_string), '%Y-%m-%dT%H:%M:%S.000')\n",
"string_date = lambda date: date.strftime('%Y-%m-%dT%H:%M:%S.000')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"heating_data['year'] = heating_data['created_date'].apply(lambda date: parse_date(date).year)\n",
"heating_data['month'] = heating_data['created_date'].apply(lambda date: parse_date(date).month)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"offender_cutoff = 1000\n",
"worst_offenders_list = heating_data['incident_address'].value_counts().head(offender_cutoff).index"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'34 ARDEN STREET', u'3810 BAILEY AVENUE', u'97 SHERMAN AVENUE',\n",
" u'957 WOODYCREST AVENUE', u'116 EAST MOSHOLU PARKWAY SOUTH',\n",
" u'635 WEST 170 STREET', u'2074 WALLACE AVENUE', u'888 GRAND CONCOURSE',\n",
" u'20 WEST MOSHOLU PARKWAY SOUTH', u'530 EAST 169 STREET',\n",
" ...\n",
" u'901 DREW STREET', u'1059 UNION STREET', u'1376 TELLER AVENUE',\n",
" u'2320 AQUEDUCT AVENUE', u'1025 LEGGETT AVENUE',\n",
" u'223 LINDEN BOULEVARD', u'1834 CATON AVENUE', u'1628 OCEAN PARKWAY',\n",
" u'1950 DALY AVENUE', u'900 GRAND CONCOURSE'],\n",
" dtype='object', length=1000)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"worst_offenders_list"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"worst_offenders = heating_data[heating_data['incident_address'].isin(worst_offenders_list)]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"address_year_month_counts = worst_offenders[['incident_address', 'month', 'year', 'unique_key']].pivot_table(index=['incident_address', 'year', 'month'], values='unique_key', aggfunc='count', margins=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"incident_address year month\n",
"1 BOGARDUS PLACE 2010 1 3.0\n",
" 3 5.0\n",
" 4 8.0\n",
" 5 8.0\n",
" 10 7.0\n",
" 11 42.0\n",
" 12 36.0\n",
" 2011 1 24.0\n",
" 2 12.0\n",
" 3 19.0\n",
" 4 6.0\n",
" 2012 1 1.0\n",
" 12 1.0\n",
" 2013 1 1.0\n",
" 12 2.0\n",
" 2014 1 21.0\n",
" 2 11.0\n",
" 3 5.0\n",
"1 JACOBUS PLACE 2010 1 3.0\n",
" 2 6.0\n",
" 3 1.0\n",
" 5 1.0\n",
" 11 13.0\n",
" 12 8.0\n",
" 2011 1 5.0\n",
" 2 1.0\n",
" 3 3.0\n",
" 4 4.0\n",
" 5 1.0\n",
" 9 1.0\n",
" ... \n",
"99 ROGERS AVENUE 2013 2 2.0\n",
" 3 1.0\n",
" 10 1.0\n",
" 11 1.0\n",
" 2014 1 6.0\n",
"995 EASTERN PARKWAY 2010 1 2.0\n",
" 3 1.0\n",
" 4 1.0\n",
" 2011 1 2.0\n",
" 7 3.0\n",
" 10 3.0\n",
" 11 6.0\n",
" 2012 1 4.0\n",
" 2 2.0\n",
" 3 8.0\n",
" 11 5.0\n",
" 12 1.0\n",
" 2013 1 9.0\n",
" 2 4.0\n",
" 4 7.0\n",
" 6 3.0\n",
" 7 5.0\n",
" 8 11.0\n",
" 9 2.0\n",
" 10 7.0\n",
" 11 35.0\n",
" 12 13.0\n",
" 2014 1 6.0\n",
" 2 11.0\n",
"All 230028.0\n",
"Name: unique_key, dtype: float64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"address_year_month_counts"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"address_year_month_count_series = worst_offenders.groupby(['incident_address','year', 'month'])['unique_key'].count()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"incident_address year month\n",
"1 BOGARDUS PLACE 2010 1 3\n",
" 3 5\n",
" 4 8\n",
" 5 8\n",
" 10 7\n",
" 11 42\n",
" 12 36\n",
" 2011 1 24\n",
" 2 12\n",
" 3 19\n",
" 4 6\n",
" 2012 1 1\n",
" 12 1\n",
" 2013 1 1\n",
" 12 2\n",
" 2014 1 21\n",
" 2 11\n",
" 3 5\n",
"1 JACOBUS PLACE 2010 1 3\n",
" 2 6\n",
" 3 1\n",
" 5 1\n",
" 11 13\n",
" 12 8\n",
" 2011 1 5\n",
" 2 1\n",
" 3 3\n",
" 4 4\n",
" 5 1\n",
" 9 1\n",
" ..\n",
"99 ROGERS AVENUE 2013 1 5\n",
" 2 2\n",
" 3 1\n",
" 10 1\n",
" 11 1\n",
" 2014 1 6\n",
"995 EASTERN PARKWAY 2010 1 2\n",
" 3 1\n",
" 4 1\n",
" 2011 1 2\n",
" 7 3\n",
" 10 3\n",
" 11 6\n",
" 2012 1 4\n",
" 2 2\n",
" 3 8\n",
" 11 5\n",
" 12 1\n",
" 2013 1 9\n",
" 2 4\n",
" 4 7\n",
" 6 3\n",
" 7 5\n",
" 8 11\n",
" 9 2\n",
" 10 7\n",
" 11 35\n",
" 12 13\n",
" 2014 1 6\n",
" 2 11\n",
"Name: unique_key, dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"address_year_month_count_series"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"year_month_counts = worst_offenders.pivot_table(values='incident_address',columns=['year','month'],aggfunc='count')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"year month\n",
"2010 1 8261\n",
" 2 5360\n",
" 3 4363\n",
" 4 2409\n",
" 5 1392\n",
" 6 369\n",
" 7 434\n",
" 8 477\n",
" 9 553\n",
" 10 3822\n",
" 11 8152\n",
" 12 11596\n",
"2011 1 9837\n",
" 2 6825\n",
" 3 5624\n",
" 4 3309\n",
" 5 1148\n",
" 6 460\n",
" 7 409\n",
" 8 403\n",
" 9 502\n",
" 10 4721\n",
" 11 6533\n",
" 12 8626\n",
"2012 1 10529\n",
" 2 7168\n",
" 3 5344\n",
" 4 3799\n",
" 5 1333\n",
" 6 781\n",
" 7 870\n",
" 8 800\n",
" 9 706\n",
" 10 3058\n",
" 11 9585\n",
" 12 8482\n",
"2013 1 11514\n",
" 2 8794\n",
" 3 7011\n",
" 4 4015\n",
" 5 1574\n",
" 6 821\n",
" 7 628\n",
" 8 616\n",
" 9 734\n",
" 10 3315\n",
" 11 8796\n",
" 12 8860\n",
"2014 1 13715\n",
" 2 8959\n",
" 3 2636\n",
"dtype: int64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"year_month_counts"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"address_year_month_uniquekey_counts = worst_offenders[['incident_address', 'month', 'year', 'unique_key']].pivot_table(index=['incident_address', 'year'],columns='month', aggfunc='count', margins=True)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th colspan=\"13\" halign=\"left\">unique_key</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>month</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>10</th>\n",
" <th>11</th>\n",
" <th>12</th>\n",
" <th>All</th>\n",
" </tr>\n",
" <tr>\n",
" <th>incident_address</th>\n",
" <th>year</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">1 BOGARDUS PLACE</th>\n",
" <th>2010</th>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" <td>8.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>42.0</td>\n",
" <td>36.0</td>\n",
" <td>109.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>24.0</td>\n",
" <td>12.0</td>\n",
" <td>19.0</td>\n",
" <td>6.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>61.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>21.0</td>\n",
" <td>11.0</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>37.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">1 JACOBUS PLACE</th>\n",
" <th>2010</th>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>8.0</td>\n",
" <td>32.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>7.0</td>\n",
" <td>27.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>11.0</td>\n",
" <td>17.0</td>\n",
" <td>39.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>8.0</td>\n",
" <td>13.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>33.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">10 HILLSIDE AVENUE</th>\n",
" <th>2010</th>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>9.0</td>\n",
" <td>10.0</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>35.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>7.0</td>\n",
" <td>2.0</td>\n",
" <td>21.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>7.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>6.0</td>\n",
" <td>2.0</td>\n",
" <td>29.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>15.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>21.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">10 MIDWOOD STREET</th>\n",
" <th>2010</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>8.0</td>\n",
" <td>13.0</td>\n",
" <td>27.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>12.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>16.0</td>\n",
" <td>11.0</td>\n",
" <td>48.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>18.0</td>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" <td>34.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>13.0</td>\n",
" <td>34.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>7.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">10 WEST 182 STREET</th>\n",
" <th>2010</th>\n",
" <td>27.0</td>\n",
" <td>10.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>7.0</td>\n",
" <td>NaN</td>\n",
" <td>9.0</td>\n",
" <td>64.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>10.0</td>\n",
" <td>31.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>2.0</td>\n",
" <td>19.0</td>\n",
" <td>1.0</td>\n",
" <td>8.0</td>\n",
" <td>6.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>10.0</td>\n",
" <td>2.0</td>\n",
" <td>50.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>11.0</td>\n",
" <td>9.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">1001 JEROME AVENUE</th>\n",
" <th>2010</th>\n",
" <td>9.0</td>\n",
" <td>12.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>34.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" <td>11.0</td>\n",
" <td>27.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>1.0</td>\n",
" <td>6.0</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>16.0</td>\n",
" <td>14.0</td>\n",
" <td>45.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>27.0</td>\n",
" <td>10.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>38.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1005 JEROME AVENUE</th>\n",
" <th>2010</th>\n",
" <td>24.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>32.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">97-07 HORACE HARDING EXPRESSWAY</th>\n",
" <th>2011</th>\n",
" <td>15.0</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>23.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>6.0</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>23.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>25.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">9718 KINGS HIGHWAY</th>\n",
" <th>2010</th>\n",
" <td>6.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>16.0</td>\n",
" <td>19.0</td>\n",
" <td>53.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>9.0</td>\n",
" <td>25.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" <td>10.0</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>10.0</td>\n",
" <td>26.0</td>\n",
" <td>66.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>19.0</td>\n",
" <td>13.0</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>37.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">975 WALTON AVENUE</th>\n",
" <th>2010</th>\n",
" <td>8.0</td>\n",
" <td>13.0</td>\n",
" <td>11.0</td>\n",
" <td>8.0</td>\n",
" <td>9.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>18.0</td>\n",
" <td>13.0</td>\n",
" <td>13.0</td>\n",
" <td>96.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>11.0</td>\n",
" <td>2.0</td>\n",
" <td>6.0</td>\n",
" <td>13.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>7.0</td>\n",
" <td>9.0</td>\n",
" <td>21.0</td>\n",
" <td>8.0</td>\n",
" <td>11.0</td>\n",
" <td>91.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>16.0</td>\n",
" <td>7.0</td>\n",
" <td>10.0</td>\n",
" <td>36.0</td>\n",
" <td>19.0</td>\n",
" <td>6.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>30.0</td>\n",
" <td>4.0</td>\n",
" <td>129.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">98 MORNINGSIDE AVENUE</th>\n",
" <th>2010</th>\n",
" <td>8.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>9.0</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>9.0</td>\n",
" <td>11.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>7.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>41.0</td>\n",
" <td>NaN</td>\n",
" <td>51.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>9.0</td>\n",
" <td>34.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>19.0</td>\n",
" <td>14.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>33.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">99 ROGERS AVENUE</th>\n",
" <th>2010</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>7.0</td>\n",
" <td>2.0</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10.0</td>\n",
" <td>17.0</td>\n",
" <td>34.0</td>\n",
" <td>77.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>29.0</td>\n",
" <td>18.0</td>\n",
" <td>10.0</td>\n",
" <td>2.0</td>\n",
" <td>8.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>8.0</td>\n",
" <td>80.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>6.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">995 EASTERN PARKWAY</th>\n",
" <th>2010</th>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>NaN</td>\n",
" <td>14.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>8.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>20.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>9.0</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>11.0</td>\n",
" <td>2.0</td>\n",
" <td>7.0</td>\n",
" <td>35.0</td>\n",
" <td>13.0</td>\n",
" <td>96.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>6.0</td>\n",
" <td>11.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>17.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <th></th>\n",
" <td>53856.0</td>\n",
" <td>37106.0</td>\n",
" <td>24978.0</td>\n",
" <td>13532.0</td>\n",
" <td>5447.0</td>\n",
" <td>2431.0</td>\n",
" <td>2341.0</td>\n",
" <td>2296.0</td>\n",
" <td>2495.0</td>\n",
" <td>14916.0</td>\n",
" <td>33066.0</td>\n",
" <td>37564.0</td>\n",
" <td>230028.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4846 rows × 13 columns</p>\n",
"</div>"
],
"text/plain": [
" unique_key \\\n",
"month 1 2 3 4 \n",
"incident_address year \n",
"1 BOGARDUS PLACE 2010 3.0 NaN 5.0 8.0 \n",
" 2011 24.0 12.0 19.0 6.0 \n",
" 2012 1.0 NaN NaN NaN \n",
" 2013 1.0 NaN NaN NaN \n",
" 2014 21.0 11.0 5.0 NaN \n",
"1 JACOBUS PLACE 2010 3.0 6.0 1.0 NaN \n",
" 2011 5.0 1.0 3.0 4.0 \n",
" 2012 5.0 3.0 NaN NaN \n",
" 2013 8.0 13.0 1.0 NaN \n",
"10 HILLSIDE AVENUE 2010 2.0 5.0 3.0 3.0 \n",
" 2011 9.0 10.0 3.0 NaN \n",
" 2012 3.0 2.0 2.0 1.0 \n",
" 2013 2.0 3.0 7.0 2.0 \n",
" 2014 15.0 5.0 1.0 NaN \n",
"10 MIDWOOD STREET 2010 1.0 1.0 2.0 1.0 \n",
" 2011 3.0 2.0 12.0 NaN \n",
" 2012 18.0 NaN 5.0 3.0 \n",
" 2013 5.0 1.0 4.0 5.0 \n",
" 2014 7.0 1.0 NaN NaN \n",
"10 WEST 182 STREET 2010 27.0 10.0 2.0 4.0 \n",
" 2011 5.0 3.0 6.0 NaN \n",
" 2012 NaN NaN NaN NaN \n",
" 2013 2.0 19.0 1.0 8.0 \n",
" 2014 11.0 9.0 NaN NaN \n",
"1001 JEROME AVENUE 2010 9.0 12.0 1.0 3.0 \n",
" 2011 1.0 NaN NaN NaN \n",
" 2012 2.0 NaN 5.0 NaN \n",
" 2013 1.0 6.0 3.0 2.0 \n",
" 2014 27.0 10.0 1.0 NaN \n",
"1005 JEROME AVENUE 2010 24.0 1.0 NaN NaN \n",
"... ... ... ... ... \n",
"97-07 HORACE HARDING EXPRESSWAY 2011 15.0 5.0 NaN NaN \n",
" 2012 6.0 NaN 3.0 NaN \n",
" 2013 23.0 1.0 1.0 NaN \n",
" 2014 5.0 1.0 NaN NaN \n",
"9718 KINGS HIGHWAY 2010 6.0 5.0 2.0 NaN \n",
" 2011 2.0 2.0 2.0 2.0 \n",
" 2012 4.0 2.0 2.0 5.0 \n",
" 2013 5.0 8.0 10.0 4.0 \n",
" 2014 19.0 13.0 5.0 NaN \n",
"975 WALTON AVENUE 2010 8.0 13.0 11.0 8.0 \n",
" 2011 11.0 2.0 6.0 13.0 \n",
" 2012 16.0 7.0 10.0 36.0 \n",
" 2013 2.0 1.0 1.0 2.0 \n",
" 2014 5.0 NaN 2.0 NaN \n",
"98 MORNINGSIDE AVENUE 2010 8.0 1.0 NaN NaN \n",
" 2011 NaN 2.0 NaN NaN \n",
" 2012 7.0 1.0 NaN NaN \n",
" 2013 2.0 5.0 8.0 2.0 \n",
" 2014 19.0 14.0 NaN NaN \n",
"99 ROGERS AVENUE 2010 NaN NaN NaN 1.0 \n",
" 2011 7.0 2.0 5.0 2.0 \n",
" 2012 29.0 18.0 10.0 2.0 \n",
" 2013 5.0 2.0 1.0 NaN \n",
" 2014 6.0 NaN NaN NaN \n",
"995 EASTERN PARKWAY 2010 2.0 NaN 1.0 1.0 \n",
" 2011 2.0 NaN NaN NaN \n",
" 2012 4.0 2.0 8.0 NaN \n",
" 2013 9.0 4.0 NaN 7.0 \n",
" 2014 6.0 11.0 NaN NaN \n",
"All 53856.0 37106.0 24978.0 13532.0 \n",
"\n",
" \\\n",
"month 5 6 7 8 9 \n",
"incident_address year \n",
"1 BOGARDUS PLACE 2010 8.0 NaN NaN NaN NaN \n",
" 2011 NaN NaN NaN NaN NaN \n",
" 2012 NaN NaN NaN NaN NaN \n",
" 2013 NaN NaN NaN NaN NaN \n",
" 2014 NaN NaN NaN NaN NaN \n",
"1 JACOBUS PLACE 2010 1.0 NaN NaN NaN NaN \n",
" 2011 1.0 NaN NaN NaN 1.0 \n",
" 2012 NaN NaN NaN NaN NaN \n",
" 2013 2.0 5.0 3.0 NaN NaN \n",
"10 HILLSIDE AVENUE 2010 NaN 1.0 NaN NaN NaN \n",
" 2011 NaN 7.0 NaN NaN NaN \n",
" 2012 NaN NaN NaN NaN 1.0 \n",
" 2013 NaN NaN NaN NaN NaN \n",
" 2014 NaN NaN NaN NaN NaN \n",
"10 MIDWOOD STREET 2010 1.0 NaN NaN NaN NaN \n",
" 2011 1.0 NaN NaN NaN NaN \n",
" 2012 1.0 NaN NaN NaN NaN \n",
" 2013 NaN NaN NaN 1.0 NaN \n",
" 2014 NaN NaN NaN NaN NaN \n",
"10 WEST 182 STREET 2010 2.0 NaN NaN 1.0 2.0 \n",
" 2011 NaN NaN NaN 1.0 NaN \n",
" 2012 NaN NaN 3.0 NaN NaN \n",
" 2013 6.0 NaN NaN NaN NaN \n",
" 2014 NaN NaN NaN NaN NaN \n",
"1001 JEROME AVENUE 2010 1.0 NaN NaN NaN NaN \n",
" 2011 1.0 NaN 1.0 NaN NaN \n",
" 2012 NaN NaN NaN NaN NaN \n",
" 2013 NaN NaN NaN NaN 1.0 \n",
" 2014 NaN NaN NaN NaN NaN \n",
"1005 JEROME AVENUE 2010 NaN 2.0 NaN 2.0 1.0 \n",
"... ... ... ... ... ... \n",
"97-07 HORACE HARDING EXPRESSWAY 2011 2.0 NaN NaN NaN NaN \n",
" 2012 NaN NaN NaN NaN 3.0 \n",
" 2013 NaN NaN NaN NaN NaN \n",
" 2014 NaN NaN NaN NaN NaN \n",
"9718 KINGS HIGHWAY 2010 NaN NaN NaN 1.0 NaN \n",
" 2011 NaN NaN 1.0 NaN NaN \n",
" 2012 NaN NaN NaN NaN NaN \n",
" 2013 NaN 1.0 NaN 1.0 NaN \n",
" 2014 NaN NaN NaN NaN NaN \n",
"975 WALTON AVENUE 2010 9.0 NaN 1.0 NaN 2.0 \n",
" 2011 NaN 1.0 2.0 7.0 9.0 \n",
" 2012 19.0 6.0 1.0 NaN NaN \n",
" 2013 1.0 1.0 NaN 1.0 NaN \n",
" 2014 NaN NaN NaN NaN NaN \n",
"98 MORNINGSIDE AVENUE 2010 NaN NaN NaN NaN NaN \n",
" 2011 NaN NaN NaN NaN NaN \n",
" 2012 1.0 NaN NaN NaN NaN \n",
" 2013 1.0 NaN NaN 1.0 2.0 \n",
" 2014 NaN NaN NaN NaN NaN \n",
"99 ROGERS AVENUE 2010 NaN NaN NaN NaN NaN \n",
" 2011 NaN NaN NaN NaN NaN \n",
" 2012 8.0 NaN NaN NaN NaN \n",
" 2013 NaN NaN NaN NaN NaN \n",
" 2014 NaN NaN NaN NaN NaN \n",
"995 EASTERN PARKWAY 2010 NaN NaN NaN NaN NaN \n",
" 2011 NaN NaN 3.0 NaN NaN \n",
" 2012 NaN NaN NaN NaN NaN \n",
" 2013 NaN 3.0 5.0 11.0 2.0 \n",
" 2014 NaN NaN NaN NaN NaN \n",
"All 5447.0 2431.0 2341.0 2296.0 2495.0 \n",
"\n",
" \n",
"month 10 11 12 All \n",
"incident_address year \n",
"1 BOGARDUS PLACE 2010 7.0 42.0 36.0 109.0 \n",
" 2011 NaN NaN NaN 61.0 \n",
" 2012 NaN NaN 1.0 2.0 \n",
" 2013 NaN NaN 2.0 3.0 \n",
" 2014 NaN NaN NaN 37.0 \n",
"1 JACOBUS PLACE 2010 NaN 13.0 8.0 32.0 \n",
" 2011 3.0 2.0 7.0 27.0 \n",
" 2012 3.0 11.0 17.0 39.0 \n",
" 2013 1.0 NaN NaN 33.0 \n",
"10 HILLSIDE AVENUE 2010 NaN 3.0 NaN 17.0 \n",
" 2011 1.0 4.0 1.0 35.0 \n",
" 2012 3.0 7.0 2.0 21.0 \n",
" 2013 7.0 6.0 2.0 29.0 \n",
" 2014 NaN NaN NaN 21.0 \n",
"10 MIDWOOD STREET 2010 NaN 8.0 13.0 27.0 \n",
" 2011 3.0 16.0 11.0 48.0 \n",
" 2012 NaN 6.0 1.0 34.0 \n",
" 2013 3.0 2.0 13.0 34.0 \n",
" 2014 NaN NaN NaN 8.0 \n",
"10 WEST 182 STREET 2010 7.0 NaN 9.0 64.0 \n",
" 2011 4.0 2.0 10.0 31.0 \n",
" 2012 NaN 2.0 2.0 7.0 \n",
" 2013 2.0 10.0 2.0 50.0 \n",
" 2014 NaN NaN NaN 20.0 \n",
"1001 JEROME AVENUE 2010 2.0 3.0 3.0 34.0 \n",
" 2011 5.0 8.0 11.0 27.0 \n",
" 2012 3.0 1.0 1.0 12.0 \n",
" 2013 2.0 16.0 14.0 45.0 \n",
" 2014 NaN NaN NaN 38.0 \n",
"1005 JEROME AVENUE 2010 NaN NaN 2.0 32.0 \n",
"... ... ... ... ... \n",
"97-07 HORACE HARDING EXPRESSWAY 2011 NaN NaN 1.0 23.0 \n",
" 2012 NaN NaN NaN 12.0 \n",
" 2013 NaN NaN NaN 25.0 \n",
" 2014 NaN NaN NaN 6.0 \n",
"9718 KINGS HIGHWAY 2010 4.0 16.0 19.0 53.0 \n",
" 2011 4.0 2.0 4.0 19.0 \n",
" 2012 NaN 3.0 9.0 25.0 \n",
" 2013 1.0 10.0 26.0 66.0 \n",
" 2014 NaN NaN NaN 37.0 \n",
"975 WALTON AVENUE 2010 18.0 13.0 13.0 96.0 \n",
" 2011 21.0 8.0 11.0 91.0 \n",
" 2012 NaN 30.0 4.0 129.0 \n",
" 2013 NaN NaN NaN 9.0 \n",
" 2014 NaN NaN NaN 7.0 \n",
"98 MORNINGSIDE AVENUE 2010 NaN 4.0 9.0 22.0 \n",
" 2011 NaN NaN 9.0 11.0 \n",
" 2012 1.0 41.0 NaN 51.0 \n",
" 2013 2.0 2.0 9.0 34.0 \n",
" 2014 NaN NaN NaN 33.0 \n",
"99 ROGERS AVENUE 2010 NaN 2.0 3.0 6.0 \n",
" 2011 10.0 17.0 34.0 77.0 \n",
" 2012 2.0 3.0 8.0 80.0 \n",
" 2013 1.0 1.0 NaN 10.0 \n",
" 2014 NaN NaN NaN 6.0 \n",
"995 EASTERN PARKWAY 2010 NaN NaN NaN 4.0 \n",
" 2011 3.0 6.0 NaN 14.0 \n",
" 2012 NaN 5.0 1.0 20.0 \n",
" 2013 7.0 35.0 13.0 96.0 \n",
" 2014 NaN NaN NaN 17.0 \n",
"All 14916.0 33066.0 37564.0 230028.0 \n",
"\n",
"[4846 rows x 13 columns]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"address_year_month_uniquekey_counts"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# http://stackoverflow.com/a/24376671/950845\n",
"# http://stackoverflow.com/a/37819973/950845\n",
"results = {}\n",
"for t in address_year_month_uniquekey_counts.itertuples():\n",
" address = t[0][0]\n",
" year = t[0][1]\n",
" if address not in results:\n",
" results[address] = {}\n",
" if year not in results[address]:\n",
" results[address][year] = {}\n",
" # Add counts to each month.\n",
" for month, count in enumerate(t[1:13], 1):\n",
" results[address][year][month] = count\n",
" # Add total count at the end.\n",
" #results[address][year]['All'] = t[-1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with open('address_year_month_counts.json', 'w') as outfile:\n",
" json.dump(results, outfile)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"by_month = worst_offenders.groupby(['month']).count()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"worst_offenders_dropped = worst_offenders.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"bayesianridge = linear_model.BayesianRidge()\n",
"bayesianridgemodel = bayesianridge.fit(worst_offenders_dropped['latitude'].reshape(len(worst_offenders_dropped),1), worst_offenders_dropped['longitude'])"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([-73.9081696 , -73.94885902, -73.94885902, ..., -73.90998785,\n",
" -73.90319309, -73.91520454])"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bayesianridgemodel.predict(worst_offenders_dropped['latitude'].reshape(len(worst_offenders_dropped),1))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"regression = linear_model.LinearRegression()\n",
"regressionmodel = regression.fit(worst_offenders_dropped['latitude'].reshape(len(worst_offenders_dropped),1), worst_offenders_dropped['longitude'])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([-73.9081694 , -73.94885955, -73.94885955, ..., -73.90998768,\n",
" -73.9031928 , -73.91520447])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"regressionmodel.predict(worst_offenders_dropped['latitude'].reshape(len(worst_offenders_dropped),1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"map_options = GMapOptions(lat=40.71, lng=-74, map_type=\"roadmap\", zoom=10, api_key='AIzaSyCZeeuNFl8MaKF4u6btT7m230TcHoT0KXk')\n",
"\n",
"plot = GMapPlot(\n",
" x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options, title=\"Heating Violations\"\n",
")\n",
"\n",
"source = ColumnDataSource(\n",
" data=dict(\n",
" lat=worst_offenders_dropped['latitude'],\n",
" lon=worst_offenders_dropped['longitude'],\n",
" )\n",
")\n",
"\n",
"circle = Circle(x=\"lon\", y=\"lat\", size=15, fill_color=\"blue\", fill_alpha=0.8, line_color=None)\n",
"plot.add_glyph(source, circle)\n",
"plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())\n",
"show(plot)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment