Skip to content

Instantly share code, notes, and snippets.

Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Assemble the modelling table: start from a copy of price_data and join
# the rental, location and geo tables onto it, each keyed on the "id" column.
# NOTE(review): presumably these are pandas DataFrames, in which case each
# merge is an inner join by default - confirm against where they are loaded.
cur_data = (
    price_data.copy()
    .merge(rental_data, on="id")
    .merge(location_data, on="id")
    .merge(geo_data, on="id")
)

# Split the merged table with the project's own helper, which hands back
# four objects unpacked here as train/test features and targets.
X_train, X_test, y_train, y_test = custom_train_test_split(cur_data)

# Column-name lists set up for later steps (their consumers are not shown
# in this snippet).
pass_cols = ["is_brooklyn", "density"]
drop_cols = ["year", "geometry", "zipcode"]
accommodates accommodates_per_bathrooms accommodates_per_bedrooms accommodates bedrooms center_dist_is_brooklyn food_density food_density_is_brooklyn is_brooklyn
All NYC 34 -14 -9 -4 -6 8 16 -9
Manhattan 51 -15 -13 -13 - 9 - -
Brooklyn 43 -19 -16 -7 - 16 - -
Linear SVM 25 -11 -7 -6 -1 3 21 -25
XGBoost 7 1 1 38 29 22 1 0
AdaBoost 4 1 0 25 14 51 4 0
Extra Forest 6 4 4 8 16 45 11 7
Random Forest 2 1 2 26 11 56 3 0
Feature Relative Importance
accommodates 1.0
accommodates_per_bathrooms -0.428
accommodates_per_bedrooms -0.28
food_density 0.248
is_brooklyn -0.231
center_dist -0.102
density -0.086
Dataset Count Parameter Descriptor
Apartments 15k N Airbnb
Restaurants 15k M DOH
Subway Stations 300 M MTA
Hotspots 10 M Red Diamonds
Cost Epicenter 1 M Orange Star
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import numpy as np
import pandas as pd
import geopandas
import pickle
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
name lat lon price is_com is_bkn
0 center 40.7269845195146 -73.96825439838058 170 True False
1 wash sq 40.73232616009288 -74.00113694319073 197 False False
2 flatiron 40.74010002079383 -73.99202965334189 555 False False
3 bowery 40.728100910208695 -73.99392182445409 298 False False
4 uptown 40.79674265987271 -73.95298422453986 302 False False
5 midtown 40.76134044954954 -73.98369000821411 303 False False
6 barclays 40.68061129890911 -73.97564856875336 267 False True
7 bushwick 40.688196501332044 -73.92715263054822 148 False True
8 williamsburg 40.715948534974885 -73.95220786983514 225 False True
feature coefficient
accommodates 1.0
accommodates_per_bathrooms -0.43
is_brooklyn -0.405
accommodates_per_bedrooms -0.338
accommodates bedrooms -0.305
density -0.115
accommodates bathrooms 0.111
bedrooms_per_beds -0.108
accommodates_per_beds 0.105
# Assemble the modelling table: start from a copy of price_data and join
# the rental and location tables onto it, each keyed on the "id" column.
# NOTE(review): presumably these are pandas DataFrames, in which case each
# merge is an inner join by default - confirm against where they are loaded.
cur_data = (
    price_data.copy()
    .merge(rental_data, on="id")
    .merge(location_data, on="id")
)

# Split the merged table with the project's own helper, which hands back
# four objects unpacked here as train/test features and targets.
X_train, X_test, y_train, y_test = custom_train_test_split(cur_data)

# Column-name lists set up for later steps (their consumers are not shown
# in this snippet).
pass_cols = ["is_brooklyn", "density"]
drop_cols = ["year", "geometry", "zipcode"]
one_hot_cols = ["month"]