Skip to content

Instantly share code, notes, and snippets.

View prratek's full-sized avatar

Prratek Ramchandani prratek

  • Vox Media
  • New York, NY
View GitHub Profile
@prratek
prratek / serve.py
Created November 30, 2018 20:12
Modified serve file for Zeit deployment
from starlette.applications import Starlette
from starlette.responses import HTMLResponse, JSONResponse
from starlette.staticfiles import StaticFiles
from starlette.middleware.cors import CORSMiddleware
import uvicorn, aiohttp, asyncio
from io import BytesIO
from fastai import *
from fastai.vision import *
# Fit and score one random forest per hyper-parameter setting, in the order
# listed. Anything not named in a setting is left at RandomForestRegressor's
# default; every run passes n_jobs=-1. After the loop, `m` is the last
# (all-defaults) model.
for _forest_kwargs in (
    {'n_estimators': 100, 'max_features': 'log2'},
    {'max_features': 'sqrt'},
    {'max_features': 'log2'},
    {},
):
    m = RandomForestRegressor(n_jobs=-1, **_forest_kwargs)
    m.fit(X_train, y_train)
    print_score(m)
@prratek
prratek / bagged_trees_r2.py
Created September 23, 2018 19:47
Increase in R^2 as we increase n_estimators
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
# Stack each fitted tree's validation predictions: preds[k] holds what
# m.estimators_[k] predicts for X_valid.
preds = np.stack([tree.predict(X_valid) for tree in m.estimators_])
# Notebook-style peek: index 0 along axis 1 for every tree, and its mean.
preds[:, 0], preds[:, 0].mean()
# R^2 on the validation set when averaging only the first k trees, k = 1..10.
plt.plot([metrics.r2_score(y_valid, preds[:k].mean(axis=0)) for k in range(1, 11)]);
@prratek
prratek / nycTaxi_rf_baseline.py
Last active September 23, 2018 00:18
Code for Random Forest Medium blog post
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
# Sampling stride: only file rows whose index is a multiple of n are read.
# Index 0 (the header line) is a multiple of n, so it is kept as well.
n = 100
df = pd.read_csv(
    '{PATH_TO_DATA}train.csv',
    skiprows=lambda row_idx: row_idx % n != 0,
)

# Baseline model: a random forest with every hyper-parameter at its default.
m = RandomForestRegressor()
# Train on the training split.
m.fit(X_train, y_train)
# Evaluate on the held-out validation split.
m.score(X_valid, y_valid)