Skip to content

Instantly share code, notes, and snippets.

import numpy as np
import pandas as pd
import os
# Work from the directory that holds the tab-separated modelling data.
os.chdir("/Users/shuozhang/Desktop/data")
df = pd.read_csv('nycmodeldata.csv', sep='\t', encoding='utf-8')

#### create the prediction data set
# Collect the distinct values of each column; going through a set removes
# duplicates, so the resulting list order is not guaranteed.
zipcode, Weekday = (
    list(set(df[column_name])) for column_name in ('zipcode', 'Weekday')
)
# NOTE(review): the following lines are R (setwd / library), not Python.
# Make the "web scraping" folder on the Desktop the R working directory.
setwd("~/Desktop/web scraping")
# Attach the packages needed by the R analysis (the code using them is not
# visible here). The original order is kept on purpose: a package attached
# later can mask same-named functions from one attached earlier.
library(dplyr)
library(dygraphs)
library(reshape2)
library(ggplot2)
library(plotly)
library(car)
library(TSA)
library(forecast)
library(xts)
#### linear regression
import numpy as np
import pandas as pd
import os
# Work from the directory that holds the tab-separated modelling data.
os.chdir("/Users/shuozhang/Desktop/data")
# zipcode is requested as a fixed-width string dtype -- presumably so that
# zip codes are treated as labels rather than numbers (confirm with the data).
nycmodel = pd.read_csv('nycmodeldata.csv', sep='\t', index_col=False, dtype={'zipcode':'S10'})
import statsmodels.api as sm
# One indicator column per distinct zipcode value.
add_dummies = pd.get_dummies(nycmodel['zipcode'])
# Cast the indicators to 64-bit integers. This replaces
# `applymap(np.int)`: the `np.int` alias was removed in NumPy 1.24 (it was
# only ever the builtin `int`) and `DataFrame.applymap` is deprecated in
# pandas 2.1, so the old call fails on current releases. A single vectorised
# cast yields the same integer frame.
add_dummies = add_dummies.astype(np.int64)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
#### yellow taxi: feature selection
# Switch to the folder containing the raw yellow-taxi trip files.
os.chdir("/Users/shuozhang/Desktop/capstone/yellow taxi")
yellow_raw = pd.read_csv("yellow_tripdata_2014-05.csv", index_col=False)

# Drop every space character from the header names so the columns can be
# referenced without embedded/leading blanks.
yellow_raw.columns = list(
    map(lambda header: header.replace(" ", ""), yellow_raw.columns)
)