Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
#This project was done as a team with Tarun DeviReddy.
library(ISLR)
library(MASS)
library(rpart)
library(rpart.plot)
library(randomForest)
library(ROCR)
library(party)
library(dplyr)
library(ggplot2)
library(mice)
# Read the training and test CSVs and stack them (train rows first) so every
# derived column below is computed the same way for both.
# stringsAsFactors = FALSE keeps `datetime` a character vector on R < 4.0,
# which the string handling below needs.
train <- read.csv("~/Downloads/bicycle/train.csv", stringsAsFactors = FALSE)
test <- read.csv("~/Downloads/bicycle/test.csv", stringsAsFactors = FALSE)
full <- bind_rows(train, test)
# assumes datetime looks like "YYYY-MM-DD HH:MM:SS" -- TODO confirm against
# the CSVs; the fixed substr() positions below depend on it.
stamp <- as.character(full$datetime)
#Time
# Hour of day: the text after the last space, up to the first ":".
# sub() is vectorised and always returns character, unlike sapply().
full$time <- as.numeric(sub(":.*$", "", sub("^.* ", "", stamp)))
#Day
# English weekday names are looked up from the numeric weekday (0 = Sunday)
# instead of weekdays(), whose output depends on the session locale; later
# code compares this column against the literals "Saturday" and "Sunday".
weekday_names <- c("Sunday", "Monday", "Tuesday", "Wednesday",
                   "Thursday", "Friday", "Saturday")
weekday_index <- as.POSIXlt(as.Date(substr(stamp, 1, 10)))$wday + 1
full$day <- as.factor(weekday_names[weekday_index])
#year
full$year <- as.factor(substr(stamp, 1, 4))
#month_num
month_chr <- substr(stamp, 6, 7)
full$month_num <- as.numeric(month_chr)
# Kept as a duplicate numeric column because the original script created it.
full$month_num_num <- full$month_num
# `month` is the month as a factor. The original read full$month before any
# such column existed; `$` partial matching is ambiguous between month_num and
# month_num_num, so it returned NULL and the assignment failed.
full$month <- as.factor(month_chr)
#converting to factors
for (col in c("season", "holiday", "workingday", "weather")) {
  full[[col]] <- as.factor(full[[col]])
}
#time slots
# Four single-predictor rpart trees are fitted on the training rows and
# plotted. The cut points hard-coded below were presumably read off those
# plots -- re-check them against the plots if the data changes.
# `train` is still the raw training frame here and its rows come first in
# `full`, so this replaces the hard-coded 1:10886.
train_rows <- seq_len(nrow(train))
hour <- full$time
feel <- full$atemp

# Registered users by hour of day -> timeslotreg (levels B1..B7).
tree1 <- rpart(registered ~ time, full[train_rows, ])
rpart.plot(tree1)
full$timeslotreg <- as.factor(
  ifelse(hour < 7.5, "B1",
    ifelse(hour >= 22, "B2",
      ifelse(hour < 18,
        ifelse(hour >= 9.5, "B3", ifelse(hour < 8.5, "B4", "B5")),
        ifelse(hour >= 20, "B6", "B7")
      )
    )
  )
)

# Registered users by "feels like" temperature -> tempslotred (T1..T3).
# The original binned full$time here; an hour is never >= 30, so "T3" could
# not occur and the column was just an hour split. The tree is fitted on
# atemp, so atemp is what gets binned. Column name kept as in the original.
tree2 <- rpart(registered ~ atemp, full[train_rows, ])
rpart.plot(tree2)
full$tempslotred <- as.factor(
  ifelse(feel < 30, ifelse(feel < 15, "T1", "T2"), "T3")
)

# Casual users by hour of day -> timeslotcas (A1..A4).
tree3 <- rpart(casual ~ time, full[train_rows, ])
rpart.plot(tree3)
full$timeslotcas <- as.factor(
  ifelse(hour < 10,
    ifelse(hour < 8.5, "A1", "A2"),
    ifelse(hour >= 20, "A3", "A4")
  )
)

# Casual users by "feels like" temperature -> tempslotcas.
# Same time -> atemp correction as tempslotred above.
# NOTE(review): with these cut points the ">= 20" test is only reached when
# atemp >= 31, so it is always TRUE and "C4" is never produced. The conditions
# are kept exactly as written; confirm the intended threshold on the tree4 plot.
tree4 <- rpart(casual ~ atemp, full[train_rows, ])
rpart.plot(tree4)
full$tempslotcas <- as.factor(
  ifelse(feel < 31,
    ifelse(feel < 19, "C1", "C2"),
    ifelse(feel >= 20, "C3", "C4")
  )
)
# Dividing into quarters
# year_part numbers the quarters consecutively: 1-4 for 2011 and 5-8 for 2012.
# Rows whose year is neither 2011 nor 2012 get NA.
month_value <- full$month_num
quarter_in_year <- 1 + (month_value > 3) + (month_value > 6) + (month_value > 9)
# A missing month falls back to the first quarter of its year, which is what
# the step-by-step masked assignments did.
quarter_in_year[is.na(quarter_in_year)] <- 1
quarters_before <- ifelse(full$year == "2011", 0,
                          ifelse(full$year == "2012", 4, NA))
full$year_part <- quarters_before + quarter_in_year
table(full$year_part)
# combining holiday and working day
# daytype is decided in priority order: "weekend" (Saturday/Sunday), then
# "workingday" (holiday == 0 and workingday == 1), then "holiday"
# (holiday == 1); anything left over is labelled "None".
is_weekend <- full$day == "Saturday" | full$day == "Sunday"
is_plain_workday <- full$holiday == 0 & full$workingday == 1
holiday_or_none <- ifelse(full$holiday == 1, "holiday", "None")
weekday_kind <- ifelse(is_plain_workday, "workingday", holiday_or_none)
full$daytype <- as.factor(ifelse(is_weekend, "weekend", weekday_kind))
# weekend is a 0/1 factor; rows where the weekday is unknown stay at 0.
full$weekend <- as.factor(ifelse(is_weekend %in% TRUE, 1, 0))
#Windspeed
# Rows whose windspeed is exactly 0 have that value replaced by the prediction
# of a random forest trained on the rows with non-zero windspeed.
# The replacement is done in place. The original rebuilt `full` with
# rbind(wind1, wind0), which moved every zero-wind row to the bottom of the
# frame and dropped any row whose windspeed was NA.
zero_wind <- !is.na(full$windspeed) & full$windspeed == 0
known_wind <- !is.na(full$windspeed) & full$windspeed != 0
# Fixed seed so the imputed values are the same on every run.
set.seed(2011)
rf_wind <- randomForest(
  windspeed ~ season + weather + humidity + month + temp + year + atemp,
  data = full[known_wind, ], importance = TRUE, ntree = 250
)
# Skip the prediction step when there is nothing to impute.
if (any(zero_wind)) {
  full$windspeed[zero_wind] <- predict(rf_wind, full[zero_wind, ])
}
# train and test
# The hour becomes a factor for modelling. Rows are then split on the day of
# the month taken from characters 9-10 of datetime: days below 20 go to
# `train`, days above 19 go to `test`.
full[["time"]] <- as.factor(full[["time"]])
day_of_month <- as.integer(substr(full[["datetime"]], 9, 10))
train <- full[day_of_month < 20, ]
test <- full[day_of_month > 19, ]
# Targets are modelled as log(count + 1); the shifted counts are kept as
# columns too.
train[["reg1"]] <- train[["registered"]] + 1
train[["cas1"]] <- train[["casual"]] + 1
train[["logcas"]] <- log(train[["cas1"]])
train[["logreg"]] <- log(train[["reg1"]])
# Zero-filled target columns on the test frame.
test[["logreg"]] <- 0
test[["logcas"]] <- 0
#Random Forest
# Two forests are fitted on the log(count + 1) targets, one for registered and
# one for casual users. Their predictions are transformed back to counts,
# summed, and written out as the submission file.
# Fixed seed so the fitted forests, and so the submission, are reproducible.
set.seed(2012)

# Registered users. The original formula named `tempslot` and `timeslot`,
# which this script never creates, so the fit failed with "object not found";
# the registered-user slot columns are `tempslotred` and `timeslotreg`.
rf <- randomForest(
  logreg ~ time + workingday + day + holiday + daytype + tempslotred +
    humidity + atemp + windspeed + season + weekend + weather +
    timeslotreg + year + year_part,
  data = train, importance = TRUE, ntree = 250
)
# expm1(x) is exp(x) - 1, computed accurately for x near 0.
pred_regs <- expm1(predict(rf, test))

# Casual users.
rf_cas <- randomForest(
  logcas ~ time + workingday + day + holiday + daytype + tempslotcas +
    humidity + atemp + windspeed + weekend + season + weather +
    timeslotcas + year + year_part,
  data = train, importance = TRUE, ntree = 250
)
pred_cas <- expm1(predict(rf_cas, test))

solution <- data.frame(datetime = test$datetime, count = pred_cas + pred_regs)
write.csv(solution, "output11_bc.csv", row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment