Skip to content

Instantly share code, notes, and snippets.

@klauszhang
Created May 14, 2016 13:17
Show Gist options
  • Save klauszhang/a3cdb2dbcb3e98de05586687d7fb6df2 to your computer and use it in GitHub Desktop.
Save klauszhang/a3cdb2dbcb3e98de05586687d7fb6df2 to your computer and use it in GitHub Desktop.
Predict hotel_cluster within a single hotel market, using a decision tree (rpart/CART, not C4.5) and kNN -- not working yet.
# ---- Pick one hotel market to work on ---------------------------------------
# Distinct hotel_market values in the raw data, and how many there are.
hotel_markets <- unique(train$hotel_market)
length(hotel_markets)

# Keep only the rows belonging to the second market in that list
# (data.table-style `i` expression: hotel_market is looked up inside `train`).
market <- train[hotel_market == hotel_markets[2], ]

# Quick look at the subset.
summary(market)
unique(market$hotel_continent)

# With no columns named, setkey() keys (and sorts) the table on all columns.
setkey(market)
# ---- Clean and type the single-market subset --------------------------------
# Parse the search timestamp. No tz is supplied, so the session time zone is
# used.
market$date_time <- as.POSIXct(market$date_time)

# Drop rows whose check-in date is an empty string. A logical mask is used
# rather than `-which(...)`: when no row matches, `-which(...)` becomes
# `-integer(0)`, which selects zero rows and would silently empty the table.
# `%in%` never yields NA, so rows with a missing srch_ci are kept, as before.
market <- market[!(market$srch_ci %in% ""), ]

market$srch_ci <- as.Date(market$srch_ci)
market$srch_co <- as.Date(market$srch_co)

# Length of stay in days (check-out minus check-in), stored as a plain number.
market[["stay_time"]] <- as.numeric(market$srch_co - market$srch_ci,
                                    units = "days")

# Identifier-like / categorical columns are stored as codes; turn them into
# factors so they are not treated as continuous quantities.
factor_cols <- c(
  "hotel_cluster", "is_booking", "site_name", "posa_continent",
  "user_location_city", "user_location_country", "user_location_region",
  "user_id", "is_mobile", "is_package", "channel",
  "srch_destination_id", "srch_destination_type_id",
  "hotel_continent", "hotel_country", "hotel_market"
)
for (col in factor_cols) {
  market[[col]] <- as.factor(market[[col]])
}

summary(market)
# ---- 70/30 train/test split -------------------------------------------------
# Number of rows that go into the training partition.
smp_size <- floor(nrow(market) * 0.70)

# Fix the seed so the partition is reproducible.
set.seed(123)
train_idx <- sample.int(nrow(market), size = smp_size)

# NOTE: from here on `train` is the training partition of `market`; it
# replaces whatever `train` held before this point.
train <- market[train_idx, ]
test <- market[-train_idx, ]

# Remove orig_destination_distance from both partitions.
train[["orig_destination_distance"]] <- NULL
test[["orig_destination_distance"]] <- NULL
library(class)

# ---- k-nearest-neighbour baseline for hotel_cluster -------------------------
# class::knn() works on distances between rows, so it needs numeric predictors
# with no missing values, and the label must not be one of the predictors.
# Passing the whole `train`/`test` tables (factors, dates and hotel_cluster
# itself) does not satisfy that, so a numeric feature matrix is built here.
feature_cols <- setdiff(
  names(train)[vapply(train, is.numeric, logical(1))],
  "hotel_cluster"
)
stopifnot(length(feature_cols) > 0)

train_x <- as.data.frame(train)[, feature_cols, drop = FALSE]
test_x <- as.data.frame(test)[, feature_cols, drop = FALSE]

# knn() rejects missing values: keep only complete rows, and remember which
# rows were kept so labels stay aligned with the feature rows.
train_ok <- complete.cases(train_x)
test_ok <- complete.cases(test_x)
train_x <- train_x[train_ok, , drop = FALSE]
test_x <- test_x[test_ok, , drop = FALSE]

# Columns that are constant in the training rows carry no information and
# would give a zero standard deviation (NaN after scaling), so drop them.
varies <- vapply(train_x, function(v) length(unique(v)) > 1L, logical(1))
stopifnot(any(varies), nrow(train_x) > 0, nrow(test_x) > 0)

# Standardise with the training mean/sd so no single column dominates the
# distance; the same transformation is applied to the test rows.
train_x <- scale(train_x[, varies, drop = FALSE])
test_x <- scale(
  test_x[, varies, drop = FALSE],
  center = attr(train_x, "scaled:center"),
  scale = attr(train_x, "scaled:scale")
)

# Class labels for the training rows that were kept.
cls <- train$hotel_cluster[train_ok]

# k is left at knn()'s default, as in the original call.
knn_pred <- knn(train_x, test_x, cl = cls)

# Hold-out accuracy (compared as character to be independent of factor levels).
mean(as.character(knn_pred) == as.character(test$hotel_cluster[test_ok]))

# NOTE(review): `time` is never defined in this script; unless it is created
# elsewhere this summarises the function stats::time. Possibly a timing
# variable or `stay_time` was intended -- confirm and replace.
summary(time)
library(rpart)

# ---- Classification tree for hotel_cluster ----------------------------------
# Fit on the training partition. The original call used `data = samp1`, an
# object that is never created in this script, so the call could not run.
fit <- rpart(
  hotel_cluster ~ date_time + stay_time + is_booking,
  data = train,
  method = "class"
)

# plot() draws only the branches; text() adds the split rules and leaf labels.
plot(fit)
text(fit)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment