This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# assigning weight on service percentage. | |
score <- c() | |
for (j in rep(1:141, each=1)){ | |
for (i in rep(1:20)) { | |
weight <- (i/20)*SERVICE[j,i] #Higher weight on recent years and lower weight on older years | |
score <- rbind(score,weight) | |
}} | |
head(score) | |
# [,1] | |
# weight 0.02000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
GBMfit.predictions <- predict(GBMfit,newdata=SUBHOLDOUT,type="prob") | |
GBMfit.predictions$CustomerID <- SUBHOLDOUT$CustomerID | |
GBMfit.predictions$Churn <- GBMfit.predictions$Yes | |
GBMfit.predictions <- GBMfit.predictions[-c(1:2)] | |
results <- data.frame(pred=ifelse(GBMfit.predictions$Yes > .5, "Yes", "No"),actual=SUBHOLDOUT$Churn) | |
confusionMatrix(results$pred, results$actual) | |
# Confusion Matrix and Statistics | |
# | |
# Reference |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
varImp(GBMfit) | |
# only 20 most important variables shown (out of 112) | |
# | |
# Overall | |
# CurrentEquipmentDays 100.000 | |
# MonthsInService 69.802 | |
# MonthlyMinutes 61.336 | |
# PercChangeMinutes 54.557 | |
# PercChangeRevenues 24.755 | |
# TotalRecurringCharge 22.134 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
MASTERRESULTS <- rbind(RPARTfit$results[,2:7], | |
FORESTfit.new$results[,2:7], | |
GBMfit$results[,5:10]) | |
MASTERRESULTS$method <- c(rep("rpart",nrow(RPARTfit$results)), | |
rep("randomforest",nrow(FORESTfit.new$results)), | |
rep("gbm",nrow(GBMfit$results))) | |
MASTERRESULTS$param <- c(RPARTfit$results$cp, | |
FORESTfit.new$results$mtry, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
gbmGrid <- expand.grid(n.trees=c(500,1000,2000), | |
interaction.depth=2:4, | |
shrinkage=c(.005,.01), | |
n.minobsinnode=c(5,10)) | |
cluster <- makeCluster(detectCores() - 1) # convention to leave 1 core for OS | |
registerDoParallel(cluster) | |
fitControl <- trainControl(method="cv",number=5, verboseIter = TRUE, | |
summaryFunction = twoClassSummary, classProbs = TRUE, allowParallel = TRUE) | |
set.seed(474); GBMfit <- train(Churn~.,data=SUBTRAIN,method="gbm",metric="ROC",trControl=fitControl,tuneGrid=gbmGrid,verbose=FALSE) | |
stopCluster(cluster) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
forestGrid <- expand.grid(mtry=c(1,2,3,7)) | |
cluster <- makeCluster(detectCores() - 1) # convention to leave 1 core for OS | |
registerDoParallel(cluster) | |
fitControl <- trainControl(method="cv",number=5, verboseIter = TRUE, | |
summaryFunction = twoClassSummary, classProbs = TRUE, allowParallel = TRUE) | |
set.seed(seed); FORESTfit.new <- train(Churn~.,data=SUBTRAIN,method="rf",metric="ROC",trControl=fitControl,tuneGrid=forestGrid) | |
stopCluster(cluster) | |
registerDoSEQ() | |
FORESTfit.new$results[rownames(FORESTfit.new$bestTune),] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
BEST <- list() | |
seed <- 111 | |
set.seed(seed) | |
#################################### | |
# Partition | |
#################################### | |
rpartGrid <- expand.grid(cp=10^seq(from=-4,to=-1,length=30)) | |
#train function | |
cluster <- makeCluster(detectCores() - 1) # convention to leave 1 core for OS |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
####################################### | |
# Handling rare levels################# | |
####################################### | |
service <- as.character(CELL$ServiceArea) | |
head(service) # [1] "SEAPOR503" "PITHOM412" "MILMIL414" "PITHOM412" "OKCTUL918" "OKCTUL918" | |
## Extracting area code | |
area.code <- substr(service,start=7,stop=9) | |
area.code <- combine_infrequent_levels(area.code,threshold=20)$values | |
area.code <- factor( area.code ) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Process data together | |
CELL <- rbind(TRAIN,HOLDOUT) | |
CELL$MissingMonthlyRev <- factor( ifelse(is.na(CELL$MonthlyRevenue),"Yes","No") ) | |
CELL$MissingPercentMin <- factor( ifelse(is.na(CELL$PercChangeMinutes),"Yes","No") ) | |
CELL$MissingMonthlyMin <- factor( ifelse(is.na(CELL$MonthlyMinutes),"Yes","No") ) | |
CELL$MissingTotalRecur <- factor( ifelse(is.na(CELL$TotalRecurringCharge),"Yes","No") ) | |
CELL$MissingDirecAssis <- factor( ifelse(is.na(CELL$DirectorAssistedCalls),"Yes","No") ) | |
CELL$MissingOverageMin <- factor( ifelse(is.na(CELL$OverageMinutes),"Yes","No") ) | |
CELL$MissingRoamingCalls <- factor( ifelse(is.na(CELL$RoamingCalls),"Yes","No") ) | |
CELL$MissingPercChangeRev <- factor( ifelse(is.na(CELL$PercChangeRevenues),"Yes","No") ) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# importing data | |
TRAIN <- read.csv("cell2celltrain.csv") | |
HOLDOUT <- read.csv("cell2cellholdout.csv") | |
SS <- read.csv("cell2cellsamplesubmission.csv") | |
################################################### | |
#Check extent of missingness if there's any pattern | |
################################################### | |
MISSING <- data.frame( Column=names(TRAIN), | |
MissingInTRAIN=as.numeric(unlist(lapply(TRAIN,function(x)sum(is.na(x))))), |
NewerOlder