Skip to content

Instantly share code, notes, and snippets.

@dkhurana1306
Created September 7, 2016 22:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dkhurana1306/53eda3bd07e03b746adacc3937ec5f34 to your computer and use it in GitHub Desktop.
Save dkhurana1306/53eda3bd07e03b746adacc3937ec5f34 to your computer and use it in GitHub Desktop.
#############################################################
# ---- Train one boosted-tree classifier per data subset ----
# Each fit follows the same recipe:
#   * label  = integer codes of `Label` minus 1 (assumes `Label` is a
#              two-level factor, giving 0/1 -- TODO confirm),
#   * data   = every column except the last three,
#   * params = depth 9, eta 0.01, 4 threads, logistic objective.
# Only the number of boosting rounds differs between subsets.
# NOTE(review): the three trailing columns are presumably non-feature columns
# (the label among them) -- confirm against the data preparation code.
# `nrounds` is spelled out in full instead of relying on partial matching.

# Subset 0
model_data <- data_higgs_0_cleaned_scaled
label <- as.integer(model_data$Label) - 1
drop <- c(ncol(model_data), ncol(model_data) - 1, ncol(model_data) - 2)
bst_0 <- xgboost(
  data = data.matrix(model_data[, -drop]),
  label = label,
  max.depth = 9,
  eta = 0.01,
  nrounds = 624,
  nthread = 4,
  objective = "binary:logistic"
)

# Subset 1
model_data <- data_higgs_1_cleaned_scaled
label <- as.integer(model_data$Label) - 1
drop <- c(ncol(model_data), ncol(model_data) - 1, ncol(model_data) - 2)
bst_1 <- xgboost(
  data = data.matrix(model_data[, -drop]),
  label = label,
  max.depth = 9,
  eta = 0.01,
  nrounds = 652,
  nthread = 4,
  objective = "binary:logistic"
)

# Subset 2
model_data <- data_higgs_2_cleaned_scaled
label <- as.integer(model_data$Label) - 1
drop <- c(ncol(model_data), ncol(model_data) - 1, ncol(model_data) - 2)
bst_2 <- xgboost(
  data = data.matrix(model_data[, -drop]),
  label = label,
  max.depth = 9,
  eta = 0.01,
  nrounds = 1104,
  nthread = 4,
  objective = "binary:logistic"
)
#############################################################
# Approximate median significance (AMS) of a binary selection.
#
# Arguments:
#   real   - true classes; 1 marks signal, 0 marks background.
#   pred   - predicted classes, same coding and length as `real`.
#   weight - numeric event weights, aligned element-wise with `real`/`pred`.
#   b_tau  - regularisation term added to the background sum (default 10,
#            the value that used to be hard-coded).
#
# Returns:
#   sqrt(2 * ((s + b + b_tau) * log(1 + s / (b + b_tau)) - s)), where
#   s is the summed weight of events predicted 1 whose true class is 1 and
#   b is the summed weight of events predicted 1 whose true class is 0.
#   Note that s and b are weighted sums, not rates.
#
# Stops with an error when the three inputs do not have the same length,
# because index-based weight lookup would otherwise silently yield NA or
# pick the wrong weights.
AMS <- function(real, pred, weight, b_tau = 10) {
  stopifnot(
    length(real) == length(pred),
    length(real) == length(weight)
  )

  selected <- pred == 1

  # which() drops NA comparisons, so events with a missing class are ignored
  # rather than turning the sums into NA.
  s <- sum(weight[which(selected & real == 1)]) # weighted true positives
  b <- sum(weight[which(selected & real == 0)]) # weighted false positives

  sqrt(2 * ((s + b + b_tau) * log(1 + s / (b + b_tau)) - s))
}
# ---- Threshold scan: error rates and AMS for a range of offsets ----
# A probability p becomes a class via as.integer(p + offset), i.e. class 1
# exactly when p + offset >= 1. Each offset in `thresh` is tried in turn.
# NOTE(review): the boosters are scored on the same data frames they were
# fitted on, so these figures are in-sample.
thresh <- seq(0.04, 0.13, 0.001)
E <- E_s <- E_b <- aMS <- rep(0, length(thresh))

# Probabilities from `booster` for `model_data` with its last three columns
# removed. The column positions are recomputed for every data frame instead
# of reusing whatever `drop` vector earlier code happened to leave behind.
predict_higgs_prob <- function(booster, model_data) {
  n_col <- ncol(model_data)
  non_features <- c(n_col, n_col - 1, n_col - 2)
  predict(booster, data.matrix(model_data[, -non_features]))
}

# Everything below up to the loop does not depend on the offset, so it is
# computed once rather than on every iteration.
prob_0 <- predict_higgs_prob(bst_0, data_higgs_0_cleaned_scaled)
prob_1 <- predict_higgs_prob(bst_1, data_higgs_1_cleaned_scaled)
prob_2 <- predict_higgs_prob(bst_2, data_higgs_2_cleaned_scaled)

# Events in the "undefined" subset are always predicted as class 0.
pred_undef <- rep(0, nrow(data_higgs_undefined))

# True classes and weights, concatenated in the same order as the
# predictions: undefined, 0, 1, 2.
# NOTE(review): these come from the *_cleaned frames while the predictions
# come from the *_cleaned_scaled frames -- assumes identical row order.
response <- as.integer(c(
  data_higgs_undefined$Label,
  data_higgs_0_cleaned$Label,
  data_higgs_1_cleaned$Label,
  data_higgs_2_cleaned$Label
)) - 1
weight <- c(
  data_higgs_undefined$Weight,
  data_higgs_0_cleaned$Weight,
  data_higgs_1_cleaned$Weight,
  data_higgs_2_cleaned$Weight
)

for (i in seq_along(thresh)) {
  offset <- thresh[i]

  pred_0 <- as.integer(prob_0 + offset)
  pred_1 <- as.integer(prob_1 + offset)
  pred_2 <- as.integer(prob_2 + offset)
  pred <- c(pred_undef, pred_0, pred_1, pred_2)

  # Overall misclassification rate and AMS at this offset.
  error <- 1 - sum(response == pred) / length(response)
  ams <- AMS(response, pred, weight)

  # Per-class error: share of true 1s (resp. true 0s) not predicted as such.
  # na.rm = TRUE counts only definite matches, like which()/intersect() would.
  hits_s <- sum(pred == 1 & response == 1, na.rm = TRUE)
  hits_b <- sum(pred == 0 & response == 0, na.rm = TRUE)
  class_error_s <- 1 - hits_s / sum(response == 1, na.rm = TRUE)
  class_error_b <- 1 - hits_b / sum(response == 0, na.rm = TRUE)

  E[i] <- error
  E_s[i] <- class_error_s
  E_b[i] <- class_error_b
  aMS[i] <- ams
  print(ams)
}

plot(thresh, aMS)
# ---- Build and write the submission file ----
# Offsets added to each booster's probability before truncation to 0/1
# (class 1 exactly when p + offset >= 1).
t0 <- 0.15
t1 <- 0.15
t2 <- 0.15

# For the submission frames only the last column is excluded before scoring.
# NOTE(review): presumably that last column is EventId -- confirm.
sub_0 <- data.submission_higgs_0_cleaned_scaled
sub_1 <- data.submission_higgs_1_cleaned_scaled
sub_2 <- data.submission_higgs_2_cleaned_scaled

pred_0 <- predict(bst_0, data.matrix(sub_0[, -ncol(sub_0)]))
pred_0 <- as.integer(pred_0 + t0)
pred_1 <- predict(bst_1, data.matrix(sub_1[, -ncol(sub_1)]))
pred_1 <- as.integer(pred_1 + t1)
pred_2 <- predict(bst_2, data.matrix(sub_2[, -ncol(sub_2)]))
pred_2 <- as.integer(pred_2 + t2)

# Events in the "undefined" subset are always predicted as class 0.
pred_undef <- rep(0, nrow(data.submission_higgs_undefined))

# Predictions and ids are concatenated in the same order:
# undefined, 0, 1, 2.
pred <- c(pred_undef, pred_0, pred_1, pred_2)
EventId <- c(
  data.submission_higgs_undefined$EventId,
  sub_0$EventId,
  sub_1$EventId,
  sub_2$EventId
)

# Fail loudly instead of silently recycling if ids and predictions diverge.
stopifnot(length(EventId) == length(pred))

# RankOrder is simply the row position in the concatenated output; it is not
# derived from the predicted probabilities.
df <- data.frame(
  EventId = as.integer(EventId),
  RankOrder = seq_along(pred),
  Class = ifelse(pred == 0, "b", "s"),
  stringsAsFactors = FALSE
)

write.csv(df, file = "submission.csv", row.names = FALSE, quote = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment