This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the training and submission sets from a local directory.
path_to_data_file <- "/Users/dk1306/downloads/"
data <- read.csv(paste0(path_to_data_file, "training.csv"), header = TRUE)
data.submission <- read.csv(paste0(path_to_data_file, "test.csv"), header = TRUE)

# Treat -999.0 as the missing-value marker: recode it to NA in both sets so
# that missingness can be tested with is.na().
data[data == -999.0] <- NA
data.submission[data.submission == -999.0] <- NA

# NOTE(review): aggr() is not defined in this snippet (presumably VIM::aggr);
# confirm the providing package is attached before this point.
aggr(data)             # Display missingness in training data
aggr(data.submission)  # Display missingness in submission data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Names of the training columns that contain at least one NA.
colnames(data)[colSums(is.na(data)) > 0]
# [1] "DER_mass_MMC" "DER_deltaeta_jet_jet" "DER_mass_jet_jet" "DER_prodeta_jet_jet"
# [5] "DER_lep_eta_centrality" "PRI_jet_leading_pt" "PRI_jet_leading_eta" "PRI_jet_leading_phi"
# [9] "PRI_jet_subleading_pt" "PRI_jet_subleading_eta" "PRI_jet_subleading_phi"

# Names of the submission columns that contain at least one NA.
colnames(data.submission)[colSums(is.na(data.submission)) > 0]
# [1] "DER_mass_MMC" "DER_deltaeta_jet_jet" "DER_mass_jet_jet" "DER_prodeta_jet_jet"
# [5] "DER_lep_eta_centrality" "PRI_jet_leading_pt" "PRI_jet_leading_eta" "PRI_jet_leading_phi"
# [9] "PRI_jet_subleading_pt" "PRI_jet_subleading_eta" "PRI_jet_subleading_phi"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# higgs_defined: 1 when DER_mass_MMC is present, 0 when it is NA.
data$higgs_defined <- as.numeric(!is.na(data$DER_mass_MMC))

# num_jet: copy of PRI_jet_num with the value 3 folded into 2.
data$num_jet <- data$PRI_jet_num
data$num_jet[data$PRI_jet_num == 3] <- 2

# Same two derived columns for the submission set.
data.submission$higgs_defined <- as.numeric(!is.na(data.submission$DER_mass_MMC))
data.submission$num_jet <- data.submission$PRI_jet_num
data.submission$num_jet[data.submission$PRI_jet_num == 3] <- 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fraction of signal rows (Label == "s") for which the Higgs mass is undefined.
sum(data$higgs_defined == 0 & data$Label == "s") / sum(data$Label == "s")
#[1] 0.03309326

# Weight distribution of signal rows with an undefined Higgs mass.
summary(data$Weight[data$higgs_defined == 0 & data$Label == "s"])
# Min. 1st Qu. Median Mean 3rd Qu. Max.
#0.001502 0.001502 0.001503 0.003117 0.002653 0.018640

# Weight distribution of signal rows with a defined Higgs mass.
summary(data$Weight[data$higgs_defined == 1 & data$Label == "s"])
# Min. 1st Qu. Median Mean 3rd Qu. Max.
#0.001502 0.001503 0.001503 0.008247 0.018640 0.018640
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Partition the training data: one subset for rows where higgs_defined is 0,
# and one subset per num_jet value (0, 1, 2) for rows where it is 1.
# The "2_3" suffix marks the num_jet == 2 subset.
data_higgs_undefined <- data[data$higgs_defined == 0, ]
data_higgs_0 <- data[data$higgs_defined == 1 & data$num_jet == 0, ]
data_higgs_1 <- data[data$higgs_defined == 1 & data$num_jet == 1, ]
data_higgs_2_3 <- data[data$higgs_defined == 1 & data$num_jet == 2, ]

# Identical partition of the submission data.
data.submission_higgs_undefined <-
  data.submission[data.submission$higgs_defined == 0, ]
data.submission_higgs_0 <-
  data.submission[data.submission$higgs_defined == 1 & data.submission$num_jet == 0, ]
data.submission_higgs_1 <-
  data.submission[data.submission$higgs_defined == 1 & data.submission$num_jet == 1, ]
data.submission_higgs_2_3 <-
  data.submission[data.submission$higgs_defined == 1 & data.submission$num_jet == 2, ]
drop_columns_0 = as.character(c("DER_deltaeta_jet_jet","DER_mass_jet_jet","DER_prodeta_jet_jet","DER_lep_eta_centrality", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the training and submission sets from a local directory.
path_to_data_file <- "/Users/dk1306/downloads/"
data <- read.csv(paste0(path_to_data_file, "training.csv"), header = TRUE)
data.submission <- read.csv(paste0(path_to_data_file, "test.csv"), header = TRUE)

# Treat -999.0 as the missing-value marker and recode it to NA.
data[data == -999.0] <- NA
data.submission[data.submission == -999.0] <- NA

# higgs_defined: 1 when DER_mass_MMC is present, 0 when it is NA.
data$higgs_defined <- as.numeric(!is.na(data$DER_mass_MMC))
# Both cases are set in one step so rows with a present mass get 1 instead of
# being left NA by a missing-only assignment.
data.submission$higgs_defined <- as.numeric(!is.na(data.submission$DER_mass_MMC))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# 5-fold cross-validation of a binary-logistic xgboost model, scored by AUC.
nround <- 1500
param <- list(max_depth = 9, eta = 0.01, silent = 1, nthread = 4,
              objective = "binary:logistic")
model_data <- data_higgs_0_cleaned_scaled

# Encode Label as 0/1 from its factor codes. as.factor() keeps an existing
# factor unchanged and converts a character column, which as.integer() alone
# would turn into NA.
label <- as.integer(as.factor(model_data$Label)) - 1

# Exclude the last three columns of model_data from the feature matrix.
drop <- c(ncol(model_data), ncol(model_data) - 1, ncol(model_data) - 2)
dtrain <- xgb.DMatrix(data.matrix(model_data[, -drop]), label = label)
cv <- xgb.cv(params = param, data = dtrain, nrounds = nround, nfold = 5,
             metrics = "auc")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#############################################################
# Inputs for the final model fit on data_higgs_0_cleaned_scaled.
model_data <- data_higgs_0_cleaned_scaled

# Encode Label as 0/1 from its factor codes. as.factor() keeps an existing
# factor unchanged and converts a character column, which as.integer() alone
# would turn into NA.
label <- as.integer(as.factor(model_data$Label)) - 1

# Indices of the last three columns, excluded from the feature matrix below.
drop <- c(ncol(model_data), ncol(model_data) - 1, ncol(model_data) - 2)
bst_0 <- xgboost(data = data.matrix(model_data[,-drop]), label = label, | |
max.depth =9, | |
eta = 0.01, | |
nround = 624, | |
nthread = 4, |