Created
September 7, 2016 22:36
-
-
Save dkhurana1306/96612404d20cbe5fe3f7ffc34a7c08b0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the training and submission sets, recode the -999.0 sentinel as NA and
# derive the two helper columns (higgs_defined, num_jet) used to partition the
# events.
# NOTE(review): the directory below is machine-specific; adjust before running.
path_to_data_file <- "/Users/dk1306/downloads/"
data <- read.csv(paste0(path_to_data_file, "training.csv"), header = TRUE)
data.submission <- read.csv(paste0(path_to_data_file, "test.csv"), header = TRUE)

# Every cell equal to -999.0 is treated as missing.
data[data == -999.0] <- NA
data.submission[data.submission == -999.0] <- NA

# higgs_defined: 1 when DER_mass_MMC is present, 0 when it is NA.
data$higgs_defined <- as.numeric(!is.na(data$DER_mass_MMC))
data.submission$higgs_defined <-
  as.numeric(!is.na(data.submission$DER_mass_MMC))

# num_jet: copy of PRI_jet_num with the value 3 folded into 2.
data$num_jet <- data$PRI_jet_num
data$num_jet[data$PRI_jet_num == 3] <- 2
data.submission$num_jet <- data.submission$PRI_jet_num
data.submission$num_jet[data.submission$PRI_jet_num == 3] <- 2
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Remove the phi (azimuthal angle) columns from both data sets.
drop_phi <- c(
  "PRI_tau_phi", "PRI_lep_phi", "PRI_met_phi",
  "PRI_jet_leading_phi", "PRI_jet_subleading_phi"
)
# A logical mask is used instead of -which(...): when no name matches,
# -which(...) is an empty index and would silently select zero columns.
data_phi <- data[, !(names(data) %in% drop_phi), drop = FALSE]
data.submission_phi <-
  data.submission[, !(names(data.submission) %in% drop_phi), drop = FALSE]
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Partition both data sets into four subsets: events where higgs_defined is 0,
# and events where it is 1 split by num_jet (0, 1 or 2).
# The row masks are computed on data / data.submission (which carry the helper
# columns) and applied to the phi-stripped copies, which have the same rows.
# which() is used so that an NA in a mask cannot produce an all-NA phantom row.
temp <- data_phi
temp_sub <- data.submission_phi

data_higgs_undefined <- temp[which(data$higgs_defined == 0), ]
data_higgs_0 <- temp[which(data$higgs_defined == 1 & data$num_jet == 0), ]
data_higgs_1 <- temp[which(data$higgs_defined == 1 & data$num_jet == 1), ]
data_higgs_2 <- temp[which(data$higgs_defined == 1 & data$num_jet == 2), ]

data.submission_higgs_undefined <-
  temp_sub[which(data.submission$higgs_defined == 0), ]
data.submission_higgs_0 <- temp_sub[
  which(data.submission$higgs_defined == 1 & data.submission$num_jet == 0),
]
data.submission_higgs_1 <- temp_sub[
  which(data.submission$higgs_defined == 1 & data.submission$num_jet == 1),
]
data.submission_higgs_2 <- temp_sub[
  which(data.submission$higgs_defined == 1 & data.submission$num_jet == 2),
]
# Columns removed from each jet-count subset. The helper columns
# (higgs_defined, num_jet) are removed from all three; the 0-jet and 1-jet
# subsets additionally lose the jet-related columns listed below.
drop_columns_0 <- c(
  "higgs_defined", "num_jet", "PRI_jet_num", "PRI_jet_leading_pt",
  "PRI_jet_subleading_pt", "PRI_jet_all_pt",
  "DER_deltaeta_jet_jet", "DER_mass_jet_jet", "DER_prodeta_jet_jet",
  "DER_lep_eta_centrality",
  "PRI_jet_leading_eta", "PRI_jet_subleading_eta"
)
drop_columns_1 <- c(
  "higgs_defined", "num_jet", "PRI_jet_num", "PRI_jet_leading_pt",
  "PRI_jet_subleading_pt", "DER_deltaeta_jet_jet",
  "DER_mass_jet_jet", "DER_prodeta_jet_jet", "DER_lep_eta_centrality",
  "PRI_jet_subleading_eta"
)
drop_columns_2 <- c("higgs_defined", "num_jet")

# Return `events` without the columns named in `unwanted`.
# A logical mask is used instead of -which(...): when no name matches,
# -which(...) is an empty index and would silently select zero columns.
drop_named_columns <- function(events, unwanted) {
  events[, !(names(events) %in% unwanted), drop = FALSE]
}

data_higgs_0_cleaned <- drop_named_columns(data_higgs_0, drop_columns_0)
data_higgs_1_cleaned <- drop_named_columns(data_higgs_1, drop_columns_1)
data_higgs_2_cleaned <- drop_named_columns(data_higgs_2, drop_columns_2)

data.submission_higgs_0_cleaned <-
  drop_named_columns(data.submission_higgs_0, drop_columns_0)
data.submission_higgs_1_cleaned <-
  drop_named_columns(data.submission_higgs_1, drop_columns_1)
data.submission_higgs_2_cleaned <-
  drop_named_columns(data.submission_higgs_2, drop_columns_2)
# Standardise (centre and scale, via scale()) every feature column of `events`
# and re-attach the `passthrough` columns unscaled, in the given order, after
# the scaled features.
#
# events:      data frame holding feature columns plus the passthrough columns.
# passthrough: names of columns that must not be scaled (identifiers, weights,
#              labels). They are selected by name rather than by position so a
#              change in column order cannot send the wrong column to scale().
# Returns a data frame with the scaled features followed by the passthrough
# columns.
scale_feature_columns <- function(events, passthrough) {
  stopifnot(all(passthrough %in% names(events)))
  is_feature <- !(names(events) %in% passthrough)
  scaled <- as.data.frame(scale(events[, is_feature, drop = FALSE]))
  for (column in passthrough) {
    scaled[[column]] <- events[[column]]
  }
  scaled
}

# Training subsets: EventId, Weight and Label are carried through unscaled.
training_passthrough <- c("EventId", "Weight", "Label")
data_higgs_0_cleaned_scaled <-
  scale_feature_columns(data_higgs_0_cleaned, training_passthrough)
data_higgs_1_cleaned_scaled <-
  scale_feature_columns(data_higgs_1_cleaned, training_passthrough)
data_higgs_2_cleaned_scaled <-
  scale_feature_columns(data_higgs_2_cleaned, training_passthrough)

# Submission subsets: only EventId is carried through unscaled.
# NOTE(review): each submission subset is standardised with its own mean/sd,
# not with the statistics of the matching training subset -- confirm this is
# intended before feeding both to the same model.
data.submission_higgs_0_cleaned_scaled <-
  scale_feature_columns(data.submission_higgs_0_cleaned, "EventId")
data.submission_higgs_1_cleaned_scaled <-
  scale_feature_columns(data.submission_higgs_1_cleaned, "EventId")
data.submission_higgs_2_cleaned_scaled <-
  scale_feature_columns(data.submission_higgs_2_cleaned, "EventId")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment