Skip to content

Instantly share code, notes, and snippets.

@dkhurana1306
Created September 7, 2016 22:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dkhurana1306/96612404d20cbe5fe3f7ffc34a7c08b0 to your computer and use it in GitHub Desktop.
Save dkhurana1306/96612404d20cbe5fe3f7ffc34a7c08b0 to your computer and use it in GitHub Desktop.
# Directory that holds the input CSV files. The trailing slash is required
# because the file names are appended to it with paste0().
path_to_data_file <- '/Users/dk1306/downloads/'

# Read the training set and the submission set, taking the column names from
# the first line of each file. `header = TRUE` is spelled out in full: the
# shorthand `T` is an ordinary variable that can be reassigned, whereas TRUE
# is a reserved word.
data <- read.csv(paste0(path_to_data_file, 'training.csv'), header = TRUE)
data.submission <- read.csv(paste0(path_to_data_file, 'test.csv'), header = TRUE)
# Every cell equal to the placeholder value -999.0 is turned into NA, in both
# the training frame and the submission frame.
is.na(data) <- data == -999.0
is.na(data.submission) <- data.submission == -999.0
# Numeric indicator column: 1 where DER_mass_MMC holds a value, 0 where it is
# NA. Added to both the training frame and the submission frame.
data$higgs_defined <- as.numeric(!is.na(data$DER_mass_MMC))
data.submission$higgs_defined <-
  as.numeric(!is.na(data.submission$DER_mass_MMC))
# num_jet is PRI_jet_num with the value 3 folded into 2, so the later
# subsetting only has to distinguish the groups 0, 1 and 2.
data$num_jet <- replace(data$PRI_jet_num, data$PRI_jet_num == 3, 2)
data.submission$num_jet <- replace(
  data.submission$PRI_jet_num,
  data.submission$PRI_jet_num == 3,
  2
)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Names of the five *_phi columns that are removed from both frames.
drop_phi <- c(
  "PRI_tau_phi",
  "PRI_lep_phi",
  "PRI_met_phi",
  "PRI_jet_leading_phi",
  "PRI_jet_subleading_phi"
)

# Keep every column whose name is not in drop_phi. A logical mask is used
# instead of `-which(...)`: when no name matches, which() returns integer(0)
# and `df[, -integer(0)]` selects zero columns instead of all of them.
# drop = FALSE guarantees the result stays a data frame.
data_phi <- data[, !(names(data) %in% drop_phi), drop = FALSE]
data.submission_phi <-
  data.submission[, !(names(data.submission) %in% drop_phi), drop = FALSE]
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Working copies of the phi-free frames. The row masks below are computed on
# the full frames (data / data.submission) and applied to these copies.
temp <- data_phi
temp_sub <- data.submission_phi

# Training rows: one subset where higgs_defined is 0, and one subset per
# num_jet group (0, 1, 2) among the rows where higgs_defined is 1.
data_higgs_undefined <- temp[with(data, higgs_defined == 0), ]
data_higgs_0 <- temp[with(data, higgs_defined == 1 & num_jet == 0), ]
data_higgs_1 <- temp[with(data, higgs_defined == 1 & num_jet == 1), ]
data_higgs_2 <- temp[with(data, higgs_defined == 1 & num_jet == 2), ]

# Submission rows: the same four-way partition.
data.submission_higgs_undefined <-
  temp_sub[with(data.submission, higgs_defined == 0), ]
data.submission_higgs_0 <-
  temp_sub[with(data.submission, higgs_defined == 1 & num_jet == 0), ]
data.submission_higgs_1 <-
  temp_sub[with(data.submission, higgs_defined == 1 & num_jet == 1), ]
data.submission_higgs_2 <-
  temp_sub[with(data.submission, higgs_defined == 1 & num_jet == 2), ]
# Column names removed from each num_jet subset before scaling.
# NOTE(review): the jet-related entries are presumably columns that are
# undefined, constant or redundant for that jet count -- confirm against the
# data set description.

# Subset with num_jet == 0.
drop_columns_0 <- c(
  "higgs_defined",
  "num_jet",
  "PRI_jet_num",
  "PRI_jet_leading_pt",
  "PRI_jet_subleading_pt",
  "PRI_jet_all_pt",
  "DER_deltaeta_jet_jet",
  "DER_mass_jet_jet",
  "DER_prodeta_jet_jet",
  "DER_lep_eta_centrality",
  "PRI_jet_leading_eta",
  "PRI_jet_subleading_eta"
)

# Subset with num_jet == 1.
drop_columns_1 <- c(
  "higgs_defined",
  "num_jet",
  "PRI_jet_num",
  "PRI_jet_leading_pt",
  "PRI_jet_subleading_pt",
  "DER_deltaeta_jet_jet",
  "DER_mass_jet_jet",
  "DER_prodeta_jet_jet",
  "DER_lep_eta_centrality",
  "PRI_jet_subleading_eta"
)

# Subset with num_jet == 2: only the two helper columns are removed.
drop_columns_2 <- c(
  "higgs_defined",
  "num_jet"
)
# Remove the per-subset drop lists from each num_jet subset, for the training
# frames and then for the submission frames.
#
# Columns are selected with a logical "keep" mask rather than `-which(...)`:
# if none of the listed names is present, which() returns integer(0) and
# `df[, -integer(0)]` would silently return a frame with no columns at all.
# drop = FALSE keeps the result a data frame even if one column remains.
data_higgs_0_cleaned <-
  data_higgs_0[, !(names(data_higgs_0) %in% drop_columns_0), drop = FALSE]
data_higgs_1_cleaned <-
  data_higgs_1[, !(names(data_higgs_1) %in% drop_columns_1), drop = FALSE]
data_higgs_2_cleaned <-
  data_higgs_2[, !(names(data_higgs_2) %in% drop_columns_2), drop = FALSE]

data.submission_higgs_0_cleaned <- data.submission_higgs_0[
  ,
  !(names(data.submission_higgs_0) %in% drop_columns_0),
  drop = FALSE
]
data.submission_higgs_1_cleaned <- data.submission_higgs_1[
  ,
  !(names(data.submission_higgs_1) %in% drop_columns_1),
  drop = FALSE
]
data.submission_higgs_2_cleaned <- data.submission_higgs_2[
  ,
  !(names(data.submission_higgs_2) %in% drop_columns_2),
  drop = FALSE
]
# Standardise the feature columns of `df` with scale() and carry the columns
# named in `passthrough` across unscaled.
#
# df          - data frame; every column not listed in `passthrough` is
#               handed to scale().
# passthrough - character vector of column names excluded from scaling.
#
# Returns a data frame holding the scaled feature columns followed by the
# passthrough columns, in the order given. Stops with an error if a
# passthrough column is absent from `df`.
#
# The excluded columns are identified by name rather than by position, so the
# result does not depend on where those columns sit in `df`.
scale_feature_columns <- function(df, passthrough) {
  missing_cols <- setdiff(passthrough, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "columns not found: ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  feature_cols <- setdiff(names(df), passthrough)
  scaled <- as.data.frame(scale(df[, feature_cols, drop = FALSE]))

  # Re-attach the untouched columns after the scaled features.
  for (col in passthrough) {
    scaled[[col]] <- df[[col]]
  }
  scaled
}

# Training subsets: scale everything except EventId, Weight and Label.
data_higgs_0_cleaned_scaled <-
  scale_feature_columns(data_higgs_0_cleaned, c("EventId", "Weight", "Label"))
data_higgs_1_cleaned_scaled <-
  scale_feature_columns(data_higgs_1_cleaned, c("EventId", "Weight", "Label"))
data_higgs_2_cleaned_scaled <-
  scale_feature_columns(data_higgs_2_cleaned, c("EventId", "Weight", "Label"))

# Submission subsets: scale everything except EventId.
# NOTE(review): each submission subset is standardised with its own column
# means and standard deviations, not with those of the matching training
# subset -- confirm this is intended before feeding both to the same model.
data.submission_higgs_0_cleaned_scaled <-
  scale_feature_columns(data.submission_higgs_0_cleaned, "EventId")
data.submission_higgs_1_cleaned_scaled <-
  scale_feature_columns(data.submission_higgs_1_cleaned, "EventId")
data.submission_higgs_2_cleaned_scaled <-
  scale_feature_columns(data.submission_higgs_2_cleaned, "EventId")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment