Created
May 10, 2018 15:41
-
-
Save lunik1/240b67b8fdc88161ec9985aa160221f6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Directory containing input files for the classifier.
# The input directory is expected to contain a ROOT file for each process named
# histofile_$PROCESS.root. This should contain a TTree named
# Ttree_$PROCESS. Additional trees named with the schema
# Ttree_$PROCESS__$SYSTEMATIC__(plus/minus) should be included for each
# relevant shape nuisance. The TTrees should contain a branch for each
# observable and a branch containing event weights.
input_dir: /scratch/data/TopPhysics/mvaDirs/inputs/2016/all/mz50mw50/
# Random number generation seed (integer)
seed: 52
# ROOT selection string specifying the cuts that should be made before
# classifier training takes place.
# Written as a folded block scalar (>-): the lines below are joined with single
# spaces into one string, with no trailing newline.
selection: >-
  zMass > 71.2 && zMass < 111.2 &&
  wPairMass > 60.4 && wPairMass < 100.4 &&
  chi2 < 40 &&
  Channel == 1
# Name of the channel. Only used to name the output; it does not apply any
# selection.
channel: ee
# List of channels which should be considered signal
signals:
  - tZq
# List of channels which should be considered background
backgrounds:
  - DYToLL_M10To50_aMCatNLO
  - DYToLL_M50_aMCatNLO
  - FakeEG
  - FakeMu
  - TbartChan
  - TbartW
  - THQ
  - TsChan
  - TT
  - TtChan
  - ttH
  - TTW
  - TtW
  - TTZ
  - TWZ
  - Wjets
  - WW
  - WWW
  - WWZ
  - WZ
  - WZZ
  - ZZ
  - ZZZ
# Name of the process containing collision data.
# The location of the true data needs to be known when combine/THETA output is
# generated.
data_process: DataEG
# Directories that plots, ROOT files, and trained classifiers should be output
# into, respectively
plot_dir: plots/
root_dir: root/
mva_dir: mva/
# Fraction of data to be reserved for the test sample
test_fraction: 0.25
# If true, the weights of the signal channels are linearly scaled so that the
# overall normalisation for both the signal and background channels is the same
equalise_signal: true
# How negative event weights should be treated
#   passthrough: negative weights are unaltered
#   abs: the absolute value of all negative weights is taken
#   reweight: The absolute value of all negative weights is taken. The original
#             normalisation for each process is then restored by linearly
#             scaling the resulting weights down. This will fail if any
#             processes have an overall negative weight.
#   zero: negative weights are set to 0
negative_weight_treatment: passthrough
# Classifier selection
#   bdt_grad: Gradient Boosted Decision Tree (scikit-learn)
#   bdt_xgb: Gradient Boosted Decision Tree (XGBoost)
#   bdt_lgbm: Gradient Boosted Decision Tree (LightGBM)
#   random_forest: Random Forest
#   mlp: Multi-Layer Perceptron (Keras)
#   load: load the classifier specified by the classifer_path option
# NOTE(review): the option name `classifer_path` is reproduced as written;
# confirm its spelling against the configuration loader.
classifier: bdt_grad
# BDT configuration. Passed to scikit-learn's GradientBoostingClassifier()
# See scikit-learn documentation for more information
bdt_grad:
  n_estimators: 100
  verbose: 1
  min_samples_split: 0.1
  subsample: 0.75
  learning_rate: 0.02
  max_depth: 5
# Options governing the root file output
# NOTE(review): the nesting of the keys below under root_out is reconstructed
# from the comments; confirm against the configuration loader.
root_out:
  # Whether output should be in the format for combine (true) or THETA (false)
  combine: true

  # What form the (pseudo)-data in the files should take
  #   empty: Empty histograms
  #   poisson: Sum the Monte Carlo histograms, and perform a Poisson jump on
  #            each bin
  #   real: Use the real data
  data: empty

  # The strategy used to bin the MVA response in the resulting root files
  #   equal: specified number of equal-width bins in the (0, 1) range
  #          (default).
  #   quantile: specified number of equally-populated bins, achieved by
  #             placing bin edges at quantiles. Bin population does not take
  #             event weight into account.
  #   recursive_median: response is recursively bisected at the median
  strategy: equal

  # Set the number of bins for the equal or quantile binning strategies
  bins: 20

  # The recursive binning strategies will stop splitting once these limits
  # are reached
  min_signal_events: 0
  min_background_events: 1
  max_signal_error: 0.3
  max_background_error: 0.3
# Features to be included in the classifier training
features:
  - bTagDisc
  - fourthJetPt
  - jetMass
  - jjdelR
  - leadJetEta
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment