hqucms/JetTaggingTutorial.md

## JetTaggingTutorial.md

      
    Raw
  

              JetTaggingTutorial.md
            
          
# Jet Tagging Tutorial

## Set up Weaver and weaver-benchmark

# prerequisite: install the dependent packages
# https://github.com/hqucms/weaver#set-up-your-environment

git clone https://github.com/hqucms/weaver.git
cd weaver
git pull # update to the latest status
git clone https://github.com/hqucms/weaver-benchmark.git
## Download and convert the dataset

# in the weaver/ directory
mkdir top-dataset
cd top-dataset
# download the top-tagging dataset
curl -O 'https://zenodo.org/record/2603256/files/train.h5'
curl -O 'https://zenodo.org/record/2603256/files/val.h5'
curl -O 'https://zenodo.org/record/2603256/files/test.h5'
cd ..
# back in the weaver/ directory
# convert the h5 files to awkward arrays
python weaver-benchmark/utils/convert_top_datasets.py -i top-dataset/ -o top-dataset/converted
## Training the ParticleNet model

# in the weaver/ directory
python train.py \
 --data-train top-dataset/converted/train_file_0.awkd \
 --data-val top-dataset/converted/val_file_0.awkd \
 --data-test top-dataset/converted/test_file_0.awkd \
 --data-config weaver-benchmark/data/top/pf_points_features.yaml \
 --network-config weaver-benchmark/networks/top/particlenet_pf.py \
 --model-prefix outputs/{auto}/net \
 --predict-output pred.root \
 --num-workers 1 --fetch-step 1 --data-fraction 1 \
 --gpus 0 --batch-size 128 --num-epochs 20 --start-lr 5e-3 --optimizer ranger \
 --log logs/{auto}.log --tensorboard _particle_net
## Training the Deep Set / Particle Flow Network

# in the weaver/ directory
python train.py \
 --data-train top-dataset/converted/train_file_0.awkd \
 --data-val top-dataset/converted/val_file_0.awkd \
 --data-test top-dataset/converted/test_file_0.awkd \
 --data-config weaver-benchmark/data/top/pf_features_mask.yaml \
 --network-config weaver-benchmark/networks/top/pfn_pf.py \
 --model-prefix outputs/{auto}/net \
 --predict-output pred.root \
 --num-workers 1 --fetch-step 1 --data-fraction 1 \
 --gpus 1 --batch-size 128 --num-epochs 20 --start-lr 5e-3 --optimizer ranger \
 --log logs/{auto}.log --tensorboard _pfn
## TensorBoard

# in the weaver/ directory
tensorboard --logdir=runs
# open tensorboard in the web browser
# http://localhost:6006