Created
November 5, 2020 21:43
-
-
Save bricksdont/1574a5d64136c2816e224abedb9ceb99 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Creates a Python 3 virtualenv under $base, installs sockeye 2.1.21 with a
# pinned MXNet build, and then (in the sections further down) downloads a
# model, samples translations and evaluates them.
#
# Usage: edit `base` below, then run this script with bash.

# Abort on the first failing command, on unset variables and on failures
# inside pipelines, so that a broken install does not lead to hours of
# GPU work on a half-configured environment.
set -euo pipefail

# change base path before running this
base=/net/cephfs/scratch/mathmu/map-volatility/debug/complete

# install

virtualenv -p python3 "$base/venvs/sockeye3"

# NOTE(review): some virtualenv `activate` scripts reference unset variables;
# nounset is relaxed only while sourcing it to be safe.
set +u
# shellcheck disable=SC1091 -- the file is created by virtualenv at run time
source "$base/venvs/sockeye3/bin/activate"
set -u

# CUDA version on instance
CUDA_VERSION=102

requirements_file="requirements.gpu-cu${CUDA_VERSION}.txt"

# -O overwrites a stale copy from an earlier run; without it wget would save
# the new download as "<name>.1" and pip would read the old file.
wget -O "$requirements_file" \
  "https://raw.githubusercontent.com/awslabs/sockeye/master/requirements/${requirements_file}"

# match my versions exactly
# NOTE: this substitution only matches the cu102 package name; it is a no-op
# if CUDA_VERSION is changed or the upstream pin differs.
sed -i 's/mxnet-cu102==1.7.0/mxnet-cu102mkl==1.6.0.post0/g' "$requirements_file"

pip install sockeye==2.1.21 --no-deps -r "$requirements_file"

rm -f -- "$requirements_file"

pip install matplotlib mxboard requests seaborn
# download model and data
#
# The archive is fetched into and unpacked inside $base, regardless of the
# directory the script is started from. The download is skipped when the
# model directory is already present.
if [[ ! -d "$base/dan-epo.baseline" ]]; then
  mkdir -p -- "$base"
  wget -O "$base/dan-epo.baseline.tar.gz" \
    https://files.ifi.uzh.ch/cl/archiv/2020/clcontra/dan-epo.baseline.tar.gz
  # -C makes tar extract next to the archive instead of into the current
  # working directory, which is where the check above looks for the model.
  tar -xzvf "$base/dan-epo.baseline.tar.gz" -C "$base"
fi
# Number of samples per input sentence; used for both the beam size and the
# n-best list size below.
num=100

# produce list of nbest samples
# (single sockeye.translate run with sampling enabled and a fixed seed)
OMP_NUM_THREADS=1 python -m sockeye.translate \
  -i "$base/dan-epo.baseline/test.pieces.src" \
  -o "$base/output.nbest" \
  -m "$base/dan-epo.baseline" \
  --sample \
  --beam-size "$num" \
  --nbest-size "$num" \
  --seed 101 \
  --length-penalty-alpha 1.0 \
  --device-ids 0 \
  --batch-size 16 \
  --disable-device-locking
# sample one sequence with different seeds (beam size 2 to get around potential issues with skipping softmax)
# Each seed writes its own output file: $base/output.single.<seed>
for seed in {1..100}; do
  OMP_NUM_THREADS=1 python -m sockeye.translate \
    -i "$base/dan-epo.baseline/test.pieces.src" \
    -o "$base/output.single.$seed" \
    -m "$base/dan-epo.baseline" \
    --sample \
    --beam-size 2 \
    --nbest-size 1 \
    --seed "$seed" \
    --length-penalty-alpha 1.0 \
    --device-ids 0 \
    --batch-size 64 \
    --disable-device-locking
done
# evaluate all
#
# For every index 1..100 two BLEU files are written:
#   output.single.<i>.bleu  - the run sampled with seed <i>
#   output.nbest.<i>.bleu   - the entry selected with --pos <i> from the n-best output

# Undo subword segmentation on stdin: delete spaces, then turn the "▁"
# word-boundary marker back into a space.
remove_pieces() {
  sed 's/ //g;s/▁/ /g'
}

for i in {1..100}; do
  remove_pieces < "$base/output.single.$i" \
    | sacrebleu "$base/dan-epo.baseline/test.trg" -w 2 \
    > "$base/output.single.$i.bleu"

  # NOTE(review): --pos receives 1..100 here; confirm whether
  # extract_translation_at_index_from_nbest.py expects 0-based positions.
  python "$base/dan-epo.baseline/extract_translation_at_index_from_nbest.py" --pos "$i" \
    < "$base/output.nbest" \
    | remove_pieces \
    | sacrebleu "$base/dan-epo.baseline/test.trg" -w 2 \
    > "$base/output.nbest.$i.bleu"
done
# visualize both
# One graph per experiment; the brace expansion passes all 100 BLEU files,
# in numeric order, to --inputs. Only the variable part is quoted so that
# the braces still expand.
python "$base/dan-epo.baseline/visualize.py" \
  --inputs "$base"/output.single.{1..100}.bleu \
  --output "$base/graph.single.png"

python "$base/dan-epo.baseline/visualize.py" \
  --inputs "$base"/output.nbest.{1..100}.bleu \
  --output "$base/graph.nbest.png"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment