Skip to content

Instantly share code, notes, and snippets.

@bricksdont
Last active January 20, 2020 09:56
Show Gist options
  • Save bricksdont/44e9dc753d884d6c6c3691ebddc9fd9a to your computer and use it in GitHub Desktop.
Save bricksdont/44e9dc753d884d6c6c3691ebddc9fd9a to your computer and use it in GitHub Desktop.
Sockeye parallel decoding
#! /bin/bash
#
# Translate a single chunk file with Sockeye.
#
# Arguments:
#   $1  chunk_input_dir   directory that holds the chunk to translate
#   $2  chunk_output_dir  directory the translation is written to
#   $3  chunk_prefix      file name prefix shared by all chunks
#   $4  chunk_index       suffix that identifies this chunk
#   $5  model_paths       model folder(s) handed to -m; several folders may be
#                         given as one whitespace-separated argument
#   $6  batch_size        value handed to --batch-size
#
# The chunk is read from  <chunk_input_dir>/<chunk_prefix><chunk_index>  and
# the translation is written to the file of the same name in
# <chunk_output_dir>.

# Refuse to run with missing arguments: empty values would otherwise expand to
# paths such as "/" and start a translation on the wrong input.
if [ "$#" -lt 6 ]; then
  echo "Usage: $0 chunk_input_dir chunk_output_dir chunk_prefix chunk_index model_paths batch_size" >&2
  exit 2
fi

chunk_input_dir=$1
chunk_output_dir=$2
chunk_prefix=$3
chunk_index=$4
model_paths=$5
batch_size=$6

# Input and output use the same file name, in different directories.
chunk_file="$chunk_prefix$chunk_index"

# $model_paths is intentionally left unquoted so that a whitespace-separated
# list of model folders is split into separate arguments for -m.
# shellcheck disable=SC2086
OMP_NUM_THREADS=3 python -m sockeye.translate \
  -i "$chunk_input_dir/$chunk_file" \
  -o "$chunk_output_dir/$chunk_file" \
  -m $model_paths \
  --beam-size 10 \
  --length-penalty-alpha 1.0 \
  --device-ids 0 \
  --batch-size "$batch_size" \
  --disable-device-locking
#! /bin/bash
#
# Driver for parallel decoding: splits a corpus into chunks, submits one
# decoding job per chunk and joins the translated chunks again.
# This first part activates the software environment and lays out the
# directories used by the later steps.

source /net/cephfs/home/mathmu/scratch/goeckeritz-model/venvs/sockeye3/bin/activate

module unuse /apps/etc/modules/start/
module use /sapps/etc/modules/start/

module load volta cuda/10.0

base=/net/cephfs/home/mathmu/scratch/goeckeritz-model

# vars set by calling process:
# corpus
# model_name
# model_paths
# batch_size
# chunk_size
#
# Stop immediately if one of them is missing; an empty value would otherwise
# yield malformed directory names and chunk prefixes further down.
: "${corpus:?corpus must be set by the calling process}"
: "${model_name:?model_name must be set by the calling process}"
: "${model_paths:?model_paths must be set by the calling process}"
: "${batch_size:?batch_size must be set by the calling process}"
: "${chunk_size:?chunk_size must be set by the calling process}"

data=$base/data
scripts=$base/scripts
translations=$base/translations

src=en
trg=de

# Every chunk file name starts with this prefix, followed by the chunk index.
chunk_prefix="$corpus.bpe.$model_name.chunk."

chunk_input_dir=$translations/$model_name/chunk_inputs
chunk_output_dir=$translations/$model_name/chunk_outputs
chunk_log_dir=$translations/$model_name/chunk_logs

# mkdir -p also creates the parent directories ($translations and
# $translations/$model_name), so one call covers the whole layout.
mkdir -p "$chunk_input_dir" "$chunk_output_dir" "$chunk_log_dir"
# splitting input file into chunks

# Remove chunks with the same prefix that an earlier run left behind (for
# example one that used a smaller chunk size); split only overwrites the
# indices it produces, so higher-numbered leftovers would survive otherwise.
# The :? guards abort instead of expanding to "/*" if a variable is empty.
rm -f -- "${chunk_input_dir:?}/${chunk_prefix:?}"*

# gzip -dcf prints gzip-compressed as well as plain files to stdout, without
# going through a pager. split then writes files with 3-digit numeric suffixes.
gzip -dcf -- "$data/$corpus.$src" \
  | split -d -l "$chunk_size" -a 3 - "$chunk_input_dir/$chunk_prefix"

# get number of chunk files generated
# Only files that carry the current prefix are counted, so chunks that belong
# to a different corpus in the same directory do not inflate the number.
num_chunks=0
for chunk_file in "$chunk_input_dir/$chunk_prefix"*; do
  # An unmatched glob stays literal; the -e test filters that case out.
  if [[ -e "$chunk_file" ]]; then
    num_chunks=$((num_chunks + 1))
  fi
done

echo "Number of chunks found: $num_chunks"

if [[ "$num_chunks" -eq 0 ]]; then
  echo "Error: no chunks were created from $data/$corpus.$src" >&2
  exit 1
fi
# translating individual chunks

# IDs of the jobs submitted below; the wait loop polls exactly these jobs.
job_ids=()

for chunk_index in $(seq -f "%03g" 0 $((num_chunks - 1))); do
  # --parsable makes sbatch print "jobid[;cluster]" instead of a sentence.
  # $model_paths is passed as ONE argument so that a list of several models
  # does not shift the positional parameters of decode_chunk.sh.
  if ! job_id=$(sbatch --parsable --qos=vesta --time=1:00:00 \
      --gres gpu:Tesla-V100:1 --cpus-per-task 3 --mem 48g \
      "$scripts/decode_chunk.sh" \
      "$chunk_input_dir" "$chunk_output_dir" "$chunk_prefix" "$chunk_index" \
      "$model_paths" "$batch_size"); then
    echo "Error: sbatch failed for chunk $chunk_index" >&2
    echo "Jobs submitted so far: ${job_ids[*]}" >&2
    exit 1
  fi

  # Drop the optional ";cluster" part of the parsable output.
  job_id=${job_id%%;*}
  echo "Submitted batch job $job_id (chunk $chunk_index)"
  job_ids+=("$job_id")
done

# query queue to see if finished
# Only the jobs submitted above are polled, so unrelated jobs in the queue
# (even ones with "chunk" in their name) no longer keep this loop waiting,
# and the script does not depend on a particular user name.
if (( ${#job_ids[@]} > 0 )); then
  job_list=$(IFS=,; echo "${job_ids[*]}")

  # squeue lists pending/running jobs only; stderr is silenced because squeue
  # reports an error once a requested job is no longer known to the
  # controller, which here simply means the job has finished.
  while [[ -n "$(squeue -h -j "$job_list" -o "%i" 2>/dev/null)" ]]; do
    echo "Waiting for chunk decoding to finish, sleep 1000"
    sleep 1000
  done
fi
# move logs out of the way
mv "$chunk_output_dir"/*.log "$chunk_log_dir"/

# concatenating results
# The chunks are joined by explicit index (000 .. num_chunks-1) rather than by
# glob: outputs left over from an earlier run are ignored, and a chunk whose
# output is missing aborts the script instead of silently yielding a
# translation with lines missing.
final_output="$translations/$model_name/$corpus.bpe.$model_name.$trg"

chunk_outputs=()
for chunk_index in $(seq -f "%03g" 0 $((num_chunks - 1))); do
  chunk_output="$chunk_output_dir/$chunk_prefix$chunk_index"
  if [[ ! -f "$chunk_output" ]]; then
    echo "Error: missing translation for chunk $chunk_index: $chunk_output" >&2
    exit 1
  fi
  chunk_outputs+=("$chunk_output")
done

# Without this guard an empty list would make cat read from stdin.
if (( ${#chunk_outputs[@]} == 0 )); then
  echo "Error: no chunk outputs to concatenate" >&2
  exit 1
fi

cat -- "${chunk_outputs[@]}" > "$final_output"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment