Skip to content

Instantly share code, notes, and snippets.

@bricksdont
Last active September 22, 2020 07:02
Show Gist options
  • Save bricksdont/2f5b0b0ba54997c7d622d8f8f8538dbe to your computer and use it in GitHub Desktop.
Save bricksdont/2f5b0b0ba54997c7d622d8f8f8538dbe to your computer and use it in GitHub Desktop.
#! /bin/bash
# decode_chunk.sh -- translate a single chunk file with sockeye.translate.
#
# Usage:
#   decode_chunk.sh <chunk_input_dir> <chunk_output_dir> <chunk_prefix> \
#                   <chunk_index> <model_paths> <batch_size>
#
# Arguments:
#   $1 chunk_input_dir   directory that holds the chunk to translate
#   $2 chunk_output_dir  directory the translated chunk is written to
#   $3 chunk_prefix      file name prefix shared by all chunks
#   $4 chunk_index       suffix that identifies this chunk (appended to prefix)
#   $5 model_paths       value(s) for `-m`; several whitespace-separated paths
#                        may be given inside this one argument
#   $6 batch_size        value for `--batch-size`
#
# The chunk file name is <chunk_prefix><chunk_index> in both directories.
#
# NOTE: this script may be sourced (`. decode_chunk.sh ...`) as well as
# executed, so it deliberately does not change shell options and leaves via
# `return` when sourced.

if [ "$#" -lt 6 ]; then
  printf 'Usage: %s chunk_input_dir chunk_output_dir chunk_prefix chunk_index model_paths batch_size\n' \
    "decode_chunk.sh" >&2
  # `return` only succeeds when sourced; fall back to `exit` when executed.
  return 2 2>/dev/null || exit 2
fi

chunk_input_dir=$1
chunk_output_dir=$2
chunk_prefix=$3
chunk_index=$4
model_paths=$5
batch_size=$6

# Split model_paths on whitespace into separate `-m` operands without
# subjecting the individual paths to glob expansion.
read -r -a model_path_list <<< "$model_paths"

OMP_NUM_THREADS=3 python -m sockeye.translate \
  -i "$chunk_input_dir/$chunk_prefix$chunk_index" \
  -o "$chunk_output_dir/$chunk_prefix$chunk_index" \
  -m "${model_path_list[@]}" \
  --beam-size 10 \
  --length-penalty-alpha 1.0 \
  --device-ids 0 \
  --batch-size "$batch_size"
#! /bin/bash
# Translate a corpus in fixed-size chunks:
#   1. split the (optionally gzip-compressed) source file into chunks,
#   2. decode every chunk with $scripts/decode_chunk.sh,
#   3. move any *.log files out of the output directory,
#   4. concatenate the per-chunk outputs, in chunk order, into one file.
#
# Adjust the variables in the configuration section before running.

set -euo pipefail

# Unmatched globs expand to nothing instead of to the literal pattern; the
# chunk and log discovery below relies on this.
shopt -s nullglob

# Root of the project on your machine. Export `base` before running, or
# replace this line with a plain assignment. The script aborts if it is unset.
: "${base:?set 'base' to the project root on your machine}"

# --- configuration: vars you need to define ---------------------------------
# corpus, e.g. "test"
corpus=test
# model_name, e.g. "baseline"
model_name=baseline
# model_paths (handed to decode_chunk.sh as one argument)
model_paths=$base/models/baseline
# batch_size
batch_size=64
# chunk_size (in sentences)
chunk_size=100000
# -----------------------------------------------------------------------------

data=$base/data
scripts=$base/scripts
translations=$base/translations

src=en
trg=de

chunk_prefix="$corpus.bpe.$model_name.chunk."

chunk_input_dir=$translations/$model_name/chunk_inputs
chunk_output_dir=$translations/$model_name/chunk_outputs
chunk_log_dir=$translations/$model_name/chunk_logs

# -p also creates $translations and $translations/$model_name.
mkdir -p -- "$chunk_input_dir" "$chunk_output_dir" "$chunk_log_dir"

# Drop chunk inputs left over from an earlier run; otherwise a previous run
# that produced more chunks would inflate the chunk count below.
rm -f -- "$chunk_input_dir/$chunk_prefix"*

# splitting input file into chunks
# `gzip -cdfq` decompresses gzip input and passes plain text through unchanged.
gzip -cdfq -- "$data/$corpus.$src" \
  | split -d -l "$chunk_size" -a 3 - "$chunk_input_dir/$chunk_prefix"

# get number of chunk files generated (glob results are sorted, so the array
# is already in chunk order: 000, 001, ...)
chunk_inputs=("$chunk_input_dir/$chunk_prefix"*)
num_chunks=${#chunk_inputs[@]}

echo "Number of chunks found: $num_chunks"

if (( num_chunks == 0 )); then
  echo "No chunks were produced from $data/$corpus.$src; nothing to translate." >&2
  exit 1
fi

# translating individual chunks
chunk_outputs=()
for chunk_input in "${chunk_inputs[@]}"; do
  chunk_name=${chunk_input##*/}
  chunk_index=${chunk_name#"$chunk_prefix"}

  # Run in a child shell so the helper cannot clobber this script's variables;
  # under `set -e` a failed chunk aborts the whole run.
  bash "$scripts/decode_chunk.sh" \
    "$chunk_input_dir" "$chunk_output_dir" "$chunk_prefix" "$chunk_index" \
    "$model_paths" "$batch_size"

  chunk_outputs+=("$chunk_output_dir/$chunk_name")
done

# move logs out of the way
chunk_logs=("$chunk_output_dir"/*.log)
if (( ${#chunk_logs[@]} > 0 )); then
  mv -- "${chunk_logs[@]}" "$chunk_log_dir/"
fi

# concatenating results (only the chunks translated in this run, in order)
cat -- "${chunk_outputs[@]}" \
  > "$translations/$model_name/$corpus.bpe.$model_name.$trg"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment