name: "get_attention_wmt_ende_best"
data:
src: "en"
trg: "de"
train: "data/corpus/iwslt17-ben/en-de/train"
dev: "data/corpus/iwslt17-ben/en-de/valid"
test: "data/corpus/iwslt17-ben/en-de/train"
level: "word"
lowercase: False
max_sent_length: 50
src_voc_min_freq: 1
trg_voc_min_freq: 1
testing:
beam_size: 0 # greedy = 0
alpha: 1.0
ckpt: "data/saved-translation-models/iwslt-ende-bahdanau-sparsemax-new/best.ckpt"
output_path: "data/saved-translation-models/iwslt-ende-bahdanau-sparsemax-new/out"
save_attention: True
training:
random_seed: 42
optimizer: "adam"
learning_rate: 0.001
learning_rate_min: 0.00005
batch_size: 32
scheduling: "plateau"
patience: 8
decrease_factor: 0.5
early_stopping_metric: "ppl"
epochs: 100
validation_freq: 2359
logging_freq: 500
eval_metric: "bleu"
model_dir: "data/saved-translation-models/iwslt-ende-bahdanau-sparsemax-new"
overwrite: False
shuffle: True
use_cuda: True
max_output_length: 100
print_valid_sents: [0, 1, 2, 3, 4]
model:
tied_embeddings: False # True only for bpe
encoder:
rnn_type: "lstm"
embeddings:
embedding_dim: 512
scale: False
hidden_size: 512
bidirectional: True
dropout: 0.3
num_layers: 2
decoder:
rnn_type: "lstm"
embeddings:
embedding_dim: 512
scale: False
emb_scale: False
hidden_size: 512
dropout: 0.3
hidden_dropout: 0.3
num_layers: 2
input_feeding: True
init_hidden: "last"
attention: "bahdanau"
attn_func: "sparsemax"