@allanj
Created October 16, 2019 03:30
Coreference resolution with BERT, implemented using the latest AllenNLP package (0.9.0)
local bert_model = "bert-base-uncased";
local train_path = "./datasets/coref/train.english.v4_gold_conll";
local dev_path = "./datasets/coref/dev.english.v4_gold_conll";
local test_path = "./datasets/coref/test.english.v4_gold_conll";
{
  "dataset_reader": {
    "type": "coref",
    "token_indexers": {
      "bert": {
        "type": "pretrained_transformer",
        "model_name": bert_model,
        "do_lowercase": true
      }
    },
    "max_span_width": 10
  },
  "train_data_path": train_path,
  "validation_data_path": dev_path,
  "test_data_path": test_path,
  "evaluate_on_test": true,
  "model": {
    "type": "coref",
    "text_field_embedder": {
      "allow_unmatched_keys": true,
      "token_embedders": {
        "bert": {
          "type": "pretrained_transformer",
          "model_name": bert_model
          //"requires_grad": true,
          //"top_layer_only": true
        }
      }
    },
    "context_layer": {
      "type": "lstm",
      "bidirectional": true,
      "input_size": 768,
      "hidden_size": 200,
      "num_layers": 1
    },
    "mention_feedforward": {
      "input_dim": 1588,
      "num_layers": 2,
      "hidden_dims": 150,
      "activations": "relu",
      "dropout": 0.2
    },
    "antecedent_feedforward": {
      "input_dim": 4784,
      "num_layers": 2,
      "hidden_dims": 150,
      "activations": "relu",
      "dropout": 0.2
    },
    "initializer": [
      [".*linear_layers.*weight", {"type": "xavier_normal"}],
      [".*scorer._module.weight", {"type": "xavier_normal"}],
      ["_distance_embedding.weight", {"type": "xavier_normal"}],
      ["_span_width_embedding.weight", {"type": "xavier_normal"}],
      ["_context_layer._module.weight_ih.*", {"type": "xavier_normal"}],
      ["_context_layer._module.weight_hh.*", {"type": "orthogonal"}]
    ],
    "lexical_dropout": 0.5,
    "feature_size": 20,
    "max_span_width": 10,
    "spans_per_word": 0.4,
    "max_antecedents": 150
  },
  "iterator": {
    "type": "bucket",
    "sorting_keys": [["text", "num_tokens"]],
    "padding_noise": 0.0,
    "batch_size": 1
  },
  "trainer": {
    "num_epochs": 150,
    "grad_norm": 5.0,
    "patience": 10,
    "cuda_device": 3,
    "validation_metric": "+coref_f1",
    "learning_rate_scheduler": {
      "type": "reduce_on_plateau",
      "factor": 0.5,
      "mode": "max",
      "patience": 2
    },
    "optimizer": {
      "type": "adam",
      "parameter_groups": [
        [[".*bert.*"], {"lr": 1e-5}],
        [["_context.*", "_antecedent.*", "_mention.*", "_endpoint", "_attentive.*", "_distance.*"], {"lr": 1e-3}]
      ]
    }
  }
}

allanj commented Oct 16, 2019

Usage:

allennlp train coref_bert.jsonnet -s ANY_FOLDER_NAME_TO_SAVE_MODEL_FILES
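
Once training finishes, the archived model can be loaded from Python for prediction. A minimal sketch, assuming AllenNLP 0.9.0 is installed and the serialization directory from the command above; the document string is only an illustrative input:

# Load the trained archive and run coreference prediction (AllenNLP 0.9.0 Predictor API).
from allennlp.predictors.predictor import Predictor

# `allennlp train ... -s DIR` writes model.tar.gz into the serialization directory.
predictor = Predictor.from_path("ANY_FOLDER_NAME_TO_SAVE_MODEL_FILES/model.tar.gz")

# The coreference predictor takes a raw document string and returns predicted
# clusters as lists of [start, end] token spans.
result = predictor.predict(
    document="Paul Allen was born in Seattle. He co-founded Microsoft with Bill Gates."
)
print(result["clusters"])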
