// Configuration for a named entity recognition model based on:
// Peters, Matthew E. et al. “Deep contextualized word representations.” NAACL-HLT (2018).
{
    "dataset_reader": {
        "type": "conll2003",
        "tag_label": "ner",
        "coding_scheme": "BIOUL",
        "token_indexers": {
            "bert": {
                "type": "bert-pretrained",
                "pretrained_model": std.extVar("BERT_VOCAB"),
                "do_lowercase": false,
                "use_starting_offsets": true
            },
            "token_characters": {
                "type": "characters",
                "min_padding_length": 3
            }
        }
    },
    "train_data_path": std.extVar("NER_TRAIN_DATA_PATH"),
    "validation_data_path": std.extVar("NER_TEST_A_PATH"),
    "test_data_path": std.extVar("NER_TEST_B_PATH"),
    "model": {
        "type": "crf_tagger",
        "label_encoding": "BIOUL",
        "constrain_crf_decoding": true,
        "calculate_span_f1": true,
        "dropout": 0.5,
        "include_start_end_transitions": false,
        "text_field_embedder": {
            "allow_unmatched_keys": true,
            "embedder_to_indexer_map": {
                "bert": ["bert", "bert-offsets"],
                "token_characters": ["token_characters"],
            },
            "token_embedders": {
                "bert": {
                    "type": "bert-pretrained",
                    "pretrained_model": std.extVar("BERT_WEIGHTS")
                },
                "token_characters": {
                    "type": "character_encoding",
                    "embedding": {
                        "embedding_dim": 16
                    },
                    "encoder": {
                        "type": "cnn",
                        "embedding_dim": 16,
                        "num_filters": 128,
                        "ngram_filter_sizes": [3],
                        "conv_layer_activation": "relu"
                    }
                }
            }
        },
        "encoder": {
            "type": "lstm",
            "input_size": 768 + 128,
            "hidden_size": 200,
            "num_layers": 2,
            "dropout": 0.5,
            "bidirectional": true
        },
    },
    "iterator": {
        "type": "basic",
        "batch_size": 64
    },
    "trainer": {
        "optimizer": {
            "type": "adam",
            "lr": 0.001
        },
        "validation_metric": "+f1-measure-overall",
        "num_serialized_models_to_keep": 3,
        "num_epochs": 75,
        "grad_norm": 5.0,
        "patience": 25,
        "cuda_device": 0
    }
}
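For reference, a minimal launcher sketch, assuming an AllenNLP version from around when this gist was written (0.8.x), which passes environment variables through to std.extVar when it evaluates the Jsonnet. The config filename, model name, and data paths below are placeholders:

    # Hypothetical launcher for the config above ("ner_bert.jsonnet" is a
    # placeholder filename). AllenNLP exposes environment variables to
    # std.extVar when parsing the config, so we set them before training.
    import os

    os.environ["BERT_VOCAB"] = "bert-base-cased"            # vocab for the token indexer
    os.environ["BERT_WEIGHTS"] = "bert-base-cased"          # weights for the token embedder
    os.environ["NER_TRAIN_DATA_PATH"] = "data/eng.train"    # CoNLL-2003 splits
    os.environ["NER_TEST_A_PATH"] = "data/eng.testa"
    os.environ["NER_TEST_B_PATH"] = "data/eng.testb"

    from allennlp.commands.train import train_model_from_file

    train_model_from_file("ner_bert.jsonnet", "output/ner_bert")

The same thing from the shell is allennlp train ner_bert.jsonnet -s output/ner_bert, with those variables exported first.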
@joelgrus can you kindly share the F1 scores you get with this configuration file? I am looking for any public code that reproduces BERT's results on CoNLL 2003 English, but everything I have found is much weaker than what was reported in the paper.
@joelgrus
In this case, during the training phase only the model on top of BERT (here, crf_tagger) is trained, and the BERT model itself is not trained, right?
During the training phase, I'd like to know how to do something like:
model.train()
bert_model.train()
...
for epoch in range(config.num_epochs):
    ...
    loss.backward()   # compute gradients for both models
    opt_bert.step()   # update BERT's parameters
    opt_model.step()  # update the parameters of the model on top
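One note on this: AllenNLP's trainer builds a single optimizer over every parameter that has requires_grad=True, so two optimizers and two step() calls shouldn't be necessary. As a sketch (assuming model is the CrfTagger built from this config), unfreezing the BERT weights after construction lets the existing trainer update them too:

    # Sketch: unfreeze BERT inside an already-built model ("model" is assumed).
    # The bert-pretrained embedder freezes BERT by default (requires_grad=False),
    # which is why only the layers on top get trained out of the box.
    for name, parameter in model.named_parameters():
        if "bert" in name:  # heuristic: BERT parameters live under the bert token embedder
            parameter.requires_grad = True

Recent AllenNLP versions also accept a requires_grad option on the bert-pretrained token embedder itself, which would achieve the same thing from the config; treat that as an assumption to check against your version.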
Hi, how does the parsing of the config file work? For example, can I just set the entire config to be:
"token_embedders": { "bert": { "type": "bert-pretrained", "pretrained_model": std.extVar("BERT_WEIGHTS") }, "token_characters": { "type": "character_encoding", "embedding": { "embedding_dim": 16 }, "encoder": { "type": "cnn", "embedding_dim": 16, "num_filters": 128, "ngram_filter_sizes": [3], "conv_layer_activation": "relu" } } }
Or does the config file have to follow certain requirements (and if so, which ones)?
Thank you.
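For what it's worth, the file is Jsonnet, and AllenNLP maps each object's keys onto the constructor arguments of the component named by its "type" entry. The top level therefore has to contain the keys the train command expects (dataset_reader, train_data_path, model, iterator, trainer, and so on), so a token_embedders fragment on its own won't parse into a trainable model. A sketch for inspecting the parsed structure (the filename is a placeholder, and the environment variables referenced via std.extVar must be set first):

    # Sketch: load the Jsonnet config the way AllenNLP does and inspect it.
    # "ner_bert.jsonnet" is a placeholder; std.extVar will fail unless the
    # environment variables it references (BERT_VOCAB, etc.) are set.
    from allennlp.common.params import Params

    params = Params.from_file("ner_bert.jsonnet")
    print(params.as_dict())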
It always gives me a KeyError for "bert-offsets". Why is that?
How do I have to preprocess the CNN / DM dataset in order to use it here?
@joelgrus Is there a reason you don't use bert_adam / L2 regularization here, as the BERT paper / HuggingFace reimplementation do? And would the right way to use BERT's normal optimizer just be to change the optimizer type from "adam" to "bert_adam"?
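By "change the optimizer type" I mean something like the sketch below, assuming pytorch_pretrained_bert is installed; the lr, warmup, and t_total values are placeholders rather than tuned settings:

    # Sketch: BERT's original optimizer, built directly from pytorch_pretrained_bert.
    # "model" is assumed to be the CrfTagger built from this config.
    from pytorch_pretrained_bert.optimization import BertAdam

    optimizer = BertAdam(model.parameters(),
                         lr=5e-5,            # placeholder learning rate
                         warmup=0.1,         # fraction of steps spent warming up
                         t_total=10000,      # placeholder total number of update steps
                         weight_decay=0.01)  # the weight decay the BERT paper uses

Whether AllenNLP registers this class under the name "bert_adam" depends on the version, so the config-only swap is an assumption worth verifying.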