@allanj
Created October 16, 2019 03:30
Coreference resolution with BERT, implemented using the latest AllenNLP package (0.9.0)
local bert_model = "bert-base-uncased";
local train_path = "./datasets/coref/train.english.v4_gold_conll";
local dev_path = "./datasets/coref/dev.english.v4_gold_conll";
local test_path = "./datasets/coref/test.english.v4_gold_conll";
{
  "dataset_reader": {
    "type": "coref",
    "token_indexers": {
      "bert": {
        "type": "pretrained_transformer",
        "model_name": bert_model,
        "do_lowercase": true
      }
    },
    "max_span_width": 10
  },
  "train_data_path": train_path,
  "validation_data_path": dev_path,
  "test_data_path": test_path,
  "evaluate_on_test": true,
  "model": {
    "type": "coref",
    "text_field_embedder": {
      "allow_unmatched_keys": true,
      "token_embedders": {
        "bert": {
          "type": "pretrained_transformer",
          "model_name": bert_model
          //"requires_grad": true,
          //"top_layer_only": true
        }
      }
    },
    "context_layer": {
      "type": "lstm",
      "bidirectional": true,
      "input_size": 768,
      "hidden_size": 200,
      "num_layers": 1
    },
    "mention_feedforward": {
      "input_dim": 1588,
      "num_layers": 2,
      "hidden_dims": 150,
      "activations": "relu",
      "dropout": 0.2
    },
    "antecedent_feedforward": {
      "input_dim": 4784,
      "num_layers": 2,
      "hidden_dims": 150,
      "activations": "relu",
      "dropout": 0.2
    },
    "initializer": [
      [".*linear_layers.*weight", {"type": "xavier_normal"}],
      [".*scorer._module.weight", {"type": "xavier_normal"}],
      ["_distance_embedding.weight", {"type": "xavier_normal"}],
      ["_span_width_embedding.weight", {"type": "xavier_normal"}],
      ["_context_layer._module.weight_ih.*", {"type": "xavier_normal"}],
      ["_context_layer._module.weight_hh.*", {"type": "orthogonal"}]
    ],
    "lexical_dropout": 0.5,
    "feature_size": 20,
    "max_span_width": 10,
    "spans_per_word": 0.4,
    "max_antecedents": 150
  },
  "iterator": {
    "type": "bucket",
    "sorting_keys": [["text", "num_tokens"]],
    "padding_noise": 0.0,
    "batch_size": 1
  },
  "trainer": {
    "num_epochs": 150,
    "grad_norm": 5.0,
    "patience": 10,
    "cuda_device": 3,
    "validation_metric": "+coref_f1",
    "learning_rate_scheduler": {
      "type": "reduce_on_plateau",
      "factor": 0.5,
      "mode": "max",
      "patience": 2
    },
    "optimizer": {
      "type": "adam",
      "parameter_groups": [
        [[".*bert.*"], {"lr": 1e-5}],
        [["_context.*", "_antecedent.*", "_mention.*", "_endpoint", "_attentive.*", "_distance.*"], {"lr": 1e-3}]
      ]
    }
  }
}

allanj commented Oct 16, 2019

Usage:

allennlp train coref_bert.jsonnet -s ANY_FOLDER_NAME_TO_SAVE_MODEL_FILES
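
Once training finishes, the archived model can be loaded from Python for prediction. A minimal sketch, assuming AllenNLP 0.9.0 is installed and the serialization directory from the command above; the document string is only an illustrative input:

# Load the trained archive and run coreference prediction (AllenNLP 0.9.0 Predictor API).
from allennlp.predictors.predictor import Predictor

# `allennlp train ... -s DIR` writes model.tar.gz into the serialization directory.
predictor = Predictor.from_path("ANY_FOLDER_NAME_TO_SAVE_MODEL_FILES/model.tar.gz")

# The coreference predictor takes a raw document string and returns predicted
# clusters as lists of [start, end] token spans.
result = predictor.predict(
    document="Paul Allen was born in Seattle. He co-founded Microsoft with Bill Gates."
)
print(result["clusters"])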
