Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Transformer QA multi GPU
// Training configuration for a transformer question-answering model on
// SQuAD v1.1 that lists four CUDA devices under "distributed".
// NOTE(review): the registered type names and dataset URLs suggest this is an
// AllenNLP config; confirm against the framework version in use.

// Transformer name handed to both the dataset reader and the model.
local transformer_model = 'bert-base-cased';

// Epoch count shared by the trainer and its learning-rate scheduler.
local epochs = 3;

// Batch size given to the bucket batch sampler.
local batch_size = 8;

// Locations of the SQuAD v1.1 training and dev JSON files.
local train_path = 'https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json';
local dev_path = 'https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json';

// Device ids placed under distributed.cuda_devices.
local cuda_devices = [0, 1, 2, 3];

// Optimizer settings; weight decay is explicitly zero.
local optimizer = {
  type: 'huggingface_adamw',
  weight_decay: 0.0,
  lr: 2e-5,
  eps: 1e-8,
};

// Scheduler settings; num_epochs is tied to the trainer's epoch count.
local lr_scheduler = {
  type: 'slanted_triangular',
  num_epochs: epochs,
  cut_frac: 0.1,
};

{
  dataset_reader: {
    type: 'transformer_squad',
    transformer_model_name: transformer_model,
    skip_invalid_examples: true,
  },

  // Same settings as dataset_reader, except skip_invalid_examples is false.
  // `self` is kept (rather than a local) so the reference stays late-bound.
  validation_dataset_reader: self.dataset_reader + {
    skip_invalid_examples: false,
  },

  train_data_path: train_path,
  validation_data_path: dev_path,

  vocabulary: {
    type: 'empty',
  },

  model: {
    type: 'transformer_qa',
    transformer_model_name: transformer_model,
  },

  data_loader: {
    batch_sampler: {
      type: 'bucket',
      batch_size: batch_size,
    },
  },

  trainer: {
    optimizer: optimizer,
    learning_rate_scheduler: lr_scheduler,
    grad_clipping: 1.0,
    num_epochs: epochs,
  },

  distributed: {
    cuda_devices: cuda_devices,
  },
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment