This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Resolve the id2label mapping for this run: reuse a cached local copy if it
# exists, otherwise download it from the prepared-data directory on S3.
print(f"Running inference on {config.s3_parent_dir}/run_{config.run_num}")
data_parent_path = f"{config.s3_parent_dir}/data/prepared_data/{config.encoded_data_dir}"
id2label_local_path = os.path.join(os.getcwd(), 'id2label.json')
if not os.path.isfile(id2label_local_path):
    id2label_s3_path = f"{data_parent_path}/id2label.json"
    # NOTE(review): assumes `bucket` is a boto3 S3 Bucket resource — confirm with caller.
    bucket.download_file(id2label_s3_path, id2label_local_path)
with open(id2label_local_path) as f:
    # id2label: JSON object mapping class ids to label names, used below for inference.
    id2label = json.load(f)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Configure the SageMaker processing job used for inference (fragment — the
# ScriptProcessor(...) argument list continues past this excerpt).
if config.run_inference:
    role = get_role(config.execution_role)
    # ScriptProcessor runs a python3 entry point inside the project's Docker image.
    processor = ScriptProcessor(
        command=['python3'],
        image_uri=config.docker_image_path,
        role=role.arn,
        instance_count=1,
        # NOTE(review): reuses the data-preparation instance type / runtime
        # settings for inference — confirm this is intentional.
        instance_type=config.preparation_instance,
        volume_size_in_gb=config.storage_size,
        max_runtime_in_seconds=config.preparation_runtime,
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Persist experiment results to the S3 run directory. Training jobs upload
# everything; tuning jobs keep only the best trial's stats (fragment — the
# body of the final `with open(...)` continues past this excerpt).
if not is_tuning_job:
    # this is a training job, just upload
    write_experiment_results(files_to_be_uploaded, s3_run_dir)
else:
    # this is a tuning job, the run dir should only have the best result
    if folder_exists(bucket, s3_run_dir):
        temp_file = os.path.join(os.getcwd(), 'remote_best_model_stats.json')
        # NOTE(review): os.path.join on an S3 key is platform-dependent
        # (backslashes on Windows) — an f-string join would be safer.
        s3_path = os.path.join(s3_run_dir, 'best_model_stats.json')
        bucket.download_file(s3_path, temp_file)
        with open(temp_file) as f:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def convert_to_torchscript(model_path, best_model, train_data, hyperparams):
    """Prepare the best model on CPU for TorchScript conversion (fragment —
    the example-input construction and tracing continue past this excerpt).

    model_path: destination path for the scripted model.
    best_model: trained torch.nn.Module to convert; moved to CPU here.
    train_data: dataset whose first item supplies the example inputs.
    hyperparams: dict; 'model_name' selects the per-model input key ordering.
    """
    cpu_model = best_model.cpu()
    # eval() disables dropout / batch-norm updates before tracing.
    cpu_model.eval()
    sample_instance = train_data[0]
    ordered_input_keys = ordered_model_input_keys()
    example_inputs = []
    # The helper may return either a single OrderedDict or a mapping keyed by
    # model name — select the per-model ordering in the latter case.
    if not isinstance(ordered_input_keys, OrderedDict):
        ordered_input_keys = ordered_input_keys[hyperparams['model_name']]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# below only necessary if we're using tracking
accelerator.end_training()
if accelerator.is_main_process:
    # Main process only: persist the run configuration plus both model formats
    # (.pth state dict for resuming, TorchScript .pt for serving).
    run_config_local_path = write_run_config(hyperparams['train_batch_size'], hyperparams['eval_batch_size'],
                                             hyperparams['learning_rate'], hyperparams['epochs'])
    local_best_model = os.path.join(os.getcwd(), f"{hyperparams['model_name']}_finetuned.pth")
    # accelerator.save handles saving safely in a distributed setting.
    accelerator.save(best_model.state_dict(), local_best_model)
    local_torchscript_model_path = os.path.join(os.getcwd(), f"{hyperparams['model_name']}_finetuned.pt")
    convert_to_torchscript(local_torchscript_model_path,best_model, train_data, hyperparams)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# After training: sync all processes, unwrap the best model from its
# distributed wrapper, then on the main process emit the tuning objective and
# plot curves (fragment — the plot_curves(...) argument list continues past
# this excerpt).
accelerator.wait_for_everyone()
unwrapped_best_model = accelerator.unwrap_model(curr_best_model)
if accelerator.is_main_process:
    # this print line is used by tuning jobs to select the best job
    print(f"objective_metric_f1={best_f1};")
    loss_filename, learning_filename, f1_filename = plot_curves(
        train_dataloader.batch_size,
        num_train_epochs,
        learning_rate,
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-epoch bookkeeping on the main process: append to the in-memory metric
# histories and forward the same values to the configured tracker (fragment —
# the accelerator.log(...) payload continues past this excerpt).
if accelerator.is_main_process:
    training_loss = total_loss.item() / len(train_dataloader)
    train_eval_data['train_loss_history'].append(training_loss)
    train_eval_data['valid_acc_history'].append(valid_acc)
    train_eval_data['valid_f1_history'].append(valid_f1)
    accelerator.log(
        {
            "accuracy": valid_acc,
            "f1": valid_f1,
            # NOTE(review): recomputes the value already stored in
            # training_loss above — could reuse that variable.
            "train_loss": total_loss.item() / len(train_dataloader),
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Validation loop: forward pass without gradients, then gather predictions and
# labels across processes (fragment — metric accumulation continues past this
# excerpt). Indentation reconstructed from the mangled source; nesting of the
# lines after the no_grad block follows the standard Accelerate example —
# TODO confirm against the original file.
for step, batch in enumerate(valid_dataloader):
    if config.is_comparison:
        # Each architecture under comparison may expect different batch keys.
        batch = get_model_specific_batch(batch, model_name)
    with torch.no_grad():
        outputs = model(**batch)
    predictions = outputs.logits.argmax(-1)
    # gather_for_metrics drops the duplicate samples that distributed
    # dataloader padding adds, so metrics are computed on the true set.
    predictions, references = accelerator.gather_for_metrics(
        (predictions, batch['labels'])
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# One training epoch with gradient accumulation (fragment — the optimizer /
# scheduler stepping inside the accumulate block continues past this excerpt).
total_loss = 0.0
for batch in dataloader:
    if config.is_comparison:
        batch = get_model_specific_batch(batch, model_name)
    # accumulate() defers the gradient sync/step until the configured
    # accumulation boundary is reached.
    with accelerator.accumulate(model):
        outputs = model(**batch)
        loss = outputs.loss
        # detach before summing so each step's graph can be freed
        total_loss += loss.detach().float()
        accelerator.backward(loss)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pre-training setup: total optimization step count, metric-history
# containers, and best-score trackers (fragment — training continues past
# this excerpt).
t_total = num_train_epochs * len(train_dataloader)  # total number of training steps
if accelerator.is_main_process:
    print("Total number of training steps ", t_total)
# One history list per tracked metric, filled once per epoch.
train_eval_data = {
    key: []
    for key in ('train_loss_history', 'valid_acc_history', 'valid_f1_history')
}
best_f1 = 0
best_acc = 0