Last active
October 10, 2020 07:02
-
-
Save megha444/f355ca2b16424ab161292677678639ad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# function to train the model
def train():
    """Run one training pass over every batch in ``trainDataLoader``.

    Uses the module-level ``model_def``, ``trainDataLoader``, ``device``,
    ``crossentropy`` and ``optimized`` objects, which must be defined before
    this function is called.

    Returns:
        tuple: ``(avgloss, totalpred)`` where ``avgloss`` is the summed batch
        loss divided by the number of batches and ``totalpred`` is a NumPy
        array holding the per-batch predictions concatenated along axis 0.
    """
    # switch the model to training mode
    model_def.train()

    totalloss = 0

    # empty list to save model predictions
    totalpred = []

    # iterate over batches
    for steps, batches in enumerate(trainDataLoader):

        # report progress every 50 batches (skipping the very first one)
        if steps % 50 == 0 and not steps == 0:
            print(' Batch {:>5,} of {:>5,}.'.format(steps, len(trainDataLoader)))

        # push the batch to the target device
        batches = [r.to(device) for r in batches]
        sent_input_id, mask, labels = batches

        # clear previously calculated gradients
        model_def.zero_grad()

        # get model predictions for the current batch
        pred = model_def(sent_input_id, mask)

        # compute the loss between actual and predicted values
        los = crossentropy(pred, labels)

        # add on to the total loss
        totalloss = totalloss + los.item()

        # backward pass to calculate the gradients
        los.backward()

        # clip the gradient norm to 1.0; helps prevent exploding gradients
        torch.nn.utils.clip_grad_norm_(model_def.parameters(), 1.0)

        # update parameters
        optimized.step()

        # detach the predictions from the graph and move them to CPU memory
        pred = pred.detach().cpu().numpy()

        # append the model predictions
        totalpred.append(pred)

    # compute the training loss of the epoch
    avgloss = totalloss / len(trainDataLoader)

    # merge the per-batch arrays into one array along the first axis
    # (presumably (number of samples, no. of classes) -- confirm against the model)
    totalpred = np.concatenate(totalpred, axis=0)

    # return the loss and predictions
    return avgloss, totalpred
# DEFINING THE EVALUATE FUNCTION
# function for evaluating the model
def evaluate():
    """Run one evaluation pass over every batch in ``valDataLoader``.

    Uses the module-level ``model_def``, ``valDataLoader``, ``device`` and
    ``crossentropy`` objects, which must be defined before this function is
    called. No gradients are computed and no parameters are updated.

    Returns:
        tuple: ``(avgloss, totalpred)`` where ``avgloss`` is the summed batch
        loss divided by the number of batches and ``totalpred`` is a NumPy
        array holding the per-batch predictions concatenated along axis 0.
    """
    print("\n Evaluating")

    # switch the model to evaluation mode (deactivates dropout layers)
    model_def.eval()

    totalloss = 0

    # empty list to save the model predictions
    totalpred = []

    # iterate over batches
    for steps, batches in enumerate(valDataLoader):

        # report progress every 50 batches (skipping the very first one)
        if steps % 50 == 0 and not steps == 0:
            print(' Batch {:>5,} of {:>5,}.'.format(steps, len(valDataLoader)))

        # push the batch to the target device
        batches = [t.to(device) for t in batches]
        sent_input_id, mask, labels = batches

        # deactivate autograd
        with torch.no_grad():

            # model predictions; use the same model object that train()
            # optimises and that the checkpoint is saved from
            pred = model_def(sent_input_id, mask)

            # compute the validation loss between actual and predicted values
            los = crossentropy(pred, labels)
            totalloss = totalloss + los.item()

            pred = pred.detach().cpu().numpy()
            totalpred.append(pred)

    # compute the validation loss of the epoch
    avgloss = totalloss / len(valDataLoader)

    # merge the per-batch arrays into one array along the first axis
    # (presumably (number of samples, no. of classes) -- confirm against the model)
    totalpred = np.concatenate(totalpred, axis=0)

    return avgloss, totalpred
# FINE TUNING THE MODEL
# Runs train() and evaluate() once per epoch, keeps the weights of the epoch
# with the lowest validation loss, and records both losses for every epoch.

# set initial loss to infinite so the first epoch always counts as the best
bestvalidloss = float('inf')

# empty lists to store training and validation loss of each epoch
trainloss = []
validloss = []

# for each epoch
for epoch in range(epochs):

    print('\n Epoch {:} / {:}'.format(epoch + 1, epochs))

    # train model
    train_losses, _ = train()

    # evaluate model
    valid_losses, _ = evaluate()

    # save the best model: remember this epoch's scalar validation loss
    # (not the history list) so later epochs are compared against it
    if valid_losses < bestvalidloss:
        bestvalidloss = valid_losses
        torch.save(model_def.state_dict(), 'saved_weights.pt')

    # append training and validation loss
    trainloss.append(train_losses)
    validloss.append(valid_losses)

    print(f'\nTraining Loss: {train_losses:.3f}')
    print(f'Validation Loss: {valid_losses:.3f}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment