# Prateek Joshi (prateekjoshi565)

## nlg_get_seqs.py
# Build the sequences for every movie plot and flatten them in one pass.
# A nested comprehension yields the same flat list as summing the per-plot
# lists onto [], but in linear time: list addition inside sum() copies the
# accumulated result on every step.
seqs = [seq for plot in movie_plots for seq in create_seq(plot)]

# count of sequences
len(seqs)

## nlg_seq_prep_func.py
# create sliding-window token sequences from a text
def create_seq(text, seq_len = 5):
    """Slice `text` into overlapping windows of whitespace-separated tokens.

    Each window is `text.split()[i-seq_len:i+1]`, i.e. `seq_len + 1`
    consecutive tokens (6 with the default), advanced one token at a time.

    NOTE(review): the visible snippet stops right after the slice is taken --
    nothing is appended to `sequences` and nothing is returned here, so the
    remainder of the body (including whatever happens for short texts) is
    presumably cut off. Confirm against the full source.
    """

    sequences = []

    # only texts with more than `seq_len` tokens are windowed
    if len(text.split()) > seq_len:
      for i in range(seq_len, len(text.split())):
        # window ends at token i (inclusive) and starts seq_len tokens earlier
        seq = text.split()[i-seq_len:i+1]

## nlg_get_data.py
# read pickle file; the context manager closes the handle even when
# unpickling raises, so the file descriptor is never leaked
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open("plots_text.pickle", "rb") as pickle_in:
    movie_plots = pickle.load(pickle_in)

# count of movie plot summaries
len(movie_plots)

## nlg_import_lib.py
import re
import pickle
import random

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

## performance_bert.py
# replace the raw outputs by the index of the largest value along axis 1
preds = np.argmax(preds, axis=1)

# build the classification report for the predictions against test_y, then show it
report = classification_report(test_y, preds)
print(report)

## prediction_bert.py
# run the model on the test tensors with gradient tracking disabled and
# bring the result back to the CPU as a NumPy array
with torch.no_grad():
  preds = (
      model(test_seq.to(device), test_mask.to(device))
      .detach()
      .cpu()
      .numpy()
  )

## load_weights_bert.py
# load weights of best model
# NOTE(review): presumably this checkpoint is written by the training loop
# when the validation loss improves -- the save call is not visible here.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
path = 'saved_weights.pt'
# read the saved object from `path` and apply it to `model` as its state dict
model.load_state_dict(torch.load(path))

## start_training_bert.py
# initial value for the best validation loss: positive infinity
best_valid_loss = float("inf")

# two separate, initially empty lists for the per-epoch training and
# validation losses
train_losses, valid_losses = [], []

#for each epoch
for epoch in range(epochs):


## evaluate_bert.py
# function for evaluating the model
def evaluate():
  """Announce evaluation, switch `model` to eval mode and zero the totals.

  NOTE(review): only the start of the function is visible in this snippet;
  the evaluation loop and any return value are not shown -- confirm against
  the full source.
  """

  print("\nEvaluating...")

  # deactivate dropout layers
  model.eval()

  # running totals, both starting at zero
  total_loss, total_accuracy = 0, 0


## train_bert.py
# function to train the model
def train():
  """Switch `model` to training mode and initialise the accumulators.

  NOTE(review): only the start of the function is visible in this snippet;
  the batch loop, optimiser step and any return value are not shown --
  confirm against the full source.
  """

  model.train()

  # running totals, both starting at zero
  total_loss, total_accuracy = 0, 0

  # empty list to save model predictions
  total_preds=[]
	# NOTE(review): everything from here down repeats the snippets above
	# (sequence building, create_seq, pickle loading, imports, BERT
	# prediction / evaluation / training stubs) with every line re-indented to
	# a single tab, so the bodies of `def`, `if`, `for` and `with` are no
	# longer nested under their headers. Treat the earlier copies as the
	# readable ones.
	seqs = [create_seq(i) for i in movie_plots]

	# merge list-of-lists into a single list
	seqs = sum(seqs, [])

	# count of sequences
	len(seqs)
	# create sequences of length 5 tokens
	def create_seq(text, seq_len = 5):

	sequences = []

	# if the number of tokens in 'text' is greater than 5
	if len(text.split()) > seq_len:
	for i in range(seq_len, len(text.split())):
	# select sequence of tokens
	seq = text.split()[i-seq_len:i+1]
	# read pickle file
	pickle_in = open("plots_text.pickle","rb")
	movie_plots = pickle.load(pickle_in)

	# count of movie plot summaries
	len(movie_plots)
	import re
	import pickle
	import random

	import numpy as np
	import pandas as pd
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	preds = np.argmax(preds, axis = 1)
	print(classification_report(test_y, preds))
	# get predictions for test data
	with torch.no_grad():
	preds = model(test_seq.to(device), test_mask.to(device))
	preds = preds.detach().cpu().numpy()
	#load weights of best model
	path = 'saved_weights.pt'
	model.load_state_dict(torch.load(path))
	# set initial loss to infinite
	best_valid_loss = float('inf')

	# empty lists to store training and validation loss of each epoch
	train_losses=[]
	valid_losses=[]

	#for each epoch
	for epoch in range(epochs):
	# function for evaluating the model
	def evaluate():

	print("\nEvaluating...")

	# deactivate dropout layers
	model.eval()

	total_loss, total_accuracy = 0, 0
	# function to train the model
	def train():

	model.train()

	total_loss, total_accuracy = 0, 0

	# empty list to save model predictions
	total_preds=[]