Arun Mohan (arunm8489)

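# Imports used by the snippets below.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader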
# pick the GPU if one is available and move the model onto it
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = Network(weight_matrix=embedding_matrix, hidden_dim=128, seq_len=440)
model = model.to(device)
print(model)

# train for 7 epochs with a learning rate of 0.01
train = Train(epochs=7, lr=0.01)
metrics = train.train_model(model)
class Train():
    def __init__(self, epochs, lr=0.01, train_loader=train_loader,
                 test_loader=test_loader, seq_len=440):
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.epochs = epochs
        self.lr = lr
        self.seq_len = seq_len
        # paths for the running checkpoint and for the best model seen so far
        self.checkpoint_path = 'model1/chkpoint1_'
        self.best_model_path = 'model1/bestmodel1.pt'
        # lowest test loss observed so far; the best model is saved whenever it improves
        self.test_loss_min = 3.95275
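    # The training loop itself is not included in this snippet. Below is a
    # minimal sketch of what `train_model` might look like, assuming a model
    # that outputs sigmoid probabilities for binary classification, BCE loss
    # and Adam. The method name comes from the call `train.train_model(model)`
    # above; the body is an assumption, not the author's original code.
    def train_model(self, model):
        device = next(model.parameters()).device
        criterion = nn.BCELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=self.lr)
        metrics = {'train_loss': [], 'test_loss': []}
        for epoch in range(self.epochs):
            # training pass
            model.train()
            train_loss = 0.0
            for inputs, labels in self.train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                output = model(inputs)
                loss = criterion(output.squeeze(), labels.float())
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
            # evaluation pass on the held-out set
            model.eval()
            test_loss = 0.0
            with torch.no_grad():
                for inputs, labels in self.test_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    output = model(inputs)
                    test_loss += criterion(output.squeeze(), labels.float()).item()
            train_loss /= len(self.train_loader)
            test_loss /= len(self.test_loader)
            metrics['train_loss'].append(train_loss)
            metrics['test_loss'].append(test_loss)
            print(f'epoch {epoch + 1}: train_loss={train_loss:.5f} test_loss={test_loss:.5f}')
            # checkpoint every epoch; keep a separate copy whenever the test loss improves
            torch.save(model.state_dict(), self.checkpoint_path)
            if test_loss < self.test_loss_min:
                torch.save(model.state_dict(), self.best_model_path)
                self.test_loss_min = test_loss
        return metrics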
class Network(nn.Module):
    def __init__(self, weight_matrix=embedding_matrix, hidden_dim=128, seq_len=440):
        super().__init__()
        vocab_size = weight_matrix.shape[0]
        vector_dim = weight_matrix.shape[1]
        self.seq_len = seq_len
        # text branch: embedding layer sized from the pre-trained GloVe matrix
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(vocab_size, vector_dim)
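        # The gist is truncated here. A minimal sketch of how it might
        # continue, assuming the GloVe weights are copied into the embedding
        # layer and an LSTM over the first `seq_len` token columns feeds a
        # sigmoid head; the layer names and the input slicing below are
        # assumptions, not the author's original code.
        self.embedding.weight = nn.Parameter(torch.tensor(weight_matrix,
                                                          dtype=torch.float32))
        self.embedding.weight.requires_grad = False   # keep the GloVe vectors frozen
        self.lstm = nn.LSTM(vector_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # first seq_len columns: padded essay token ids;
        # remaining columns: categorical/numeric features (unused in this sketch)
        text = x[:, :self.seq_len].long()
        embedded = self.embedding(text)            # (batch, seq_len, vector_dim)
        _, (hidden, _) = self.lstm(embedded)       # hidden: (1, batch, hidden_dim)
        return torch.sigmoid(self.fc(hidden[-1]))  # (batch, 1) probability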
# create Tensor datasets
train_data = TensorDataset(torch.from_numpy(final_train), torch.from_numpy(y_train.values))
test_data = TensorDataset(torch.from_numpy(final_test), torch.from_numpy(y_test.values))
# dataloaders
batch_size = 500
# make sure to SHUFFLE your data
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
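# Illustrative sanity check (not in the original gist): fetch one batch and
# inspect its layout. The first 440 columns are the padded essay token ids,
# the remaining columns are the label-encoded categorical and standardized
# numeric features concatenated into final_train.
sample_x, sample_y = next(iter(train_loader))
print(sample_x.shape, sample_y.shape)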
# load the pre-trained embedding matrix (saved in a later snippet below)
embedding_matrix = np.load('embedding_matrix_2.npy')
# stack padded essays, label-encoded categoricals and standardized numeric features
x_train_ = [essay_train_pad, lb_train_school_state.reshape(-1,1), lb_train_teacher_prefix.reshape(-1,1),
            lb_train_category.reshape(-1,1), lb_train_sub_category.reshape(-1,1),
            lb_train_grade_category.reshape(-1,1), std_train_numeric]
x_test_ = [essay_test_pad, lb_test_school_state.reshape(-1,1), lb_test_teacher_prefix.reshape(-1,1),
           lb_test_category.reshape(-1,1), lb_test_sub_category.reshape(-1,1),
           lb_test_grade_category.reshape(-1,1), std_test_numeric]
final_train = np.concatenate(x_train_, axis=1)
final_test = np.concatenate(x_test_, axis=1)
# one 300-d GloVe vector per corpus word; words not found in GloVe keep an all-zero row
embedding_matrix = np.zeros((len(corpus) + 1, 300))
for i, word in enumerate(corpus):
    if word in glove_dict:
        embedding_matrix[i] = glove_dict[word]
print(embedding_matrix.shape)
np.save('embedding_matrix_2.npy', embedding_matrix)
# code from https://stackoverflow.com/questions/37793118/load-pretrained-glove-vectors-in-python
def loadGloveModel(File):
    """Parse a GloVe text file into a {word: vector} dictionary."""
    print("Loading Glove Model")
    gloveModel = {}
    with open(File, 'r') as f:
        for line in f:
            splitLines = line.split()
            word = splitLines[0]
            wordEmbedding = np.array([float(value) for value in splitLines[1:]])
            gloveModel[word] = wordEmbedding
    return gloveModel
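# Usage (illustrative; the exact GloVe file name is an assumption, the 300-d
# vectors match the embedding matrix built above):
glove_dict = loadGloveModel('glove.6B.300d.txt')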
# distribution of tokenized essay lengths (useful for choosing a padding length such as seq_len = 440)
rev_len = [len(i) for i in essay_train_p]
pd.Series(rev_len).hist()
plt.show()
print(pd.Series(rev_len).describe())
def padding_(sentences, seq_len):
    """
    Pad on the left-hand side:
    if seq_len = 5 and the input is [1,2,3], the output is [0,0,1,2,3].
    Sequences longer than seq_len keep only their first seq_len tokens.
    """
    features = np.zeros((len(sentences), seq_len), dtype=int)
    for ii, review in enumerate(sentences):
        if len(review) != 0:
            features[ii, -len(review):] = np.array(review)[:seq_len]
    return features
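# Usage (illustrative): pad/truncate the integer-encoded essays to the fixed
# length expected by the model; `essay_test_p` is an assumed name mirroring
# `essay_train_p` above.
essay_train_pad = padding_(essay_train_p, 440)
essay_test_pad = padding_(essay_test_p, 440)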