Skip to content

Instantly share code, notes, and snippets.

View prateekjoshi565's full-sized avatar
🎯
Focusing

Prateek Joshi prateekjoshi565

🎯
Focusing
View GitHub Profile
@prateekjoshi565
prateekjoshi565 / model_architecture.py
Created February 6, 2019 09:51
Model Architecture
# build NMT model
def build_model(in_vocab, out_vocab, in_timesteps, out_timesteps, n):
    """Assemble the (uncompiled) Sequential network for the NMT task.

    Layers, in order: an ``Embedding`` mapping ``in_vocab`` indices to
    ``n``-dimensional vectors (``input_length=in_timesteps``,
    ``mask_zero=True``), an ``LSTM(n)``, a ``RepeatVector(out_timesteps)``,
    a second ``LSTM(n)`` with ``return_sequences=True``, and a softmax
    ``Dense`` layer with ``out_vocab`` units.

    Args:
        in_vocab: first argument of the Embedding layer (input vocabulary size).
        out_vocab: number of units of the final softmax Dense layer.
        in_timesteps: ``input_length`` given to the Embedding layer.
        out_timesteps: repeat count given to RepeatVector.
        n: embedding width and unit count of both LSTM layers.

    Returns:
        The assembled Sequential model; compiling it is left to the caller.
    """
    return Sequential([
        Embedding(in_vocab, n, input_length=in_timesteps, mask_zero=True),
        LSTM(n),
        RepeatVector(out_timesteps),
        LSTM(n, return_sequences=True),
        Dense(out_vocab, activation='softmax'),
    ])
@prateekjoshi565
prateekjoshi565 / compile_model.py
Created February 6, 2019 09:55
Model Compilation
# model compilation (with 512 hidden units)
model = build_model(deu_vocab_size, eng_vocab_size, deu_length, eng_length, 512)

# The learning rate is passed positionally on purpose: the keyword was renamed
# from `lr` to `learning_rate` across Keras releases (and `lr` is rejected by
# newer ones), while its position as the first argument did not change.
rms = optimizers.RMSprop(0.001)

model.compile(optimizer=rms, loss='sparse_categorical_crossentropy')
@prateekjoshi565
prateekjoshi565 / train_model.py
Last active February 6, 2019 09:58
Train Model
# path the checkpoint callback writes the model to
filename = 'model.h1.24_jan_19'

# checkpoint callback: watch val_loss and only save when it reaches a new minimum
checkpoint = ModelCheckpoint(
    filename,
    mode='min',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)
# train model
history = model.fit(trainX, trainY.reshape(trainY.shape[0], trainY.shape[1], 1),
@prateekjoshi565
prateekjoshi565 / predict.py
Created February 6, 2019 10:00
Make Predictions
# reload the saved model from disk
model = load_model('model.h1.24_jan_19')

# Sequential.predict_classes() is no longer available in newer Keras releases.
# For a multi-class softmax output it was the argmax over the last axis of
# predict(), so that is computed directly here; this works on old and new
# versions alike.
preds = model.predict(
    testX.reshape((testX.shape[0], testX.shape[1]))
).argmax(axis=-1)
@prateekjoshi565
prateekjoshi565 / integer_word_mapping.py
Created February 6, 2019 10:01
Integer to word mapping
def get_word(n, tokenizer):
    """Return the word whose index in ``tokenizer.word_index`` equals ``n``.

    ``tokenizer.word_index`` is scanned in iteration order and the first
    matching word wins. ``None`` is returned when no word carries index ``n``.
    """
    matches = (
        candidate
        for candidate, position in tokenizer.word_index.items()
        if position == n
    )
    return next(matches, None)
# Translate each predicted index sequence in `preds` into words via get_word().
# NOTE(review): this snippet looks truncated -- in the lines visible here
# nothing is ever appended to preds_text; confirm against the full script.
preds_text = []
for i in preds:
# words collected for the current sequence
temp = []
for j in range(len(i)):
t = get_word(i[j], eng_tokenizer)
if j > 0:
# append '' instead of the word when it equals the previous position's word
# or when get_word() found no mapping (None)
if (t==get_word(i[j-1],eng_tokenizer))or(t== None):
temp.append('')
else:
temp.append(t)
import pandas as pd
import numpy as np
import spacy
from tqdm import tqdm
import re
import time
import pickle
# widen pandas' column display limit to 200
pd.set_option('display.max_colwidth', 200)

# strip URLs (anything matching http\S+) from the train and test tweets
train['clean_tweet'] = train['tweet'].map(
    lambda tweet: re.sub(r'http\S+', '', tweet)
)
test['clean_tweet'] = test['tweet'].map(
    lambda tweet: re.sub(r'http\S+', '', tweet)
)
@prateekjoshi565
prateekjoshi565 / text_preprocessing_elmo.py
Created March 6, 2019 18:33
text preprocessing elmo
# remove punctuation marks
punctuation = '!"#$%&()*+-/:;<=>?@[\\]^_`{|}~'

# Deletion table built once up front, so the per-tweet work is a single
# str.translate() pass instead of rebuilding a set for every character.
_punctuation_table = str.maketrans('', '', punctuation)

train['clean_tweet'] = train['clean_tweet'].apply(lambda x: x.translate(_punctuation_table))
test['clean_tweet'] = test['clean_tweet'].apply(lambda x: x.translate(_punctuation_table))

# convert text to lowercase
train['clean_tweet'] = train['clean_tweet'].str.lower()
test['clean_tweet'] = test['clean_tweet'].str.lower()
@prateekjoshi565
prateekjoshi565 / text_normalization_elmo.py
Last active March 24, 2019 06:16
text normalization elmo
# import spaCy's language model
# Loaded by package name: the 'en' shortcut link is not accepted by
# spaCy v3+, whereas the package name works on v2 and v3 alike.
# The parser and NER components are disabled, as before.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])
# function to lemmatize text
def lemmatization(texts):
    """Return a list with each entry of ``texts`` replaced by its lemmas.

    Every text is passed through the module-level ``nlp`` pipeline and the
    ``lemma_`` of each resulting token is joined with single spaces. The
    output list has one string per input text, in the same order.
    """
    return [
        ' '.join(tok.lemma_ for tok in nlp(text))
        for text in texts
    ]