This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Wrap every cleaned summary in the '_START_' / '_END_' marker tokens so each
# summary carries explicit begin and end tokens.
data['cleaned_summary'] = data['cleaned_summary'].apply(
    lambda summary: '_START_ ' + summary + ' _END_'
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Print the first five review/summary pairs as a quick sanity check.
# Rows are taken by position (head) rather than by integer label, so this
# still works when earlier filtering has left gaps in the index or when
# fewer than five rows remain.
for review, summary in zip(data['cleaned_text'].head(5),
                           data['cleaned_summary'].head(5)):
    print("Review:", review)
    print("Summary:", summary)
    print("\n")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt

# Number of whitespace-separated words in each cleaned review and summary.
text_word_count = [len(text.split()) for text in data['cleaned_text']]
summary_word_count = [
    len(summary.split()) for summary in data['cleaned_summary']
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fixed sequence lengths, passed as `maxlen` when padding the tokenized
# reviews and summaries respectively.
max_len_text = 80
max_len_summary = 10
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split

# Hold out 10% of the (review, summary) pairs for validation. The split is
# shuffled, with a fixed seed so it is reproducible.
x_tr, x_val, y_tr, y_val = train_test_split(
    data['cleaned_text'],
    data['cleaned_summary'],
    test_size=0.1,
    random_state=0,
    shuffle=True,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prepare a tokenizer for reviews, fitted on the training reviews only.
x_tokenizer = Tokenizer()
x_tokenizer.fit_on_texts(list(x_tr))

# Convert text sequences into integer sequences using the fitted tokenizer.
x_tr = x_tokenizer.texts_to_sequences(x_tr)
x_val = x_tokenizer.texts_to_sequences(x_val)

# Pad at the end ('post') up to max_len_text.
# NOTE(review): x_val is not padded within this excerpt -- confirm it is
# padded the same way further down.
x_tr = pad_sequences(x_tr, maxlen=max_len_text, padding='post')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prepare a tokenizer for summaries, fitted on the training summaries only.
y_tokenizer = Tokenizer()
y_tokenizer.fit_on_texts(list(y_tr))

# Convert summary sequences into integer sequences using the fitted tokenizer.
y_tr = y_tokenizer.texts_to_sequences(y_tr)
y_val = y_tokenizer.texts_to_sequences(y_val)

# Pad at the end ('post') up to max_len_summary.
# NOTE(review): y_val is not padded within this excerpt -- confirm it is
# padded the same way further down.
y_tr = pad_sequences(y_tr, maxlen=max_len_summary, padding='post')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras import backend as K

# Clear the Keras backend session before building the model.
K.clear_session()

# Shared width: used below as both the embedding dimension and the number of
# LSTM units.
latent_dim = 500

# Encoder: input sequences of length max_len_text, fed through a trainable
# embedding over x_voc_size entries.
encoder_inputs = Input(shape=(max_len_text,))
enc_emb = Embedding(x_voc_size, latent_dim, trainable=True)(encoder_inputs)

# LSTM 1: configured to return the full output sequence as well as its state.
encoder_lstm1 = LSTM(latent_dim, return_sequences=True, return_state=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compile with the RMSprop optimizer and sparse categorical cross-entropy loss.
model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Early-stopping callback that watches validation loss (lower is better) and
# logs when it triggers. `patience` is not set, so the library default applies.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)