This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Making final predictions on the test sequences
final_pred = model.predict(test_seq)

# Label columns, in the order they are read out of final_pred below
label_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Creating dataframe for final probabilities
prob = pd.DataFrame(columns=['id'] + label_cols)
prob['id'] = df_test['id']

# Populating dataframe with probabilities:
# column `index` of final_pred is written to the label at the same position
for index, value in enumerate(label_cols):
    prob[value] = final_pred[:, index]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Out-of-sample Evaluation: score the model on the held-out validation split
valid_pred = model.predict(x_valid_split)
# The label previously read "In-sample", which mis-described this validation score
print('Out-of-sample Evaluation ROC-AUC Score:\n', roc_auc_score(y_valid_split, valid_pred))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import roc_auc_score

# In-sample Evaluation: score the model on the same split it was fitted on
train_pred = model.predict(x_train_split)
print('In-sample Evaluation ROC-AUC Score:\n', roc_auc_score(y_train_split, train_pred))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compile Model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["accuracy"])

# Callbacks
# Early stopping watches val_loss (lower is better) with patience=5 and min_delta=1e-5
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5, min_delta=1e-5)
# Checkpoint is written to the Kaggle working directory; save_best_only keeps
# only the weights with the best (lowest) monitored val_loss
mc = ModelCheckpoint("/kaggle/working/model.hdf5", monitor='val_loss', verbose=0,
                     save_best_only=True, mode='min')
# Training the model | |
model.fit(x_train_split,y_train_split, batch_size=512, epochs=100, verbose=1, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Importing from keras
from keras.models import Model
from keras.layers import Input, Dense, Embedding, Dropout, Conv1D, GlobalMaxPooling1D
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Defining model architecture
# Each input sample has shape (100,)
input_1 = Input(shape=(100,))
# Embedding sized by `vocabulary` (defined elsewhere), producing 100-dim vectors
embedding_1 = Embedding(vocabulary, 100)(input_1)
# 1-D convolution over the embedded input; padding="same" keeps the length
conv_1 = Conv1D(filters=352, kernel_size=7, padding="same")(embedding_1)
# NOTE(review): 0.06675 looks like a tuned dropout rate - confirm its origin
dropout_1 = Dropout(0.06675)(conv_1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Report feature and label shapes for the train and validation splits
print('Shape of Train Split=>', x_train_split.shape, y_train_split.shape)
print('Shape of Validation Split=>', x_valid_split.shape, y_valid_split.shape)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit

# One multilabel-stratified shuffle split: 20% held out, fixed seed
msss = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)

# n_splits=1, so the splitter yields exactly one (train, validation) index pair
train_index, val_index = next(msss.split(train_seq, y_train))

# Creating Train Set
x_train_split, y_train_split = train_seq[train_index], y_train[train_index]
# Creating Validation Set
x_valid_split, y_valid_split = train_seq[val_index], y_train[val_index]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Installing for Stratified Split | |
!pip install iterative-stratification |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract the six label columns of df_train as a 2-D array of training targets
label_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
y_train = df_train[label_columns].values
print('Shape of Training Labels=>', y_train.shape)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Report the shapes of the train and test sequence arrays
print('Shape of train_sequence=>', train_seq.shape)
print('Shape of test_sequence=>', test_seq.shape)