# Making final predictions
final_pred=model.predict(test_seq)
# Creating dataframe for final probabilities
prob=pd.DataFrame(columns=['id','toxic','severe_toxic','obscene','threat','insult','identity_hate'])
prob['id']=df_test['id']
# Populating dataframe with probabilities
for index,value in enumerate(['toxic','severe_toxic','obscene','threat','insult','identity_hate']):
    prob[value]=final_pred[:,index]
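If this dataframe is intended as a Kaggle submission, writing it out is a single call; the file name below is an assumption, not taken from the original gist.
# Assumed output path: a CSV with the 'id' column plus one probability column per label
prob.to_csv('submission.csv',index=False)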
# Out-of-sample Evaluation
valid_pred=model.predict(x_valid_split)
print('Out-of-sample Evaluation ROC-AUC Score:\n',roc_auc_score(y_valid_split,valid_pred))
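The single averaged score above can hide weak labels; a minimal per-label breakdown on the validation split, assuming valid_pred and y_valid_split as defined above, might look like this.
from sklearn.metrics import roc_auc_score
labels=['toxic','severe_toxic','obscene','threat','insult','identity_hate']
# ROC-AUC for each of the six labels separately
for i,label in enumerate(labels):
    print(label,roc_auc_score(y_valid_split[:,i],valid_pred[:,i]))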
from sklearn.metrics import roc_auc_score
# In-sample Evaluation
train_pred=model.predict(x_train_split)
print('In-sample Evaluation ROC-AUC Score:\n',roc_auc_score(y_train_split,train_pred))
# Compile Model
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=["accuracy"])
# Callbacks
es=EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=5,min_delta=1e-5)
mc = ModelCheckpoint("/kaggle/working/model.hdf5", monitor='val_loss', verbose=0,
                     save_best_only=True, mode='min')
# Training the model
model.fit(x_train_split,y_train_split, batch_size=512, epochs=100, verbose=1,
          # Assumed completion of the truncated call: the validation data feeds the
          # val_loss that the EarlyStopping and ModelCheckpoint callbacks above monitor
          validation_data=(x_valid_split,y_valid_split),
          callbacks=[es,mc])
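Since ModelCheckpoint keeps only the best weights by val_loss, a common follow-up (a sketch, assuming the checkpoint path above is unchanged) is to reload that checkpoint before predicting.
from keras.models import load_model
# Restore the best checkpoint saved during training
model=load_model("/kaggle/working/model.hdf5")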
# Importing from keras
from keras.models import Model
from keras.layers import Input, Dense, Embedding, Dropout, Conv1D, GlobalMaxPooling1D
from keras.callbacks import EarlyStopping, ModelCheckpoint
# Defining model architecture
input_1=Input(shape=(100,))
embedding_1=Embedding(vocabulary,100)(input_1)
conv_1=Conv1D(filters=352,kernel_size=7,padding="same")(embedding_1)
dropout_1=Dropout(0.06675)(conv_1)
# NOTE: the snippet was cut off here; the pooling/output head below is an assumed
# completion based on the imports above and the six target labels
pooling_1=GlobalMaxPooling1D()(dropout_1)
output_1=Dense(6,activation='sigmoid')(pooling_1)
model=Model(inputs=input_1,outputs=output_1)
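With the graph assembled, a quick sanity check (assuming the completed model above) is to print the layer shapes and parameter counts.
# Verify output shapes and parameter counts of the assembled model
model.summary()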
print('Shape of Train Split=>',x_train_split.shape,y_train_split.shape)
print('Shape of Validation Split=>',x_valid_split.shape,y_valid_split.shape)
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
msss=MultilabelStratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
for train_index, val_index in msss.split(train_seq, y_train):
    # Creating Train Set
    x_train_split,y_train_split=train_seq[train_index],y_train[train_index]
    # Creating Validation Set
    x_valid_split,y_valid_split=train_seq[val_index],y_train[val_index]
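One way to confirm the multilabel stratification behaved as intended (a sketch, assuming the split arrays defined above) is to compare per-label positive rates across the two splits.
labels=['toxic','severe_toxic','obscene','threat','insult','identity_hate']
# The positive rate for each label should be nearly identical in the two splits
for i,label in enumerate(labels):
    print(label, round(float(y_train_split[:,i].mean()),4), round(float(y_valid_split[:,i].mean()),4))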
# Installing iterative-stratification for the multilabel stratified split
!pip install iterative-stratification
y_train=df_train[['toxic','severe_toxic','obscene','threat','insult','identity_hate']].values
print('Shape of Training Labels=>',y_train.shape)
print('Shape of train_sequence=>',train_seq.shape)
print('Shape of test_sequence=>',test_seq.shape)
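train_seq, test_seq, and vocabulary come from a preprocessing step that is not part of these snippets; the sketch below shows one way they might have been produced, consistent with Input(shape=(100,)) and Embedding(vocabulary,100) above. The 'comment_text' column name is an assumption.
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# Assumed preprocessing: fit a word-level tokenizer on the training comments only
tokenizer=Tokenizer()
tokenizer.fit_on_texts(df_train['comment_text'])
vocabulary=len(tokenizer.word_index)+1
# Convert comments to integer sequences and pad/truncate them to length 100
train_seq=pad_sequences(tokenizer.texts_to_sequences(df_train['comment_text']),maxlen=100)
test_seq=pad_sequences(tokenizer.texts_to_sequences(df_test['comment_text']),maxlen=100)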