Skip to content

Instantly share code, notes, and snippets.

View frenzy2106's full-sized avatar

Ankit Choudhary frenzy2106

View GitHub Profile
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from copy import deepcopy
from collections import namedtuple
# Evaluation metric for Innoplexus NER Challenge
def collect_named_entities(tokens): # Helper Function for score calculation
frenzy2106 /
Last active December 22, 2021 07:17
Mean Average Precision @ K
def apk(actual, predicted, k=3):
Computes the average precision at k.
This function computes the average precision at k between two lists of
actual : list
A list of elements that are to be predicted (order doesn't matter)
predicted : list
# preparing the submission file
# NOTE(review): `clean_test_data_pad` is not defined anywhere in this excerpt —
# the pad_sequences step that should produce it appears to be missing.
# NOTE(review): `Sequential.predict_classes` was removed in TF/Keras 2.6+;
# newer code uses `(model.predict(x) > 0.5).astype(int)` for a sigmoid output.
final_prediction = model.predict_classes(clean_test_data_pad)
# attach the predicted labels as a new column on the test frame
test_tweets['label'] = final_prediction
# the submission keeps only the id and predicted label columns
test_predictions = test_tweets[['id','label']]
# Loading the test data (Analytics Vidhya tweet-sentiment test split)
test_tweets = pd.read_csv("test_tweets_anuFYb8.csv")
# Cleaning the raw tweet text with the same clean_corpus() helper used on train data
test_data = test_tweets['tweet']
clean_test_data = clean_corpus(test_data)
# Text to integer sequences using the tokenizer fitted on the training corpus
# NOTE(review): the follow-up `pad_sequences(...)` call that should produce
# `clean_test_data_pad` (used later for prediction) is missing from this excerpt.
clean_test_data_token = tokenizer.texts_to_sequences(clean_test_data)
# NOTE(review): the next comment line looks scrape-mangled — it appears to be
# the remains of `model.fit(X_train, y_train, batch_size=10, epochs=2, verbose=2)`
# fused together with a "# Train the model" comment; restore it before running.
# Train the model,y_train,batch_size=10,epochs=2, verbose=2)
# Building & Compiling the model
# +1 because Keras tokenizer indices start at 1; index 0 is reserved for padding
vocab_size = len(tokenizer.word_index) + 1
max_length = 25  # presumably the padded sequence length — TODO confirm against pad_sequences call
model = keras.Sequential()
# NOTE(review): only the sigmoid output layer is visible here; the Embedding /
# hidden layers and the `model.compile(...)` call appear to be missing from
# this excerpt.
model.add(keras.layers.Dense(units=1, activation='sigmoid'))
# compile the model
# compile the model
# Creating Validation Set
X_train,X_test,y_train,y_test = train_test_split(corpus_pad,y,test_size=0.2,random_state=101)
X_train.shape, X_test.shape
# finding the average words present per comment
# NOTE(review): in the scraped original the loop body was lost, so
# `num_of_words_in_doc` stayed empty and `np.average([])` returned NaN with a
# RuntimeWarning. Reconstructed the obvious append of each tokenized doc's
# length — confirm against the original gist.
num_of_words_in_doc = []
for doc in corpus_tokens:
    num_of_words_in_doc.append(len(doc))
print("Average number of words: ", np.average(num_of_words_in_doc))
# check how many individual words are present in the corpus, and how often each occurs
word_dict = {}
for doc in corpus:
    words = nltk.word_tokenize(doc)
    for word in words:
        # Seed new words with 0 so the unconditional increment below yields the
        # true occurrence count. The original seeded with 1 and then still
        # incremented, overcounting every word by one.
        if word not in word_dict:
            word_dict[word] = 0
        word_dict[word] += 1
def clean_corpus(text):
corpus = []
for i in range(len(text)):
tweet = re.sub(r"^[a-zA-Z0-9]*\s"," ", str(text[i]))
tweet = re.sub(r"\s+[a-zA-Z0-9]*\s"," ", tweet)
tweet = re.sub(r"\s+[a-zA-Z0-9]*$"," ", tweet)
tweet = tweet.lower()
tweet = re.sub(r"can't","can not", tweet)
tweet = re.sub(r"hv","have", tweet)
tweet = re.sub(r"ur","your", tweet)