Skip to content

Instantly share code, notes, and snippets.

@josht-jpg
Created September 5, 2020 01:08
Show Gist options
  • Save josht-jpg/70a0e82255645a4a6fce19b416487072 to your computer and use it in GitHub Desktop.
Save josht-jpg/70a0e82255645a4a6fce19b416487072 to your computer and use it in GitHub Desktop.
Cleaning Labelled
def clean_labelled(sentence, stop_words):
    """Lower-case a sentence, split it into word tokens and drop stop words.

    Args:
        sentence: Text to clean. It must provide ``.lower()`` (a ``str``).
        stop_words: Container of tokens to discard. Each token is tested
            against it with the ``in`` operator.

    Returns:
        A list of the remaining lower-cased tokens, in their original order.
    """
    # Function-scope import so the block does not rely on a module-level
    # import that may not exist in this file.
    import re

    # Runs of word characters (letters, digits, underscore) become tokens and
    # everything else acts as a separator. This yields the same tokens as
    # nltk's RegexpTokenizer(r'\w+').tokenize(), without building a tokenizer
    # object on every call.
    tokens = re.findall(r'\w+', sentence.lower())

    # The original also built a throwaway pandas DataFrame from the tokens and
    # overwrote it on the next line; that dead work is removed.
    return [w for w in tokens if w not in stop_words]
# Replace each raw sentence with its cleaned token list, in both splits.
# stop_words_context is forwarded as clean_labelled's second positional
# argument through ``args``.
labelled_train['sentence'] = (
    labelled_train['sentence']
    .apply(clean_labelled, args=(stop_words_context,))
)
labelled_test['sentence'] = (
    labelled_test['sentence']
    .apply(clean_labelled, args=(stop_words_context,))
)
def nrc_sentence(sentence):
    """Sum the ``nrc_classify`` results of every word in ``sentence``.

    Args:
        sentence: Iterable of words; each one is passed to ``nrc_classify``.

    Returns:
        The running total, which starts as a length-10 array of zeros and has
        every non-empty ``nrc_classify`` result added to it with ``np.add``.
    """
    scores = np.zeros(10)
    for token in sentence:
        word_scores = nrc_classify(token)
        # An empty result contributes nothing to the total, so skip it.
        if len(word_scores) == 0:
            continue
        scores = np.add(scores, word_scores)
    return scores
# Store, for every row, the summed nrc_sentence result of its 'sentence'
# value in a new 'classification' column (train and test alike).
labelled_train['classification'] = (
    labelled_train['sentence'].apply(nrc_sentence)
)
labelled_test['classification'] = (
    labelled_test['sentence'].apply(nrc_sentence)
)
def labelled_adjust_class(labelled):
    """Expand the 'classification' column into one column per NRC sentiment.

    Args:
        labelled: DataFrame with a 'classification' column holding one
            sequence of scores per row.

    Returns:
        A new DataFrame with the score columns (named after
        ``NRC_sentiments``) joined on the index and the original
        'classification' column dropped.
    """
    # Map index label -> score sequence. from_dict lays these out as columns,
    # so transpose to get one row per original index label.
    scores_by_row = dict(labelled['classification'])
    sentiment_frame = pd.DataFrame.from_dict(scores_by_row).T
    sentiment_frame.columns = NRC_sentiments

    expanded = labelled.join(sentiment_frame)
    return expanded.drop(columns=['classification'])
# Swap the packed 'classification' column for per-sentiment columns,
# first on the training split and then on the test split.
labelled_train = labelled_adjust_class(labelled=labelled_train)
labelled_test = labelled_adjust_class(labelled=labelled_test)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment