Skip to content

Instantly share code, notes, and snippets.

@vivek1240
Last active May 30, 2021 10:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vivek1240/259473cbc64c7a70c14391d0a6afb7f2 to your computer and use it in GitHub Desktop.
Save vivek1240/259473cbc64c7a70c14391d0a6afb7f2 to your computer and use it in GitHub Desktop.
# Text-cleaning helpers: tokenize a sentence into words, drop stop words,
# lemmatize, and re-join the result.
import gensim
import spacy
from gensim.parsing.preprocessing import STOPWORDS
from gensim.utils import simple_preprocess
from nltk.tokenize.treebank import TreebankWordDetokenizer

# gensim's built-in stop-word collection, consulted by remove_stopwords().
stop_words = STOPWORDS

# Small English spaCy pipeline. The parser and NER components are disabled;
# lemmatization() only reads each token's POS tag and lemma.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])
def sent_to_words(sentence):
    """Yield the list of word tokens for ``sentence`` exactly once.

    The argument is coerced with ``str()`` and tokenized by
    ``gensim.utils.simple_preprocess``. ``deacc=True`` asks gensim to strip
    accent marks from the tokens (punctuation is already discarded by the
    tokenizer itself).

    This is a generator that produces a single item, so callers normally
    consume it with ``list()`` or ``next()``.
    """
    text = str(sentence)
    tokens = gensim.utils.simple_preprocess(text, deacc=True)
    yield tokens
def remove_stopwords(text):
    """Tokenize ``text`` and return the tokens that are not stop words.

    ``text`` is coerced with ``str()`` before being tokenized by
    ``simple_preprocess``; tokens found in the module-level ``stop_words``
    collection are dropped. Token order is preserved.
    """
    kept = []
    for token in simple_preprocess(str(text)):
        if token in stop_words:
            continue
        kept.append(token)
    return kept
def lemmatization(sent, allowed_postags=('NOUN', 'ADJ', 'VERB', 'ADV')):
    """Return the lemmas of the tokens in ``sent`` that carry an allowed POS tag.

    Args:
        sent: Iterable of word strings. The words are joined with single
            spaces and the resulting text is run through the module-level
            spaCy pipeline ``nlp``.
        allowed_postags: Container of coarse POS tags (compared against
            ``token.pos_``) to keep. The default is an immutable tuple so
            the default value can never be shared and mutated across calls;
            any container supporting ``in`` (list, tuple, set) is accepted.

    Returns:
        A list of ``token.lemma_`` strings, in document order, for every
        token whose ``pos_`` is in ``allowed_postags``.
    """
    doc = nlp(" ".join(sent))
    return [token.lemma_ for token in doc if token.pos_ in allowed_postags]
def cleaning_new_text(data):
    """Clean one raw text: tokenize, drop stop words, then lemmatize.

    Args:
        data: Raw text to clean (coerced with ``str()`` by the tokenizer).

    Returns:
        A list of lemma strings for the remaining tokens tagged NOUN, ADJ,
        VERB or ADV.
    """
    # sent_to_words() is a generator that yields one token list; take it
    # directly instead of wrapping it in an outer list.
    tokens = next(sent_to_words(data))
    # remove_stopwords() tokenizes text, so give it a space-joined string
    # rather than relying on the repr of a nested list being re-tokenized.
    content_words = remove_stopwords(" ".join(tokens))
    return lemmatization(content_words, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])
# Sample customer message used to exercise the cleaning pipeline. The text,
# including its odd character sequences, is kept exactly as captured.
data = (
    'I have made a flight booking on Yatra. '
    'My PNR No are S6ZF2Y . '
    'I had booked a return flight for both the PNR. '
    'The outbound flight i.eรย "BOM -TRV"รย had been cancelled by the airways '
    'and the full amount has been credited back to my account.รย '
    'Now the Inbound flight i.e."รย TRV -รย BOM " has also been cancelled by the airways '
    'and the full amount has been processed to the yatra account on 24th Feb 2020. '
    'So I want you to refund me the amount ASAP. '
    'When would I get the refund back?Waiting for your reply'
)

# Tokenize, remove stop words and lemmatize the message.
data_words = cleaning_new_text(data)

# Join the cleaned tokens back into a single string, replacing the raw text.
data = TreebankWordDetokenizer().detokenize(data_words)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment