# Stopword removal & lemmatization using spaCy.
import spacy

# NER and the dependency parser are not needed for lemmatization;
# disabling them makes the pipeline significantly faster.
nlp = spacy.load('en_core_web_sm', disable=['ner', 'parser'])
nlp.max_length = 5_000_000  # raise the default 1M-char limit for very long documents


def lemmatize(text):
    """Return *text* with stopwords dropped and each remaining token lemmatized.

    Tokens are joined back into a single space-separated string.
    """
    # Iterate the Doc directly — no need to materialize it as a list first.
    return ' '.join(token.lemma_ for token in nlp(text) if not token.is_stop)


# Remove stopwords and lemmatize words.
# NOTE: .progress_apply requires tqdm's pandas integration (tqdm.pandas())
# to have been registered earlier — presumably done before this chunk.
training_corpus['lemmatized'] = training_corpus['cleaned'].progress_apply(lemmatize)
testing_corpus['lemmatized'] = testing_corpus['cleaned'].progress_apply(lemmatize)