Last active
July 18, 2019 06:04
-
-
Save shubham-singh-ss/db67254c3594632e5ed3db4f0e908079 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from spacy.lang.en import English
# Kept so the name stays importable from this module; the filtering below
# reads the stop-word flag from the tokens rather than from this set.
from spacy.lang.en.stop_words import STOP_WORDS  # noqa: F401

# Build a blank English pipeline. English() supplies the tokenizer and the
# English language data (including the stop-word list); it does not load a
# tagger, parser, NER component or word vectors.
nlp = English()

# Sample input. NOTE(review): the misspellings ("monastry", "relinguish",
# "rihgts", "becuase", "wood-cuting") look intentional -- confirm before
# correcting them.
text = """He determined to drop his litigation with the monastry, and relinguish his claims to the wood-cuting and
fishery rihgts at once. He was the more ready to do this becuase the rights had become much less valuable, and he had
indeed the vaguest idea where the wood and river in question were."""

# Calling the pipeline on a string tokenizes it and returns a Doc.
my_doc = nlp(text)

# Every token's text, in document order (punctuation and whitespace tokens
# produced by the tokenizer are included).
token_list = [token.text for token in my_doc]

# The same tokens with stop words removed. token.is_stop is the stop-word
# flag of the token's lexeme, so no second lookup through nlp.vocab is
# needed.
filtered_sentence = [token.text for token in my_doc if not token.is_stop]

print(token_list)
print(filtered_sentence)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment