Last active
May 28, 2016 14:15
-
-
Save mtairu/8af6664bac5ed0b2af6e3198b29d3332 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#~ Rough code to get you going with Stanford POS Tagger on NLTK.
#~ http://netmatrx.com
#~
#~ Splits a sample sentence on whitespace, hands the tokens to NLTK's
#~ StanfordPOSTagger wrapper and prints whatever tagger.tag() returns.
#~ The Java, model and jar locations below are machine-specific examples;
#~ adjust them to match your own installation.
import os
#~from nltk.parse import stanford
#~ Raw strings (r'...') keep the Windows backslashes literal instead of
#~ relying on sequences such as \P or \s not being recognised escapes.
#~ NOTE(review): NLTK is expected to locate Java through JAVAHOME -- confirm
#~ against the NLTK version you have installed.
os.environ['JAVAHOME'] = r'C:\Program Files\Java\jdk1.8.0_91'
#os.environ['STANFORD_POSTAGGER'] = r'C:\stanford\stanford-postagger-3.6.0.jar'
#os.environ['STANFORD_MODELS'] = r'C:\stanford\models\english-bidirectional-distsim.tagger'
from nltk.tag.stanford import StanfordPOSTagger
#~ The sentence is already tokenised: punctuation is separated by spaces,
#~ so a plain split() yields one token per word or punctuation mark.
text = 'This morning , I should have stayed in bed .'
tokens = text.split()
model = r'C:\stanford\models\english-bidirectional-distsim.tagger'
jar = r'C:\stanford\stanford-postagger-3.6.0.jar'
#~ Use the class that is actually imported above (StanfordPOSTagger); the
#~ previous name, StanfordTagger, was never imported and raised NameError.
tagger = StanfordPOSTagger(model, path_to_jar=jar, encoding='UTF-8')
#~ print(...) with a single argument behaves the same on Python 2 and 3.
print(tagger.tag(tokens))
#~ Or you can tag a single token by passing a one-element list:
#~     tagger.tag(['man'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment