Created
January 18, 2013 21:09
-
-
Save hkf/4568541 to your computer and use it in GitHub Desktop.
stanford-core-nlp example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example script: configure the stanford-core-nlp gem, run an annotation
# pipeline over a short English text and print, for every sentence, its
# basic dependencies followed by a set of per-token annotations.
require 'stanford-core-nlp'

# Directory (with trailing slash) that holds the CoreNLP distribution.
# The same location is used for both the JAR files and the model files.
core_nlp_dir = "#{Dir.pwd}/stanford-core-nlp-full/"

# Set an alternative path to look for the JAR files.
# Default is gem's bin folder.
StanfordCoreNLP.jar_path = core_nlp_dir

# Set an alternative path to look for the model files.
# Default is gem's bin folder.
StanfordCoreNLP.model_path = core_nlp_dir

# Pass some alternative arguments to the Java VM.
# Default is ['-Xms512M', '-Xmx1024M'] (be prepared
# to take a coffee break).
# StanfordCoreNLP.jvm_args = ['-option1', '-option2']

# Redirect VM output to log.txt
StanfordCoreNLP.log_file = 'log.txt'

# Use the model files for a different language than English.
# Left disabled on purpose: the sample text below is English and the POS
# model selected further down is an English tagger, so switching the
# language here would make the configuration contradict itself.
# NOTE(review): if you enable this, keep it *before* set_model -- `use`
# appears to rebuild the model table and would discard the override below;
# confirm against the gem source.
# StanfordCoreNLP.use :french # or :german

# Change a specific model file.
StanfordCoreNLP.set_model('pos.model', 'english-left3words-distsim.tagger')

text = 'Angela Merkel met Nicolas Sarkozy on January 25th in ' +
       'Berlin to discuss a new austerity package. Sarkozy ' +
       'looked pleased, but Merkel was dismayed.'

pipeline = StanfordCoreNLP.load(:tokenize, :ssplit, :pos, :lemma, :parse, :ner, :dcoref)

# Keep the raw string and the annotation object in separate variables
# instead of rebinding `text` to a different type.
annotation = StanfordCoreNLP::Annotation.new(text)
pipeline.annotate(annotation)

# Token-level annotation keys, printed one per line in this order.
token_annotations = [
  # Default annotations for all tokens
  :value,
  :original_text,
  :character_offset_begin,
  :character_offset_end,
  # POS returned by the tagger
  :part_of_speech,
  # Lemma (base form of the token)
  :lemma,
  # Named entity tag
  :named_entity_tag,
  # Coreference
  :coref_cluster_id
  # Also of interest: coref, coref_chain,
  # coref_cluster, coref_dest, coref_graph.
]

annotation.get(:sentences).each do |sentence|
  # Syntactical dependencies
  puts sentence.get(:basic_dependencies).to_s

  sentence.get(:tokens).each do |token|
    token_annotations.each do |key|
      puts token.get(key).to_s
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment