Created
January 6, 2015 17:10
-
-
Save heathd/b6fa778f682cec8e688c to your computer and use it in GitHub Desktop.
stanford-nlp-test.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Smoke test for the stanford-core-nlp gem: annotates a short English text and
# prints, for every sentence, its basic dependencies followed by a set of
# per-token annotations (one value per line).
require 'stanford-core-nlp'

# Look for the JAR and model files in the `jars/` folder next to this script
# rather than in the gem's bin folder (the default). The trailing slash is
# kept exactly as before.
# NOTE(review): presumably the gem appends file names directly to these
# paths, which is why the trailing slash matters -- confirm.
jars_dir = File.realpath(File.dirname(__FILE__)) + '/jars/'
StanfordCoreNLP.jar_path = jars_dir
StanfordCoreNLP.model_path = jars_dir

# Order matters here: `model_files` is reset only after the language has been
# selected, and `default_jars` is replaced with the jars shipped in `jars/`.
StanfordCoreNLP.use :english
StanfordCoreNLP.model_files = {}
StanfordCoreNLP.default_jars = [
  'jollyday.jar',
  'joda-time.jar',
  'xom.jar',
  'stanford-corenlp-3.5.0.jar',
  'stanford-corenlp-3.5.0-models.jar',
  'bridge.jar'
]

# Token annotations printed for every token, in this order.
TOKEN_ANNOTATION_KEYS = [
  # Default annotations for all tokens
  :value,
  :original_text,
  :character_offset_begin,
  :character_offset_end,
  # POS returned by the tagger
  :part_of_speech,
  # Lemma (base form of the token)
  :lemma,
  # Named entity tag
  :named_entity_tag,
  # Coreference
  # NOTE(review): the pipeline below does not load a coreference annotator
  # (e.g. :dcoref), so this value is presumably always empty -- confirm, and
  # add the annotator to the pipeline if coreference output is wanted.
  # Also of interest: coref, coref_chain,
  # coref_cluster, coref_dest, coref_graph.
  :coref_cluster_id
].freeze

text = 'Angela Merkel met Nicolas Sarkozy on January 25th in ' +
       'Berlin to discuss a new austerity package. Sarkozy ' +
       'looked pleased, but Merkel was dismayed.'

pipeline = StanfordCoreNLP.load(:tokenize, :ssplit, :pos, :lemma, :parse, :ner)

# Keep the raw input in `text`; the annotation object gets its own name
# instead of rebinding the same variable to a different type.
annotation = StanfordCoreNLP::Annotation.new(text)
pipeline.annotate(annotation)

annotation.get(:sentences).each do |sentence|
  # Syntactic dependencies
  puts sentence.get(:basic_dependencies).to_s

  sentence.get(:tokens).each do |token|
    TOKEN_ANNOTATION_KEYS.each do |key|
      puts token.get(key).to_s
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment