Created
April 28, 2013 09:56
-
-
Save sergey-tihon/5476455 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using java.io; | |
using edu.stanford.nlp.process; | |
using edu.stanford.nlp.ling; | |
using edu.stanford.nlp.trees; | |
using edu.stanford.nlp.parser.lexparser; | |
namespace Stanford_Parser
{
    /// <summary>
    /// Console demo that drives the Stanford lexicalized parser from C#
    /// through its Java API (java.* / edu.stanford.nlp.* namespaces).
    /// </summary>
    class Program
    {
        /// <summary>
        /// Model location used when no path is given on the command line.
        /// It is relative to the working directory, so it only resolves when the
        /// program is started from the expected build output folder.
        /// </summary>
        private const string DefaultModelPath =
            @"..\..\..\..\StanfordNLPLibraries\stanford-parser\stanford-parser-2.0.4-models\englishPCFG.ser.gz";

        /// <summary>
        /// Parses two sample sentences with <paramref name="lp"/> and writes the
        /// results to standard output: the Penn-style tree of the first sentence,
        /// then the typed dependencies and a combined tree/dependency print of
        /// the second one.
        /// </summary>
        /// <param name="lp">An already loaded parser model.</param>
        static void demoAPI(LexicalizedParser lp)
        {
            // This option shows parsing a list of correctly tokenized words
            var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
            var rawWords = Sentence.toCoreLabelList(sent);
            var parse = lp.apply(rawWords);
            parse.pennPrint();

            // This option shows loading and using an explicit tokenizer
            var sent2 = "This is another sentence.";
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader = new StringReader(sent2);
            var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
            parse = lp.apply(rawWords2);

            // Derive the typed dependencies of the second parse.
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs = gsf.newGrammaticalStructure(parse);
            var tdl = gs.typedDependenciesCCprocessed();

            System.Console.WriteLine();
            // tdl is a Java collection, so it is walked with its Java-style iterator.
            for (var it = tdl.iterator(); it.hasNext();)
            {
                System.Console.WriteLine("{0}", it.next());
            }
            System.Console.WriteLine();

            // Print the same parse again using the "penn,typedDependenciesCollapsed" format spec.
            var tp = new TreePrint("penn,typedDependenciesCollapsed");
            tp.printTree(parse);
        }

        /// <summary>
        /// Entry point: loads the parser model and runs <see cref="demoAPI"/>.
        /// </summary>
        /// <param name="args">
        /// Optional. <c>args[0]</c> is the path to the serialized parser model;
        /// when omitted, <see cref="DefaultModelPath"/> is used.
        /// </param>
        static void Main(string[] args)
        {
            // Allow the model location to be overridden so the demo is not tied
            // to one directory layout; with no arguments behavior is unchanged.
            var modelPath = (args != null && args.Length > 0) ? args[0] : DefaultModelPath;

            var lp = LexicalizedParser.loadModel(modelPath);
            demoAPI(lp);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment