Last active
December 27, 2015 15:09
-
-
Save brendano/7345495 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package nlp; | |
import java.io.IOException; | |
import java.io.StringReader; | |
import edu.stanford.nlp.io.IOUtils; | |
import edu.stanford.nlp.ling.CoreAnnotations; | |
import edu.stanford.nlp.ling.CoreLabel; | |
import edu.stanford.nlp.trees.LabeledScoredTreeFactory; | |
import edu.stanford.nlp.trees.PennTreeReader; | |
import edu.stanford.nlp.trees.Tree; | |
import edu.stanford.nlp.trees.TreeFactory; | |
import edu.stanford.nlp.trees.TreePrint; | |
import edu.stanford.nlp.trees.TreeReader; | |
public class IndexAnnotationsMadness { | |
public static TreeFactory tree_factory = new LabeledScoredTreeFactory(); | |
public static Tree readTreeFromString(String parseStr) { | |
//read in the input into a Tree data structure | |
TreeReader treeReader = new PennTreeReader(new StringReader(parseStr), tree_factory); | |
Tree inputTree = null; | |
try{ | |
inputTree = treeReader.readTree(); | |
}catch(IOException e){ | |
e.printStackTrace(); | |
} | |
return inputTree; | |
} | |
public static void prettyPrint(Tree parseTree) { | |
TreePrint tp = new TreePrint("penn"); | |
tp.printTree(parseTree); | |
} | |
public static void main(String[] args) throws IOException { | |
String data = IOUtils.slurpFile(args[0]); | |
Tree t = readTreeFromString(data); | |
prettyPrint(t); | |
t.indexSpans(); | |
t.indexLeaves(); | |
t.setSpans(); | |
// for (Tree node : t.getLeaves()) { | |
// System.out.printf("LEAF index=%-4d startindex=%-4d endindex=%d span=%s ||| %s\n", | |
// ((CoreLabel)node.label()).get(CoreAnnotations.IndexAnnotation.class), | |
// ((CoreLabel)node.label()).get(CoreAnnotations.BeginIndexAnnotation.class), | |
// ((CoreLabel)node.label()).get(CoreAnnotations.EndIndexAnnotation.class), | |
// ((CoreLabel)node.label()).get(CoreAnnotations.SpanAnnotation.class), | |
// node); | |
// } | |
// System.out.println("----------------------------"); | |
for (Tree node : t.preOrderNodeList()) { | |
System.out.printf("NODE index=%-4d startindex=%-4d endindex=%-4d span=%-10s ||| %s\n", | |
((CoreLabel)node.label()).get(CoreAnnotations.IndexAnnotation.class), | |
((CoreLabel)node.label()).get(CoreAnnotations.BeginIndexAnnotation.class), | |
((CoreLabel)node.label()).get(CoreAnnotations.EndIndexAnnotation.class), | |
((CoreLabel)node.label()).get(CoreAnnotations.SpanAnnotation.class), | |
node); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% ~/sem/semsys/java.sh nlp.IndexAnnotationsMadness parse | |
(ROOT | |
(S | |
(NP (DT This) (NN campaign)) | |
(VP (VBD broke) | |
(PP (IN through) | |
(NP | |
(NP (DT the) (JJ Japanese) (NN army) (POS 's)) | |
(NN blockade))) | |
(S | |
(VP (TO to) | |
(VP (VB reach) | |
(NP | |
(NP (NN base) (NNS areas)) | |
(PP (IN behind) | |
(NP (NN enemy) (NNS lines))))))) | |
(, ,) | |
(S | |
(VP | |
(VP (VBG stirring) | |
(PRT (RP up)) | |
(NP (JJ anti-Japanese) (NN spirit)) | |
(PP (IN throughout) | |
(NP (DT the) (NN nation)))) | |
(CC and) | |
(VP (VBG influencing) | |
(NP | |
(NP (DT the) (NN situation)) | |
(PP (IN of) | |
(NP | |
(NP (DT the) (JJ anti-fascist) (NN war)) | |
(PP (IN of) | |
(NP (DT the) (NNS people))))) | |
(ADVP (RB worldwide))))))) | |
(. .))) | |
NODE index=null startindex=0 endindex=37 span=0 36 ||| (ROOT (S (NP (DT This) (NN campaign)) (VP (VBD broke) (PP (IN through) (NP (NP (DT the) (JJ Japanese) (NN army) (POS 's)) (NN blockade))) (S (VP (TO to) (VP (VB reach) (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines))))))) (, ,) (S (VP (VP (VBG stirring) (PRT (RP up)) (NP (JJ anti-Japanese) (NN spirit)) (PP (IN throughout) (NP (DT the) (NN nation)))) (CC and) (VP (VBG influencing) (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide))))))) (. .))) | |
NODE index=null startindex=0 endindex=37 span=0 36 ||| (S (NP (DT This) (NN campaign)) (VP (VBD broke) (PP (IN through) (NP (NP (DT the) (JJ Japanese) (NN army) (POS 's)) (NN blockade))) (S (VP (TO to) (VP (VB reach) (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines))))))) (, ,) (S (VP (VP (VBG stirring) (PRT (RP up)) (NP (JJ anti-Japanese) (NN spirit)) (PP (IN throughout) (NP (DT the) (NN nation)))) (CC and) (VP (VBG influencing) (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide))))))) (. .)) | |
NODE index=null startindex=0 endindex=2 span=0 1 ||| (NP (DT This) (NN campaign)) | |
NODE index=null startindex=0 endindex=1 span=0 0 ||| (DT This) | |
NODE index=1 startindex=0 endindex=1 span=null ||| This | |
NODE index=null startindex=1 endindex=2 span=1 1 ||| (NN campaign) | |
NODE index=2 startindex=1 endindex=2 span=null ||| campaign | |
NODE index=null startindex=2 endindex=36 span=2 35 ||| (VP (VBD broke) (PP (IN through) (NP (NP (DT the) (JJ Japanese) (NN army) (POS 's)) (NN blockade))) (S (VP (TO to) (VP (VB reach) (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines))))))) (, ,) (S (VP (VP (VBG stirring) (PRT (RP up)) (NP (JJ anti-Japanese) (NN spirit)) (PP (IN throughout) (NP (DT the) (NN nation)))) (CC and) (VP (VBG influencing) (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide))))))) | |
NODE index=null startindex=2 endindex=3 span=2 2 ||| (VBD broke) | |
NODE index=3 startindex=2 endindex=3 span=null ||| broke | |
NODE index=null startindex=3 endindex=9 span=3 8 ||| (PP (IN through) (NP (NP (DT the) (JJ Japanese) (NN army) (POS 's)) (NN blockade))) | |
NODE index=null startindex=3 endindex=4 span=3 3 ||| (IN through) | |
NODE index=4 startindex=3 endindex=4 span=null ||| through | |
NODE index=null startindex=4 endindex=9 span=4 8 ||| (NP (NP (DT the) (JJ Japanese) (NN army) (POS 's)) (NN blockade)) | |
NODE index=null startindex=4 endindex=8 span=4 7 ||| (NP (DT the) (JJ Japanese) (NN army) (POS 's)) | |
NODE index=null startindex=4 endindex=5 span=4 4 ||| (DT the) | |
NODE index=5 startindex=4 endindex=5 span=null ||| the | |
NODE index=null startindex=5 endindex=6 span=5 5 ||| (JJ Japanese) | |
NODE index=6 startindex=5 endindex=6 span=null ||| Japanese | |
NODE index=null startindex=6 endindex=7 span=6 6 ||| (NN army) | |
NODE index=7 startindex=6 endindex=7 span=null ||| army | |
NODE index=null startindex=7 endindex=8 span=7 7 ||| (POS 's) | |
NODE index=8 startindex=7 endindex=8 span=null ||| 's | |
NODE index=null startindex=8 endindex=9 span=8 8 ||| (NN blockade) | |
NODE index=9 startindex=8 endindex=9 span=null ||| blockade | |
NODE index=null startindex=9 endindex=16 span=9 15 ||| (S (VP (TO to) (VP (VB reach) (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines))))))) | |
NODE index=null startindex=9 endindex=16 span=9 15 ||| (VP (TO to) (VP (VB reach) (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines)))))) | |
NODE index=null startindex=9 endindex=10 span=9 9 ||| (TO to) | |
NODE index=10 startindex=9 endindex=10 span=null ||| to | |
NODE index=null startindex=10 endindex=16 span=10 15 ||| (VP (VB reach) (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines))))) | |
NODE index=null startindex=10 endindex=11 span=10 10 ||| (VB reach) | |
NODE index=11 startindex=10 endindex=11 span=null ||| reach | |
NODE index=null startindex=11 endindex=16 span=11 15 ||| (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines)))) | |
NODE index=null startindex=11 endindex=13 span=11 12 ||| (NP (NN base) (NNS areas)) | |
NODE index=null startindex=11 endindex=12 span=11 11 ||| (NN base) | |
NODE index=12 startindex=11 endindex=12 span=null ||| base | |
NODE index=null startindex=12 endindex=13 span=12 12 ||| (NNS areas) | |
NODE index=13 startindex=12 endindex=13 span=null ||| areas | |
NODE index=null startindex=13 endindex=16 span=13 15 ||| (PP (IN behind) (NP (NN enemy) (NNS lines))) | |
NODE index=null startindex=13 endindex=14 span=13 13 ||| (IN behind) | |
NODE index=14 startindex=13 endindex=14 span=null ||| behind | |
NODE index=null startindex=14 endindex=16 span=14 15 ||| (NP (NN enemy) (NNS lines)) | |
NODE index=null startindex=14 endindex=15 span=14 14 ||| (NN enemy) | |
NODE index=15 startindex=14 endindex=15 span=null ||| enemy | |
NODE index=null startindex=15 endindex=16 span=15 15 ||| (NNS lines) | |
NODE index=16 startindex=15 endindex=16 span=null ||| lines | |
NODE index=null startindex=16 endindex=17 span=16 16 ||| (, ,) | |
NODE index=17 startindex=16 endindex=17 span=null ||| , | |
NODE index=null startindex=17 endindex=36 span=17 35 ||| (S (VP (VP (VBG stirring) (PRT (RP up)) (NP (JJ anti-Japanese) (NN spirit)) (PP (IN throughout) (NP (DT the) (NN nation)))) (CC and) (VP (VBG influencing) (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide)))))) | |
NODE index=null startindex=17 endindex=36 span=17 35 ||| (VP (VP (VBG stirring) (PRT (RP up)) (NP (JJ anti-Japanese) (NN spirit)) (PP (IN throughout) (NP (DT the) (NN nation)))) (CC and) (VP (VBG influencing) (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide))))) | |
NODE index=null startindex=17 endindex=24 span=17 23 ||| (VP (VBG stirring) (PRT (RP up)) (NP (JJ anti-Japanese) (NN spirit)) (PP (IN throughout) (NP (DT the) (NN nation)))) | |
NODE index=null startindex=17 endindex=18 span=17 17 ||| (VBG stirring) | |
NODE index=18 startindex=17 endindex=18 span=null ||| stirring | |
NODE index=null startindex=18 endindex=19 span=18 18 ||| (PRT (RP up)) | |
NODE index=null startindex=18 endindex=19 span=18 18 ||| (RP up) | |
NODE index=19 startindex=18 endindex=19 span=null ||| up | |
NODE index=null startindex=19 endindex=21 span=19 20 ||| (NP (JJ anti-Japanese) (NN spirit)) | |
NODE index=null startindex=19 endindex=20 span=19 19 ||| (JJ anti-Japanese) | |
NODE index=20 startindex=19 endindex=20 span=null ||| anti-Japanese | |
NODE index=null startindex=20 endindex=21 span=20 20 ||| (NN spirit) | |
NODE index=21 startindex=20 endindex=21 span=null ||| spirit | |
NODE index=null startindex=21 endindex=24 span=21 23 ||| (PP (IN throughout) (NP (DT the) (NN nation))) | |
NODE index=null startindex=21 endindex=22 span=21 21 ||| (IN throughout) | |
NODE index=22 startindex=21 endindex=22 span=null ||| throughout | |
NODE index=null startindex=22 endindex=24 span=22 23 ||| (NP (DT the) (NN nation)) | |
NODE index=null startindex=22 endindex=23 span=22 22 ||| (DT the) | |
NODE index=23 startindex=22 endindex=23 span=null ||| the | |
NODE index=null startindex=23 endindex=24 span=23 23 ||| (NN nation) | |
NODE index=24 startindex=23 endindex=24 span=null ||| nation | |
NODE index=null startindex=24 endindex=25 span=24 24 ||| (CC and) | |
NODE index=25 startindex=24 endindex=25 span=null ||| and | |
NODE index=null startindex=25 endindex=36 span=25 35 ||| (VP (VBG influencing) (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide)))) | |
NODE index=null startindex=25 endindex=26 span=25 25 ||| (VBG influencing) | |
NODE index=26 startindex=25 endindex=26 span=null ||| influencing | |
NODE index=null startindex=26 endindex=36 span=26 35 ||| (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide))) | |
NODE index=null startindex=26 endindex=28 span=26 27 ||| (NP (DT the) (NN situation)) | |
NODE index=null startindex=26 endindex=27 span=26 26 ||| (DT the) | |
NODE index=27 startindex=26 endindex=27 span=null ||| the | |
NODE index=null startindex=27 endindex=28 span=27 27 ||| (NN situation) | |
NODE index=28 startindex=27 endindex=28 span=null ||| situation | |
NODE index=null startindex=28 endindex=35 span=28 34 ||| (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) | |
NODE index=null startindex=28 endindex=29 span=28 28 ||| (IN of) | |
NODE index=29 startindex=28 endindex=29 span=null ||| of | |
NODE index=null startindex=29 endindex=35 span=29 34 ||| (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people)))) | |
NODE index=null startindex=29 endindex=32 span=29 31 ||| (NP (DT the) (JJ anti-fascist) (NN war)) | |
NODE index=null startindex=29 endindex=30 span=29 29 ||| (DT the) | |
NODE index=30 startindex=29 endindex=30 span=null ||| the | |
NODE index=null startindex=30 endindex=31 span=30 30 ||| (JJ anti-fascist) | |
NODE index=31 startindex=30 endindex=31 span=null ||| anti-fascist | |
NODE index=null startindex=31 endindex=32 span=31 31 ||| (NN war) | |
NODE index=32 startindex=31 endindex=32 span=null ||| war | |
NODE index=null startindex=32 endindex=35 span=32 34 ||| (PP (IN of) (NP (DT the) (NNS people))) | |
NODE index=null startindex=32 endindex=33 span=32 32 ||| (IN of) | |
NODE index=33 startindex=32 endindex=33 span=null ||| of | |
NODE index=null startindex=33 endindex=35 span=33 34 ||| (NP (DT the) (NNS people)) | |
NODE index=null startindex=33 endindex=34 span=33 33 ||| (DT the) | |
NODE index=34 startindex=33 endindex=34 span=null ||| the | |
NODE index=null startindex=34 endindex=35 span=34 34 ||| (NNS people) | |
NODE index=35 startindex=34 endindex=35 span=null ||| people | |
NODE index=null startindex=35 endindex=36 span=35 35 ||| (ADVP (RB worldwide)) | |
NODE index=null startindex=35 endindex=36 span=35 35 ||| (RB worldwide) | |
NODE index=36 startindex=35 endindex=36 span=null ||| worldwide | |
NODE index=null startindex=36 endindex=37 span=36 36 ||| (. .) | |
NODE index=37 startindex=36 endindex=37 span=null ||| . |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment