Skip to content

Instantly share code, notes, and snippets.

@brendano
Last active December 27, 2015 15:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brendano/7345495 to your computer and use it in GitHub Desktop.
Save brendano/7345495 to your computer and use it in GitHub Desktop.
package nlp;
import java.io.IOException;
import java.io.StringReader;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.PennTreeReader;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreePrint;
import edu.stanford.nlp.trees.TreeReader;
/**
 * Small demo that reads a Penn-Treebank-style bracketed parse from a file,
 * asks the tree to index itself, and then dumps the index-related
 * annotations found on every node's label.
 *
 * <p>Usage: {@code IndexAnnotationsMadness <parse-file>}
 */
public class IndexAnnotationsMadness {

  /** Factory handed to {@link PennTreeReader} when building trees from text. */
  public static TreeFactory tree_factory = new LabeledScoredTreeFactory();

  /**
   * Parses a bracketed tree string into a {@link Tree}.
   *
   * @param parseStr the bracketed parse text to read
   * @return whatever {@code TreeReader.readTree()} produces for the input, or
   *     {@code null} if reading throws an {@link IOException} (the stack
   *     trace is printed in that case, as before)
   */
  public static Tree readTreeFromString(String parseStr) {
    TreeReader treeReader = new PennTreeReader(new StringReader(parseStr), tree_factory);
    try {
      return treeReader.readTree();
    } catch (IOException e) {
      // Kept as a best-effort read: report the failure and signal it with null.
      e.printStackTrace();
      return null;
    } finally {
      try {
        treeReader.close();
      } catch (IOException ignored) {
        // Nothing useful can be done if closing the reader fails.
      }
    }
  }

  /**
   * Prints the tree using {@link TreePrint} configured with the "penn" format.
   *
   * @param parseTree the tree to print
   */
  public static void prettyPrint(Tree parseTree) {
    TreePrint tp = new TreePrint("penn");
    tp.printTree(parseTree);
  }

  /**
   * Reads the parse file named by {@code args[0]}, pretty-prints it, runs the
   * tree's indexing methods, and prints one line per node (pre-order) with
   * its index, begin index, end index and span annotations.
   *
   * @param args command-line arguments; {@code args[0]} is the parse file path
   * @throws IOException if the input file cannot be read
   */
  public static void main(String[] args) throws IOException {
    if (args.length < 1) {
      // Previously this surfaced as a bare ArrayIndexOutOfBoundsException.
      System.err.println("usage: IndexAnnotationsMadness <parse-file>");
      System.exit(1);
    }

    String data = IOUtils.slurpFile(args[0]);
    Tree t = readTreeFromString(data);
    if (t == null) {
      // readTreeFromString returns null on a read failure; stop here instead
      // of failing later with an unrelated NullPointerException.
      System.err.println("could not read a tree from " + args[0]);
      System.exit(1);
    }

    prettyPrint(t);

    // Call order is preserved from the original; the printed annotations are
    // whatever these three calls leave on the labels.
    t.indexSpans();
    t.indexLeaves();
    t.setSpans();

    // Leaf-only variant of the dump, left disabled as in the original:
    // for (Tree node : t.getLeaves()) {
    //   System.out.printf("LEAF index=%-4d startindex=%-4d endindex=%d span=%s ||| %s\n",
    //       ((CoreLabel)node.label()).get(CoreAnnotations.IndexAnnotation.class),
    //       ((CoreLabel)node.label()).get(CoreAnnotations.BeginIndexAnnotation.class),
    //       ((CoreLabel)node.label()).get(CoreAnnotations.EndIndexAnnotation.class),
    //       ((CoreLabel)node.label()).get(CoreAnnotations.SpanAnnotation.class),
    //       node);
    // }
    // System.out.println("----------------------------");

    for (Tree node : t.preOrderNodeList()) {
      // The cast assumes the factory produces CoreLabel labels; do it once per node.
      CoreLabel label = (CoreLabel) node.label();
      System.out.printf("NODE index=%-4d startindex=%-4d endindex=%-4d span=%-10s ||| %s\n",
          label.get(CoreAnnotations.IndexAnnotation.class),
          label.get(CoreAnnotations.BeginIndexAnnotation.class),
          label.get(CoreAnnotations.EndIndexAnnotation.class),
          label.get(CoreAnnotations.SpanAnnotation.class),
          node);
    }
  }
}
% ~/sem/semsys/java.sh nlp.IndexAnnotationsMadness parse
(ROOT
(S
(NP (DT This) (NN campaign))
(VP (VBD broke)
(PP (IN through)
(NP
(NP (DT the) (JJ Japanese) (NN army) (POS 's))
(NN blockade)))
(S
(VP (TO to)
(VP (VB reach)
(NP
(NP (NN base) (NNS areas))
(PP (IN behind)
(NP (NN enemy) (NNS lines)))))))
(, ,)
(S
(VP
(VP (VBG stirring)
(PRT (RP up))
(NP (JJ anti-Japanese) (NN spirit))
(PP (IN throughout)
(NP (DT the) (NN nation))))
(CC and)
(VP (VBG influencing)
(NP
(NP (DT the) (NN situation))
(PP (IN of)
(NP
(NP (DT the) (JJ anti-fascist) (NN war))
(PP (IN of)
(NP (DT the) (NNS people)))))
(ADVP (RB worldwide)))))))
(. .)))
NODE index=null startindex=0 endindex=37 span=0 36 ||| (ROOT (S (NP (DT This) (NN campaign)) (VP (VBD broke) (PP (IN through) (NP (NP (DT the) (JJ Japanese) (NN army) (POS 's)) (NN blockade))) (S (VP (TO to) (VP (VB reach) (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines))))))) (, ,) (S (VP (VP (VBG stirring) (PRT (RP up)) (NP (JJ anti-Japanese) (NN spirit)) (PP (IN throughout) (NP (DT the) (NN nation)))) (CC and) (VP (VBG influencing) (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide))))))) (. .)))
NODE index=null startindex=0 endindex=37 span=0 36 ||| (S (NP (DT This) (NN campaign)) (VP (VBD broke) (PP (IN through) (NP (NP (DT the) (JJ Japanese) (NN army) (POS 's)) (NN blockade))) (S (VP (TO to) (VP (VB reach) (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines))))))) (, ,) (S (VP (VP (VBG stirring) (PRT (RP up)) (NP (JJ anti-Japanese) (NN spirit)) (PP (IN throughout) (NP (DT the) (NN nation)))) (CC and) (VP (VBG influencing) (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide))))))) (. .))
NODE index=null startindex=0 endindex=2 span=0 1 ||| (NP (DT This) (NN campaign))
NODE index=null startindex=0 endindex=1 span=0 0 ||| (DT This)
NODE index=1 startindex=0 endindex=1 span=null ||| This
NODE index=null startindex=1 endindex=2 span=1 1 ||| (NN campaign)
NODE index=2 startindex=1 endindex=2 span=null ||| campaign
NODE index=null startindex=2 endindex=36 span=2 35 ||| (VP (VBD broke) (PP (IN through) (NP (NP (DT the) (JJ Japanese) (NN army) (POS 's)) (NN blockade))) (S (VP (TO to) (VP (VB reach) (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines))))))) (, ,) (S (VP (VP (VBG stirring) (PRT (RP up)) (NP (JJ anti-Japanese) (NN spirit)) (PP (IN throughout) (NP (DT the) (NN nation)))) (CC and) (VP (VBG influencing) (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide)))))))
NODE index=null startindex=2 endindex=3 span=2 2 ||| (VBD broke)
NODE index=3 startindex=2 endindex=3 span=null ||| broke
NODE index=null startindex=3 endindex=9 span=3 8 ||| (PP (IN through) (NP (NP (DT the) (JJ Japanese) (NN army) (POS 's)) (NN blockade)))
NODE index=null startindex=3 endindex=4 span=3 3 ||| (IN through)
NODE index=4 startindex=3 endindex=4 span=null ||| through
NODE index=null startindex=4 endindex=9 span=4 8 ||| (NP (NP (DT the) (JJ Japanese) (NN army) (POS 's)) (NN blockade))
NODE index=null startindex=4 endindex=8 span=4 7 ||| (NP (DT the) (JJ Japanese) (NN army) (POS 's))
NODE index=null startindex=4 endindex=5 span=4 4 ||| (DT the)
NODE index=5 startindex=4 endindex=5 span=null ||| the
NODE index=null startindex=5 endindex=6 span=5 5 ||| (JJ Japanese)
NODE index=6 startindex=5 endindex=6 span=null ||| Japanese
NODE index=null startindex=6 endindex=7 span=6 6 ||| (NN army)
NODE index=7 startindex=6 endindex=7 span=null ||| army
NODE index=null startindex=7 endindex=8 span=7 7 ||| (POS 's)
NODE index=8 startindex=7 endindex=8 span=null ||| 's
NODE index=null startindex=8 endindex=9 span=8 8 ||| (NN blockade)
NODE index=9 startindex=8 endindex=9 span=null ||| blockade
NODE index=null startindex=9 endindex=16 span=9 15 ||| (S (VP (TO to) (VP (VB reach) (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines)))))))
NODE index=null startindex=9 endindex=16 span=9 15 ||| (VP (TO to) (VP (VB reach) (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines))))))
NODE index=null startindex=9 endindex=10 span=9 9 ||| (TO to)
NODE index=10 startindex=9 endindex=10 span=null ||| to
NODE index=null startindex=10 endindex=16 span=10 15 ||| (VP (VB reach) (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines)))))
NODE index=null startindex=10 endindex=11 span=10 10 ||| (VB reach)
NODE index=11 startindex=10 endindex=11 span=null ||| reach
NODE index=null startindex=11 endindex=16 span=11 15 ||| (NP (NP (NN base) (NNS areas)) (PP (IN behind) (NP (NN enemy) (NNS lines))))
NODE index=null startindex=11 endindex=13 span=11 12 ||| (NP (NN base) (NNS areas))
NODE index=null startindex=11 endindex=12 span=11 11 ||| (NN base)
NODE index=12 startindex=11 endindex=12 span=null ||| base
NODE index=null startindex=12 endindex=13 span=12 12 ||| (NNS areas)
NODE index=13 startindex=12 endindex=13 span=null ||| areas
NODE index=null startindex=13 endindex=16 span=13 15 ||| (PP (IN behind) (NP (NN enemy) (NNS lines)))
NODE index=null startindex=13 endindex=14 span=13 13 ||| (IN behind)
NODE index=14 startindex=13 endindex=14 span=null ||| behind
NODE index=null startindex=14 endindex=16 span=14 15 ||| (NP (NN enemy) (NNS lines))
NODE index=null startindex=14 endindex=15 span=14 14 ||| (NN enemy)
NODE index=15 startindex=14 endindex=15 span=null ||| enemy
NODE index=null startindex=15 endindex=16 span=15 15 ||| (NNS lines)
NODE index=16 startindex=15 endindex=16 span=null ||| lines
NODE index=null startindex=16 endindex=17 span=16 16 ||| (, ,)
NODE index=17 startindex=16 endindex=17 span=null ||| ,
NODE index=null startindex=17 endindex=36 span=17 35 ||| (S (VP (VP (VBG stirring) (PRT (RP up)) (NP (JJ anti-Japanese) (NN spirit)) (PP (IN throughout) (NP (DT the) (NN nation)))) (CC and) (VP (VBG influencing) (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide))))))
NODE index=null startindex=17 endindex=36 span=17 35 ||| (VP (VP (VBG stirring) (PRT (RP up)) (NP (JJ anti-Japanese) (NN spirit)) (PP (IN throughout) (NP (DT the) (NN nation)))) (CC and) (VP (VBG influencing) (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide)))))
NODE index=null startindex=17 endindex=24 span=17 23 ||| (VP (VBG stirring) (PRT (RP up)) (NP (JJ anti-Japanese) (NN spirit)) (PP (IN throughout) (NP (DT the) (NN nation))))
NODE index=null startindex=17 endindex=18 span=17 17 ||| (VBG stirring)
NODE index=18 startindex=17 endindex=18 span=null ||| stirring
NODE index=null startindex=18 endindex=19 span=18 18 ||| (PRT (RP up))
NODE index=null startindex=18 endindex=19 span=18 18 ||| (RP up)
NODE index=19 startindex=18 endindex=19 span=null ||| up
NODE index=null startindex=19 endindex=21 span=19 20 ||| (NP (JJ anti-Japanese) (NN spirit))
NODE index=null startindex=19 endindex=20 span=19 19 ||| (JJ anti-Japanese)
NODE index=20 startindex=19 endindex=20 span=null ||| anti-Japanese
NODE index=null startindex=20 endindex=21 span=20 20 ||| (NN spirit)
NODE index=21 startindex=20 endindex=21 span=null ||| spirit
NODE index=null startindex=21 endindex=24 span=21 23 ||| (PP (IN throughout) (NP (DT the) (NN nation)))
NODE index=null startindex=21 endindex=22 span=21 21 ||| (IN throughout)
NODE index=22 startindex=21 endindex=22 span=null ||| throughout
NODE index=null startindex=22 endindex=24 span=22 23 ||| (NP (DT the) (NN nation))
NODE index=null startindex=22 endindex=23 span=22 22 ||| (DT the)
NODE index=23 startindex=22 endindex=23 span=null ||| the
NODE index=null startindex=23 endindex=24 span=23 23 ||| (NN nation)
NODE index=24 startindex=23 endindex=24 span=null ||| nation
NODE index=null startindex=24 endindex=25 span=24 24 ||| (CC and)
NODE index=25 startindex=24 endindex=25 span=null ||| and
NODE index=null startindex=25 endindex=36 span=25 35 ||| (VP (VBG influencing) (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide))))
NODE index=null startindex=25 endindex=26 span=25 25 ||| (VBG influencing)
NODE index=26 startindex=25 endindex=26 span=null ||| influencing
NODE index=null startindex=26 endindex=36 span=26 35 ||| (NP (NP (DT the) (NN situation)) (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))) (ADVP (RB worldwide)))
NODE index=null startindex=26 endindex=28 span=26 27 ||| (NP (DT the) (NN situation))
NODE index=null startindex=26 endindex=27 span=26 26 ||| (DT the)
NODE index=27 startindex=26 endindex=27 span=null ||| the
NODE index=null startindex=27 endindex=28 span=27 27 ||| (NN situation)
NODE index=28 startindex=27 endindex=28 span=null ||| situation
NODE index=null startindex=28 endindex=35 span=28 34 ||| (PP (IN of) (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people)))))
NODE index=null startindex=28 endindex=29 span=28 28 ||| (IN of)
NODE index=29 startindex=28 endindex=29 span=null ||| of
NODE index=null startindex=29 endindex=35 span=29 34 ||| (NP (NP (DT the) (JJ anti-fascist) (NN war)) (PP (IN of) (NP (DT the) (NNS people))))
NODE index=null startindex=29 endindex=32 span=29 31 ||| (NP (DT the) (JJ anti-fascist) (NN war))
NODE index=null startindex=29 endindex=30 span=29 29 ||| (DT the)
NODE index=30 startindex=29 endindex=30 span=null ||| the
NODE index=null startindex=30 endindex=31 span=30 30 ||| (JJ anti-fascist)
NODE index=31 startindex=30 endindex=31 span=null ||| anti-fascist
NODE index=null startindex=31 endindex=32 span=31 31 ||| (NN war)
NODE index=32 startindex=31 endindex=32 span=null ||| war
NODE index=null startindex=32 endindex=35 span=32 34 ||| (PP (IN of) (NP (DT the) (NNS people)))
NODE index=null startindex=32 endindex=33 span=32 32 ||| (IN of)
NODE index=33 startindex=32 endindex=33 span=null ||| of
NODE index=null startindex=33 endindex=35 span=33 34 ||| (NP (DT the) (NNS people))
NODE index=null startindex=33 endindex=34 span=33 33 ||| (DT the)
NODE index=34 startindex=33 endindex=34 span=null ||| the
NODE index=null startindex=34 endindex=35 span=34 34 ||| (NNS people)
NODE index=35 startindex=34 endindex=35 span=null ||| people
NODE index=null startindex=35 endindex=36 span=35 35 ||| (ADVP (RB worldwide))
NODE index=null startindex=35 endindex=36 span=35 35 ||| (RB worldwide)
NODE index=36 startindex=35 endindex=36 span=null ||| worldwide
NODE index=null startindex=36 endindex=37 span=36 36 ||| (. .)
NODE index=37 startindex=36 endindex=37 span=null ||| .
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment