Skip to content

Instantly share code, notes, and snippets.

@eellpp
Created June 3, 2016 00:07
Show Gist options
  • Save eellpp/aa5ca35f97027e42d5df6663e95cc27c to your computer and use it in GitHub Desktop.
Save eellpp/aa5ca35f97027e42d5df6663e95cc27c to your computer and use it in GitHub Desktop.
/*
* testApp.java
*
* Copyright (c) 2000-2012, The University of Sheffield.
*
* This file is part of GATE (see http://gate.ac.uk/), and is free
* software, licenced under the GNU Library General Public License,
* Version 3, 29 June 2007.
*
* A copy of this licence is included in the distribution in the file
* licence.html, and is also available at http://gate.ac.uk/gate/licence.html.
*
* sandeepchellappen, 1/6/2016
*
* For details on the configuration options, see the user guide:
* http://gate.ac.uk/cgi-bin/userguide/sec:creole-model:config
*/
package com.mytests;
import gate.*;
import gate.creole.*;
import gate.creole.metadata.*;
import gate.util.*;
import java.util.*;
import java.io.*;
import java.net.*;
import gate.*;
import gate.creole.*;
import gate.util.*;
import gate.util.persistence.PersistenceManager;
import gate.corpora.RepositioningInfo;
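/**
* Standalone driver for a saved GATE application: it loads the application
* from a .gapp file, runs it over documents given as URLs on the command
* line, and prints the Location annotations found in each sentence.
*/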
@CreoleResource(name = "testApp",
comment = "Add a descriptive comment about this resource")
public class testApp {
/** The Corpus Pipeline application to contain ANNIE */
private CorpusController annieController;
/**
 * Loads the saved GATE application that this class drives.
 *
 * <p>The application is read from {@code testApp.gapp} in a fixed directory
 * and restored with {@link PersistenceManager#loadObjectFromFile(File)}; the
 * resulting controller is stored in {@code annieController} for later use by
 * {@link #setCorpus(Corpus)} and {@link #execute()}.
 *
 * <p>To load the stock ANNIE application instead, point the file at
 * {@code new File(new File(Gate.getPluginsHome(), "ANNIE"), "ANNIE_with_defaults.gapp")}.
 *
 * @throws GateException declared for GATE failures while restoring the application
 * @throws IOException declared for failures while reading the application file
 */
public void initAnnie() throws GateException, IOException {
  Out.prln("Initialising ANNIE...");
  // NOTE(review): absolute path on the original author's machine - it must be
  // adjusted before this can run anywhere else.
  File annieGapp = new File("/Users/sandeepchellappen/Work/Learning/nlp/GateTuorial", "testApp.gapp");
  annieController =
      (CorpusController) PersistenceManager.loadObjectFromFile(annieGapp);
  Out.prln("...ANNIE loaded");
} // initAnnie()
/**
 * Hands the corpus to be processed to the loaded application controller.
 *
 * @param corpus the corpus the controller should run on
 */
public void setCorpus(Corpus corpus) {
  this.annieController.setCorpus(corpus);
} // setCorpus
/**
 * Runs the loaded application controller, printing a progress message
 * before it starts and after it finishes.
 *
 * @throws GateException if the controller's {@code execute()} call fails
 */
public void execute() throws GateException {
  Out.prln("Running ANNIE...");
  this.annieController.execute();
  Out.prln("...ANNIE complete");
} // execute()
/**
 * Run from the command-line, with a list of URLs as argument.
 *
 * <p>Initialises GATE, loads the saved application, builds a corpus with one
 * document per URL, runs the application over it and then, for every
 * "Sentence" annotation of every document, prints each "Location" annotation
 * selected for that sentence's offset range as {@code text : type}.
 *
 * <P><B>NOTE:</B><BR>
 * This code will run with all the documents in memory - if you
 * want to unload each from memory after use, add code to store
 * the corpus in a DataStore.
 *
 * @param args the URLs of the documents to process
 * @throws GateException if GATE initialisation, resource creation or execution fails
 * @throws IOException if the application file cannot be read or a URL is malformed
 */
public static void main(String[] args) throws GateException, IOException {
  // initialise the GATE library
  Out.prln("Initialising GATE...");
  Gate.init();
  Out.prln("...GATE initialised");

  // load the saved application (this may take several minutes)
  testApp annie = new testApp();
  annie.initAnnie();

  // create a GATE corpus and add a document for each command-line argument
  Corpus corpus = Factory.newCorpus("StandAloneAnnie corpus");
  for (String arg : args) {
    URL u = new URL(arg);
    FeatureMap params = Factory.newFeatureMap();
    params.put("sourceUrl", u);
    // Boolean.TRUE instead of the deprecated Boolean(boolean) constructor
    params.put("preserveOriginalContent", Boolean.TRUE);
    params.put("collectRepositioningInfo", Boolean.TRUE);
    Out.prln("Creating doc for " + u);
    Document doc = (Document)
        Factory.createResource("gate.corpora.DocumentImpl", params);
    corpus.add(doc);
  } // for each of args

  // tell the pipeline about the corpus and run it
  annie.setCorpus(corpus);
  annie.execute();

  // the requested annotation types do not change between documents
  String[] sentenceTypes = {"Sentence"};
  String[] locationTypes = {"Location"};

  // for each document, walk its sentences and print the locations found in each
  for (Object member : corpus) {
    Document doc = (Document) member;
    AnnotationSet defaultAnnotSet = doc.getAnnotations();
    AnnotationSet sentAnns = annie.getAllAnnotationForTypes(defaultAnnotSet, sentenceTypes);
    for (Annotation sent : sentAnns) {
      Long startOffset = sent.getStartNode().getOffset();
      Long endOffset = sent.getEndNode().getOffset();
      // restrict the lookup to this sentence's offset range
      Set<Annotation> anns = annie.getAllAnnotationForTypesWithinOffset(
          defaultAnnotSet, locationTypes, startOffset, endOffset);
      for (Annotation ann : anns) {
        String word = gate.Utils.stringFor(doc, ann);
        String type = ann.getType();
        System.out.println(word + " : " + type);
      }
    }
  } // for each doc
} // main
/**
 * Collects, for each requested type, the annotations that
 * {@code defaultAnnotSet.get(type, startOffset, endOffset)} selects, and
 * returns their union.
 *
 * <p>NOTE(review): the exact range semantics (fully contained vs. merely
 * overlapping the range) are those of GATE's {@code get(String, Long, Long)}
 * - confirm against the GATE version in use.
 *
 * @param defaultAnnotSet the annotation set to search
 * @param reqTypes the annotation type names to look for
 * @param startOffset start of the offset range
 * @param endOffset end of the offset range
 * @return a new, mutable set with every selected annotation; empty if none matched
 */
private Set<Annotation> getAllAnnotationForTypesWithinOffset(AnnotationSet defaultAnnotSet, String[] reqTypes,
    Long startOffset, Long endOffset) {
  // The accumulator has to be mutable: Collections.emptySet() is immutable, so
  // addAll on it throws UnsupportedOperationException as soon as a match exists.
  Set<Annotation> matches = new HashSet<Annotation>();
  for (String type : reqTypes) {
    Set<Annotation> ofType = defaultAnnotSet.get(type, startOffset, endOffset);
    // NOTE(review): defensive guard in case get(...) yields null when nothing
    // matches - confirm against the GATE version in use.
    if (ofType != null) {
      matches.addAll(ofType);
    }
  }
  return matches;
}
/**
 * Selects from {@code aSet} the annotations whose type is one of
 * {@code reqTypes}, by passing the type names as a set to {@code aSet.get(...)}.
 *
 * @param aSet the annotation set to search
 * @param reqTypes the annotation type names to keep
 * @return whatever {@code aSet.get(Set)} returns for those type names
 */
private AnnotationSet getAllAnnotationForTypes(AnnotationSet aSet, String[] reqTypes) {
  // typed set instead of a raw one; duplicates in reqTypes collapse as before
  Set<String> wantedTypes = new HashSet<String>(Arrays.asList(reqTypes));
  return aSet.get(wantedTypes);
}
/**
 * A vector of annotations kept in ascending order of start offset, which
 * refuses any annotation that overlaps one it already holds.
 */
public static class SortedAnnotationList extends Vector {
  /** Creates an empty list. */
  public SortedAnnotationList() {
    super();
  } // SortedAnnotationList

  /**
   * Inserts {@code annot} at its sorted position unless it overlaps an
   * annotation already stored.
   *
   * <p>The annotation goes in front of the first stored element whose start
   * offset is strictly greater than its own, or at the end if there is none.
   *
   * @param annot the annotation to add
   * @return {@code false} if {@code annot} overlaps a stored annotation and
   *         was therefore not added, {@code true} once it has been inserted
   */
  public boolean addSortedExclusive(Annotation annot) {
    // step 1: reject anything that overlaps an element we already hold
    for (int idx = 0; idx < size(); idx++) {
      Annotation stored = (Annotation) get(idx);
      if (annot.overlaps(stored)) {
        return false;
      }
    }

    // step 2: find the first element that starts after the new annotation
    final long newStart = annot.getStartNode().getOffset().longValue();
    int insertAt = 0;
    while (insertAt < size()) {
      Annotation stored = (Annotation) get(insertAt);
      long storedStart = stored.getStartNode().getOffset().longValue();
      if (newStart < storedStart) {
        break;
      }
      insertAt++;
    }

    // step 3: insert there (insertAt == size() appends at the end)
    insertElementAt(annot, insertAt);
    return true;
  } // addSorted
} // SortedAnnotationList
} // class testApp
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment