Skip to content

Instantly share code, notes, and snippets.

@s1monw
Created October 10, 2012 20:32
Show Gist options
  • Save s1monw/3868238 to your computer and use it in GitHub Desktop.
Save s1monw/3868238 to your computer and use it in GitHub Desktop.
readfloor
/**
* Enumerates all paths in the automaton that also intersect the FST,
* accumulating the FST end node and output for each path.
*/
public static <T> List<Path<T>> intersectPrefixPaths(Automaton a, FST<T> fst,
final boolean supportRanges) throws IOException {
final List<Path<T>> queue = new ArrayList<Path<T>>();
final List<Path<T>> endNodes = new ArrayList<Path<T>>();
queue.add(new Path<T>(a.getInitialState(), fst
.getFirstArc(new FST.Arc<T>()), fst.outputs.getNoOutput(),
new IntsRef()));
final FST.Arc<T> scratchArc = new FST.Arc<T>();
final FST.BytesReader fstReader = fst.getBytesReader(0);
// System.out.println("fst/a intersect");
int iter = 0;
while (queue.size() != 0) {
System.out.println(iter++);
final Path<T> path = queue.remove(queue.size() - 1);
// System.out.println(" cycle path=" + path);
if (path.state.isAccept()) {
endNodes.add(path);
}
IntsRef currentInput = path.input;
if (supportRanges) {
for (Transition t : path.state.getTransitions()) {
if (t.getMin() == t.getMax()) {
// System.out.println(" t=" + (char) t.getMin());
final FST.Arc<T> nextArc = fst.findTargetArc(t.getMin(),
path.fstNode, scratchArc, fstReader);
if (nextArc != null) {
// System.out.println((char)t.getMin());
// System.out.println("\t num: " + nextArc.numArcs);
// System.out.println(" fst matches");
// Path continues:
IntsRef newInput = new IntsRef(currentInput.length + 1);
newInput.copyInts(currentInput);
newInput.ints[currentInput.length] = t.getMin();
newInput.length = currentInput.length + 1;
// System.out.println("fixed: " +UnicodeUtil.newString(newInput.ints,newInput.offset, newInput.length));
queue.add(new Path<T>(t.getDest(), new FST.Arc<T>()
.copyFrom(nextArc), fst.outputs
.add(path.output, nextArc.output), newInput));
}
} else {
FST.Arc<T> nextArc = readFloor(t.getMin(),fst,
path.fstNode, scratchArc, fstReader);
do {
if (nextArc != null && nextArc.label <= t.getMax()) {
System.out.println((char)t.getMin() + " " + (char)t.getMax());
System.out.println(nextArc.label);
// System.out.println(" fst matches");
// Path continues:
assert nextArc.label <= t.getMax();
assert nextArc.label >= t.getMin();
System.out.println("\t num: " + nextArc.numArcs);
IntsRef newInput = new IntsRef(currentInput.length + 1);
newInput.copyInts(currentInput);
newInput.ints[currentInput.length] = nextArc.label;
newInput.length = currentInput.length + 1;
// System.out.println(UnicodeUtil.newString(newInput.ints,newInput.offset, newInput.length));
queue.add(new Path<T>(t.getDest(), new FST.Arc<T>()
.copyFrom(nextArc), fst.outputs.add(path.output,
nextArc.output), newInput));
int label = nextArc.label;
nextArc = nextArc.isLast() ? null : fst.readNextRealArc(nextArc, fstReader);;
assert nextArc == null || label < nextArc.label : "last: " + label + " next: " + nextArc.label;
} else {
break;
}
} while (true);
}
}
} else {
for (Transition t : path.state.getTransitions()) {
// TODO: we can fix this if necessary:
if (t.getMin() != t.getMax()) {
throw new IllegalStateException(
"can only handle Transitions that match one character");
}
// System.out.println(" t=" + (char) t.getMin());
final FST.Arc<T> nextArc = fst.findTargetArc(t.getMin(),
path.fstNode, scratchArc, fstReader);
if (nextArc != null) {
// System.out.println(" fst matches");
// Path continues:
IntsRef newInput = new IntsRef(currentInput.length + 1);
newInput.copyInts(currentInput);
newInput.ints[currentInput.length] = t.getMin();
newInput.length = currentInput.length + 1;
// System.out.println(UnicodeUtil.newString(newInput.ints,newInput.offset, newInput.length));
queue.add(new Path<T>(t.getDest(), new FST.Arc<T>()
.copyFrom(nextArc), fst.outputs
.add(path.output, nextArc.output), newInput));
}
}
}
}
// for (Path<T> path2 : endNodes) {
// System.out.println(UnicodeUtil.newString(path2.input.ints,path2.input.offset, path2.input.length));
// }
return endNodes;
}
public static <T> Arc<T> readFloor(int labelToMatch, FST<T> fst, Arc<T> follow,
Arc<T> arc, BytesReader in) throws IOException {
if (labelToMatch == FST.END_LABEL) {
if (follow.isFinal()) {
if (follow.target <= 0) {
arc.flags = FST.BIT_LAST_ARC;
} else {
arc.flags = 0;
// NOTE: nextArc is a node (not an address!) in this case:
arc.nextArc = follow.target;
arc.node = follow.target;
}
arc.output = follow.nextFinalOutput;
arc.label = FST.END_LABEL;
return arc;
} else {
return null;
}
}
if (!FST.targetHasArcs(follow)) {
return null;
}
if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) {
// Arcs are fixed array -- use binary search to find
// the target.
fst.readFirstTargetArc(follow, arc, in);
int low = arc.arcIdx;
int high = arc.numArcs - 1;
int mid = 0;
// System.out.println("do arc array low=" + low + " high=" + high +
// " targetLabel=" + targetLabel);
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc * mid + 1);
final int midLabel = fst.readLabel(in);
final int cmp = midLabel - labelToMatch;
// System.out.println(" cycle low=" + low + " high=" + high + " mid=" +
// mid + " midLabel=" + midLabel + " cmp=" + cmp);
if (cmp < 0) {
low = mid + 1;
} else if (cmp > 0) {
high = mid - 1;
} else {
arc.arcIdx = mid-1;
return fst.readNextRealArc(arc, in);
}
}
if (low == arc.numArcs) {
// DEAD END!
return null;
}
arc.arcIdx = (low > high ? high : low)-1;
return fst.readNextRealArc(arc, in);
}
// Linear scan
fst.readFirstRealTargetArc(follow.target, arc, in);
while (true) {
// System.out.println(" non-bs cycle");
// TODO: we should fix this code to not have to create
// object for the output of every arc we scan... only
// for the matching arc, if found
if (arc.label >= labelToMatch) {
// System.out.println(" found!");
return arc;
} else if (arc.isLast()) {
return null;
} else {
fst.readNextRealArc(arc, in);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment