Skip to content

Instantly share code, notes, and snippets.

@r
Created August 30, 2010 06:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save r/557105 to your computer and use it in GitHub Desktop.
Save r/557105 to your computer and use it in GitHub Desktop.
package com.twitter.appprofiler.pig.piggybank;
import java.io.IOException;
import java.nio.charset.Charset;
import org.apache.commons.lang.StringUtils;
import org.apache.pig.EvalFunc;
import org.apache.pig.PigWarning;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
public class LevenshteinDistance extends EvalFunc<Integer> {
protected static String getString(Tuple input, int index) throws ExecException {
Object o = input.get(index);
if (o instanceof String)
return (String)o;
else if (o instanceof DataByteArray)
return new String(((DataByteArray)o).get());
else
throw new ClassCastException();
}
public Integer exec(Tuple input) throws IOException {
if ((input == null) || (input.size() < 2) || (input.get(0) == null) || (input.get(1) == null))
return null;
try {
String string1 = null;
try {
string1 = getString(input, 0);
} catch (ClassCastException ex) {
warn("can't cast first parameter to a string", PigWarning.UDF_WARNING_1);
}
String string2 = null;
try {
string2 = getString(input, 1);
} catch (ClassCastException ex) {
warn("can't cast second parameter to a string", PigWarning.UDF_WARNING_1);
}
return StringUtils.getLevenshteinDistance(string1, string2);
} catch (Exception e) {
warn("error", PigWarning.UDF_WARNING_2);
return null;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment