Created
August 30, 2010 06:42
-
-
Save r/557105 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.twitter.appprofiler.pig.piggybank; | |
import java.io.IOException;
import java.nio.charset.Charset;
import org.apache.commons.lang.StringUtils;
import org.apache.pig.EvalFunc;
import org.apache.pig.PigWarning;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
public class LevenshteinDistance extends EvalFunc<Integer> { | |
protected static String getString(Tuple input, int index) throws ExecException { | |
Object o = input.get(index); | |
if (o instanceof String) | |
return (String)o; | |
else if (o instanceof DataByteArray) | |
return new String(((DataByteArray)o).get()); | |
else | |
throw new ClassCastException(); | |
} | |
public Integer exec(Tuple input) throws IOException { | |
if ((input == null) || (input.size() < 2) || (input.get(0) == null) || (input.get(1) == null)) | |
return null; | |
try { | |
String string1 = null; | |
try { | |
string1 = getString(input, 0); | |
} catch (ClassCastException ex) { | |
warn("can't cast first parameter to a string", PigWarning.UDF_WARNING_1); | |
} | |
String string2 = null; | |
try { | |
string2 = getString(input, 1); | |
} catch (ClassCastException ex) { | |
warn("can't cast second parameter to a string", PigWarning.UDF_WARNING_1); | |
} | |
return StringUtils.getLevenshteinDistance(string1, string2); | |
} catch (Exception e) { | |
warn("error", PigWarning.UDF_WARNING_2); | |
return null; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment