Skip to content

Instantly share code, notes, and snippets.

@r
Created September 7, 2010 01:36
Show Gist options
  • Save r/567738 to your computer and use it in GitHub Desktop.
Save r/567738 to your computer and use it in GitHub Desktop.
package com.twitter.appprofiler.pig.piggybank
import org.apache.commons.lang.StringUtils
import org.apache.pig.{EvalFunc, FuncSpec, PigWarning}
import org.apache.pig.backend.executionengine.ExecException
import org.apache.pig.data.{DataByteArray, Tuple}
import java.io.IOException
import java.util.{List => JList}
/**
 * Raised when a tuple field cannot be read as the requested type.
 *
 * The exception carries a descriptive message so that stack traces and logs
 * identify the offending field even when the caller does not inspect `index`.
 *
 * @param index position of the tuple field that could not be cast
 */
class TupleParameterCastException(val index: Int)
  extends ClassCastException("can't cast tuple parameter %d".format(index))
/**
 * Wrapper that adds a string accessor to a Pig `Tuple`.
 *
 * @param t the tuple whose fields are read
 */
class RichTuple(t: Tuple) {

  /**
   * Reads field `i` of the wrapped tuple as a string.
   *
   * A `String` field is returned unchanged; a `DataByteArray` field is decoded
   * from its raw bytes as UTF-8.
   *
   * @param i index of the field, passed straight to `Tuple.get`
   * @return the string value of the field
   * @throws TupleParameterCastException if the field is neither a `String`
   *         nor a `DataByteArray` (a null field also ends up here, because
   *         null matches neither typed pattern)
   */
  def getString(i: Int): String =
    t.get(i) match {
      case s: String => s
      // Decode with an explicit charset: the single-argument String constructor
      // uses the JVM's platform-default charset, which differs between machines.
      // NOTE(review): UTF-8 is presumed to match how Pig encodes strings into
      // DataByteArray -- confirm against the DataByteArray documentation.
      case a: DataByteArray => new String(a.get, "UTF-8")
      case _ => throw new TupleParameterCastException(i)
    }
}
/** Implicit conversion that makes the `RichTuple` accessors available on a Pig `Tuple`. */
object RichTuple {

  /**
   * Wraps `t` in a `RichTuple`.
   *
   * The result type is spelled out explicitly: an implicit whose type is
   * inferred is only usable after its definition point in Scala 2 and is
   * rejected outright by Scala 3.
   *
   * @param t the tuple to wrap
   * @return a `RichTuple` view of `t`
   */
  implicit def tuple2RichTuple(t: Tuple): RichTuple = new RichTuple(t)
}
/**
 * Wrapper that adds an edit-distance operation to a string.
 *
 * @param s the string the distance is measured from
 */
class RichString(s: String) {

  /**
   * Computes the Levenshtein distance between the wrapped string and `os`
   * by delegating to `StringUtils.getLevenshteinDistance`.
   *
   * @param os the string to compare against
   * @return the distance reported by `StringUtils.getLevenshteinDistance(s, os)`
   */
  def levenshteinDistance(os: String): Int = {
    val distance = StringUtils.getLevenshteinDistance(s, os)
    distance
  }
}
/**
 * Base class for Pig eval functions whose result may be absent.
 *
 * Subclasses implement `execute`, returning `Some(value)` or `None`; `exec`
 * adapts that to the `EvalFunc` contract by returning the value or `null`.
 *
 * @tparam T the reference type produced by the function
 * @param manifest supplies the runtime class of `T`, reported by `getReturnType`
 */
abstract class DefaultEvalFunction[T >: Null <: AnyRef](implicit manifest: scala.reflect.Manifest[T]) extends EvalFunc[T] {

  /**
   * Runs `execute` and unwraps its result.
   *
   * Any `Exception` thrown by `execute` is reported through `warn` under
   * `PigWarning.UDF_WARNING_1` and turned into a `null` result rather than
   * propagated, so a single bad input does not abort the caller.
   *
   * @param input the tuple handed to the function
   * @return the value produced by `execute`, or `null` if it returned `None`
   *         or threw an `Exception`
   */
  override def exec(input: Tuple): T =
    try {
      execute(input).orNull
    } catch {
      case ex: Exception =>
        // Include the exception (class name and message) so the warning is
        // diagnosable; a bare "error" discards the only clue to the failure.
        warn("error: %s".format(ex), PigWarning.UDF_WARNING_1)
        null
    }

  /**
   * Reports the runtime class of `T`, taken from the implicit manifest.
   *
   * `Class` already implements `java.lang.reflect.Type`, so no cast is needed
   * once the result type is declared.
   */
  override def getReturnType: java.lang.reflect.Type = manifest.erasure

  /**
   * Computes the function's result for one input tuple.
   *
   * @param input the tuple handed to the function
   * @return `Some(result)`, or `None` when no result can be produced
   */
  def execute(input: Tuple): Option[T]
}
/** Implicit conversion that makes the `RichString` operations available on plain strings. */
object LevenshteinDistance {

  /**
   * Wraps `s` in a `RichString`.
   *
   * The result type is spelled out explicitly: an implicit whose type is
   * inferred is only usable after its definition point in Scala 2 and is
   * rejected outright by Scala 3.
   *
   * @param s the string to wrap
   * @return a `RichString` view of `s`
   */
  implicit def string2RichString(s: String): RichString = new RichString(s)
}
/**
 * Pig eval function that returns the Levenshtein distance between the first
 * two fields of its input tuple, each read as a string.
 */
class LevenshteinDistance extends DefaultEvalFunction[java.lang.Integer] {
  import LevenshteinDistance._
  import RichTuple._

  /**
   * Reads fields 0 and 1 as strings and computes their edit distance.
   *
   * @param input tuple holding the two strings to compare
   * @return `Some(distance)`, or `None` (after a `PigWarning.UDF_WARNING_2`
   *         warning) when one of the fields cannot be read as a string
   */
  def execute(input: Tuple): Option[java.lang.Integer] = {
    try {
      // Read both fields up front, in order, so a cast failure on field 0 is
      // reported before field 1 is touched.
      val first = input.getString(0)
      val second = input.getString(1)
      val distance = first.levenshteinDistance(second)
      Some(java.lang.Integer.valueOf(distance))
    } catch {
      case badParam: TupleParameterCastException =>
        val message = "can't cast parameter %d into a string".format(badParam.index)
        warn(message, PigWarning.UDF_WARNING_2)
        None
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment