Created
March 23, 2017 13:52
-
-
Save developer-sdk/1c7455e6853c7d3f5c567f52635e7530 to your computer and use it in GitHub Desktop.
hive UDF example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.List; | |
import org.apache.hadoop.hive.ql.exec.Description; | |
import org.apache.hadoop.hive.ql.exec.UDFArgumentException; | |
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; | |
import org.apache.hadoop.hive.ql.metadata.HiveException; | |
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; | |
import org.apache.hadoop.hive.serde2.lazy.LazyString; | |
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; | |
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; | |
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; | |
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; | |
import org.apache.hadoop.io.IntWritable; | |
@Description(name="sumListStringLength", value="_FUNC_(value) - Returns value that sum list string length.", extended="Example:\n > SELECT _FUNC_(Array<String>) FROM table LIMIT 1;") | |
public class ListGenericUDF extends GenericUDF { | |
ListObjectInspector listOi; | |
@Override | |
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { | |
// initialize 함수에서는 다음과 같은 역활을 진행 | |
// 입력받은 파라미터에 대한 검증 | |
// 반환하는 파라미터에 대한 검증 | |
// 함수에 입력받는 파라미터 개수 확인 | |
if(arguments.length != 1) | |
throw new UDFArgumentLengthException("function argument need 1."); | |
// 파라미터의 타입 확인 | |
ObjectInspector inspector = arguments[0]; | |
if( !(inspector instanceof ListObjectInspector) ) | |
throw new UDFArgumentException("function argument need List"); | |
listOi = (ListObjectInspector) inspector; | |
// 입력받는 리스트내 엘리먼트의 객체 타입 확인 | |
if( !(listOi.getListElementObjectInspector() instanceof StringObjectInspector) ) | |
throw new UDFArgumentException("array argument need "); | |
// 반환은 문자열의 수이므로 int 객체 반환 | |
return PrimitiveObjectInspectorFactory.writableIntObjectInspector; | |
} | |
@SuppressWarnings("unchecked") | |
@Override | |
public Object evaluate(DeferredObject[] arguments) throws HiveException { | |
// arguments의 객체를 형변환 | |
List<LazyString> list = (List<LazyString>) listOi.getList(arguments[0].get()); | |
if(list == null) | |
return null; | |
int sum = 0; | |
for(LazyString str : list) { | |
sum += str.getWritableObject().getLength(); | |
} | |
return new IntWritable(sum); | |
} | |
@Override | |
public String getDisplayString(String[] children) { | |
StringBuffer buffer = new StringBuffer(); | |
buffer.append("sumListStringLength(Array<String>), "); | |
for(String child : children) | |
buffer.append(child).append(","); | |
return buffer.toString(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.Map; | |
import java.util.TreeMap; | |
import org.apache.hadoop.hive.ql.exec.UDFArgumentException; | |
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; | |
import org.apache.hadoop.hive.ql.metadata.HiveException; | |
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; | |
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; | |
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; | |
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; | |
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; | |
import org.apache.hadoop.io.Text; | |
public class MapGenericUDF extends GenericUDF { | |
MapObjectInspector mapOi; | |
StringObjectInspector keyOi; | |
@Override | |
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { | |
// initialize 함수에서는 다음과 같은 역활을 진행 | |
// 입력받은 파라미터에 대한 검증 | |
// 반환하는 파라미터에 대한 검증 | |
// 함수에 입력받는 파라미터 개수 확인 | |
if(arguments.length != 2) | |
throw new UDFArgumentLengthException("function argument need 2."); | |
// 파라미터의 타입 확인 | |
ObjectInspector inspector = arguments[0]; | |
if( !(inspector instanceof MapObjectInspector) ) | |
throw new UDFArgumentException("function argument Map, String"); | |
mapOi = (MapObjectInspector) inspector; | |
ObjectInspector strIns = arguments[1]; | |
if( !(strIns instanceof StringObjectInspector) ) | |
throw new UDFArgumentException("function argument Map, String"); | |
keyOi = (StringObjectInspector) strIns; | |
// 맵의 키와 값의 객체 타입이 String 인지 확인 | |
if( !(mapOi.getMapKeyObjectInspector() instanceof StringObjectInspector) || !(mapOi.getMapValueObjectInspector() instanceof StringObjectInspector) ) | |
throw new UDFArgumentException("Map type is Map<String, String>"); | |
// 반환은 문자열의 수이므로 int 객체 반환 | |
return PrimitiveObjectInspectorFactory.javaStringObjectInspector; | |
} | |
@SuppressWarnings("unchecked") | |
@Override | |
public Object evaluate(DeferredObject[] arguments) throws HiveException { | |
Map<Object, Object> objMap = (Map<Object, Object>) mapOi.getMap(arguments[0].get()); | |
Map<String, String> map = converMapType(objMap); | |
Text key = keyOi.getPrimitiveWritableObject(arguments[1].get()); | |
return map.containsKey(key.toString()) ? map.get(key.toString()) : "None Data"; | |
} | |
/** | |
* Map의 키 타입이 String이 아니어서 값을 빼낼수 없을 수 있으므로 키, 밸류를 String 으로 변환 | |
* | |
* @param strMap | |
* @return | |
*/ | |
public Map<String, String> converMapType(Map<Object, Object> strMap) { | |
Map<String, String> newMap = new TreeMap<String, String>(); | |
for (Object keyObj : strMap.keySet()) { | |
newMap.put(keyObj.toString(), strMap.get(keyObj).toString()); | |
} | |
return newMap; | |
} | |
@Override | |
public String getDisplayString(String[] children) { | |
return "Map<String, String>"; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.Map; | |
import org.apache.hadoop.hive.ql.exec.UDF; | |
import org.apache.hadoop.io.Text; | |
public class SampleUDF extends UDF { | |
public Text evaluate(Text text) { | |
// 입력받은 문자를 대문자로 반환 | |
return new Text(text.toString().toUpperCase()); | |
} | |
public int evaluate(int number) { | |
// 입력받은 숫자에 1을 더하여 반환 | |
return number + 1; | |
} | |
public String evaluate(Map<String, String> map, String key) { | |
// 입력받은 키의 밸류가 있으면 반환하고, 없으면 None를 반환 | |
return map.containsKey(key) ? map.get(key) : "None"; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment