Skip to content

Instantly share code, notes, and snippets.

@91pavan
Created May 21, 2014 08:46
Show Gist options
  • Save 91pavan/92a8b8c6dd24990efa53 to your computer and use it in GitHub Desktop.
Save 91pavan/92a8b8c6dd24990efa53 to your computer and use it in GitHub Desktop.
CustomMap pig UDF to return non-null bag of tuples
package com.example.pigudf;
import java.io.IOException;
import java.util.Map;
import java.util.HashMap;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.Schema;
/*
*
* Build:
mkdir target
javac -d target -cp ../pig.jar CustomMap.java
jar -cf target/CustomMap.jar -C target/ .
* Pig Usage:
file.pig ->
*REGISTER 'CustomMap.jar'
*DEFINE CustomMap com.example.pigudf.CustomMap();
*
* This class generates a map from the parameters passed to it, omitting any key-value pair whose value is null or empty.
* T = foreach U generate CustomMap($0, $1, $2, $3);
* It generates the map $0->$1, $2->$3
*/
public class CustomMap extends EvalFunc<Map<String, String>> {
@Override
public Map<String,String> exec(Tuple input) throws IOException {
if(input == null || input.size() < 2) {
return null;
}
try {
Map<String, String> output = new HashMap<String, String>();
for(int i=0; i<input.size();i=i+2) {
String key = (String)input.get(i);
String value = (String)input.get(i+1);
if(val!=null&&val.length()>0){
output.put(key,value);
}
}
return output;
catch (ClassCastException e){
throw new RuntimeException("Map key must be a String");
} catch (ArrayIndexOutOfBoundsException e){
throw new RuntimeException("Function input must have even number of parameters");
} catch (Exception e) {
throw new RuntimeException("Error while creating a map", e);
}
}
@Override
public Schema outputSchema(Schema input) {
return new Schema(new Schema.FieldSchema(null, DataType.MAP));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment