Last active
August 29, 2015 14:06
-
-
Save rjurney/2315553d696136908fb3 to your computer and use it in GitHub Desktop.
ChooseFieldByValue UDF with problems
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class ChooseFieldByValue extends EvalFunc<Tuple> | |
{ | |
private TupleFactory tf = TupleFactory.getInstance(); | |
// Enable multiple languages by specifying the model path. See http://text.sourceforge.net/models-1.5/ | |
public Tuple exec(Tuple input) throws IOException | |
{ | |
if(input.size() < 2) { | |
throw new IOException(); | |
} | |
String fieldNameToReturn = input.get(0).toString(); | |
if(fieldNameToReturn == null || fieldNameToReturn == "") { | |
return null; | |
} | |
Tuple outTuple = tf.newTuple(); | |
Schema inputSchema = getInputSchema(); | |
for(int i=0; i < input.size(); i++) | |
{ | |
Schema.FieldSchema fieldSchema = inputSchema.getField(i); | |
System.err.println("i: " + Integer.toString(i)); | |
System.err.println("fieldSchema.byte: [" + DataType.findTypeName(fieldSchema.type) + "]"); | |
System.err.println("fieldSchema.alias: [" + fieldSchema.alias + "]"); | |
System.err.println("fieldNameToReturn: [" + fieldNameToReturn + "]"); | |
Object matchedValue = input.get(i); | |
System.err.println("input.get(i): [" + matchedValue.toString() + "]"); | |
if(fieldSchema.alias.equals(fieldNameToReturn)) { | |
System.err.println("Matched fieldname " + fieldNameToReturn + " with value: " + matchedValue.toString()); | |
outTuple.append(matchedValue); | |
break; | |
} | |
} | |
return outTuple; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package datafu.test.pig.util; | |
import java.util.List; | |
import junit.framework.Assert; | |
import org.adrianwalker.multilinestring.Multiline; | |
import org.apache.pig.data.Tuple; | |
import org.apache.pig.impl.logicalLayer.FrontendException; | |
import org.apache.pig.pigunit.PigTest; | |
import org.joda.time.DateTime; | |
import org.joda.time.DateTimeZone; | |
import org.testng.annotations.Test; | |
import datafu.test.pig.PigTests; | |
public class ChooseFieldByValueTest extends PigTests | |
{ | |
/** | |
define ChooseFieldByValue datafu.pig.util.ChooseFieldByValue(); | |
data = LOAD 'input' using PigStorage(',') AS (fieldName:chararray, text1:chararray, text2:chararray, text3:chararray); | |
data2 = FOREACH data GENERATE ChooseFieldByValue(fieldName,*) as result; | |
describe data2; | |
data3 = FOREACH data2 GENERATE result; | |
STORE data3 INTO 'output'; | |
*/ | |
@Multiline private static String chooseFieldByValueTest; | |
@Test | |
public void chooseFieldByValueTest() throws Exception | |
{ | |
PigTest test = createPigTestFromString(chooseFieldByValueTest); | |
writeLinesToFile("input", | |
"text1,text1,hi,how,are", | |
"text2,text2,you,sir,today", | |
"text3,text3,bob,is,a", | |
"text1,text1,friend,of,mine", | |
"text2,text2,and,I,say", | |
"text3,text3,he,is,nice."); | |
//test.runScript(); | |
assertOutput(test, "data3", | |
"(hi)", | |
"(sir)", | |
"(a)", | |
"(friend)", | |
"(I)", | |
"(nice)"); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15:13:44.603 [DEBUG] [TestEventLogger] Gradle test > datafu.test.pig.util.ChooseFieldByValueTest.chooseFieldByValueTest STANDARD_ERROR | |
15:13:44.604 [DEBUG] [TestEventLogger] i: 0 | |
15:13:44.604 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.604 [DEBUG] [TestEventLogger] fieldSchema.alias: [fieldName] | |
15:13:44.604 [DEBUG] [TestEventLogger] fieldNameToReturn: [text1] | |
15:13:44.605 [DEBUG] [TestEventLogger] input.get(i): [text1] | |
15:13:44.605 [DEBUG] [TestEventLogger] i: 1 | |
15:13:44.605 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.606 [DEBUG] [TestEventLogger] fieldSchema.alias: [fieldName] | |
15:13:44.606 [DEBUG] [TestEventLogger] fieldNameToReturn: [text1] | |
15:13:44.606 [DEBUG] [TestEventLogger] input.get(i): [text1] | |
15:13:44.606 [DEBUG] [TestEventLogger] i: 2 | |
15:13:44.607 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.607 [DEBUG] [TestEventLogger] fieldSchema.alias: [text1] | |
15:13:44.607 [DEBUG] [TestEventLogger] fieldNameToReturn: [text1] | |
15:13:44.607 [DEBUG] [TestEventLogger] input.get(i): [text1] | |
15:13:44.608 [DEBUG] [TestEventLogger] Matched fieldname text1 with value: text1 | |
15:13:44.608 [DEBUG] [TestEventLogger] i: 0 | |
15:13:44.608 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.609 [DEBUG] [TestEventLogger] fieldSchema.alias: [fieldName] | |
15:13:44.609 [DEBUG] [TestEventLogger] fieldNameToReturn: [text2] | |
15:13:44.609 [DEBUG] [TestEventLogger] input.get(i): [text2] | |
15:13:44.610 [DEBUG] [TestEventLogger] i: 1 | |
15:13:44.611 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.611 [DEBUG] [TestEventLogger] fieldSchema.alias: [fieldName] | |
15:13:44.612 [DEBUG] [TestEventLogger] fieldNameToReturn: [text2] | |
15:13:44.612 [DEBUG] [TestEventLogger] input.get(i): [text2] | |
15:13:44.612 [DEBUG] [TestEventLogger] i: 2 | |
15:13:44.613 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.613 [DEBUG] [TestEventLogger] fieldSchema.alias: [text1] | |
15:13:44.614 [DEBUG] [TestEventLogger] fieldNameToReturn: [text2] | |
15:13:44.614 [DEBUG] [TestEventLogger] input.get(i): [text2] | |
15:13:44.615 [DEBUG] [TestEventLogger] i: 3 | |
15:13:44.615 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.615 [DEBUG] [TestEventLogger] fieldSchema.alias: [text2] | |
15:13:44.616 [DEBUG] [TestEventLogger] fieldNameToReturn: [text2] | |
15:13:44.616 [DEBUG] [TestEventLogger] input.get(i): [you] | |
15:13:44.617 [DEBUG] [TestEventLogger] Matched fieldname text2 with value: you | |
15:13:44.617 [DEBUG] [TestEventLogger] i: 0 | |
15:13:44.618 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.618 [DEBUG] [TestEventLogger] fieldSchema.alias: [fieldName] | |
15:13:44.618 [DEBUG] [TestEventLogger] fieldNameToReturn: [text3] | |
15:13:44.618 [DEBUG] [TestEventLogger] input.get(i): [text3] | |
15:13:44.619 [DEBUG] [TestEventLogger] i: 1 | |
15:13:44.619 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.619 [DEBUG] [TestEventLogger] fieldSchema.alias: [fieldName] | |
15:13:44.619 [DEBUG] [TestEventLogger] fieldNameToReturn: [text3] | |
15:13:44.620 [DEBUG] [TestEventLogger] input.get(i): [text3] | |
15:13:44.620 [DEBUG] [TestEventLogger] i: 2 | |
15:13:44.620 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.620 [DEBUG] [TestEventLogger] fieldSchema.alias: [text1] | |
15:13:44.621 [DEBUG] [TestEventLogger] fieldNameToReturn: [text3] | |
15:13:44.621 [DEBUG] [TestEventLogger] input.get(i): [text3] | |
15:13:44.621 [DEBUG] [TestEventLogger] i: 3 | |
15:13:44.621 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.622 [DEBUG] [TestEventLogger] fieldSchema.alias: [text2] | |
15:13:44.622 [DEBUG] [TestEventLogger] fieldNameToReturn: [text3] | |
15:13:44.622 [DEBUG] [TestEventLogger] input.get(i): [bob] | |
15:13:44.622 [DEBUG] [TestEventLogger] i: 4 | |
15:13:44.623 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.623 [DEBUG] [TestEventLogger] fieldSchema.alias: [text3] | |
15:13:44.623 [DEBUG] [TestEventLogger] fieldNameToReturn: [text3] | |
15:13:44.623 [DEBUG] [TestEventLogger] input.get(i): [is] | |
15:13:44.624 [DEBUG] [TestEventLogger] Matched fieldname text3 with value: is | |
15:13:44.624 [DEBUG] [TestEventLogger] i: 0 | |
15:13:44.624 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.624 [DEBUG] [TestEventLogger] fieldSchema.alias: [fieldName] | |
15:13:44.625 [DEBUG] [TestEventLogger] fieldNameToReturn: [text1] | |
15:13:44.625 [DEBUG] [TestEventLogger] input.get(i): [text1] | |
15:13:44.625 [DEBUG] [TestEventLogger] i: 1 | |
15:13:44.625 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.626 [DEBUG] [TestEventLogger] fieldSchema.alias: [fieldName] | |
15:13:44.626 [DEBUG] [TestEventLogger] fieldNameToReturn: [text1] | |
15:13:44.626 [DEBUG] [TestEventLogger] input.get(i): [text1] | |
15:13:44.626 [DEBUG] [TestEventLogger] i: 2 | |
15:13:44.626 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.627 [DEBUG] [TestEventLogger] fieldSchema.alias: [text1] | |
15:13:44.627 [DEBUG] [TestEventLogger] fieldNameToReturn: [text1] | |
15:13:44.627 [DEBUG] [TestEventLogger] input.get(i): [text1] | |
15:13:44.627 [DEBUG] [TestEventLogger] Matched fieldname text1 with value: text1 | |
15:13:44.628 [DEBUG] [TestEventLogger] i: 0 | |
15:13:44.628 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.628 [DEBUG] [TestEventLogger] fieldSchema.alias: [fieldName] | |
15:13:44.628 [DEBUG] [TestEventLogger] fieldNameToReturn: [text2] | |
15:13:44.629 [DEBUG] [TestEventLogger] input.get(i): [text2] | |
15:13:44.629 [DEBUG] [TestEventLogger] i: 1 | |
15:13:44.629 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.629 [DEBUG] [TestEventLogger] fieldSchema.alias: [fieldName] | |
15:13:44.629 [DEBUG] [TestEventLogger] fieldNameToReturn: [text2] | |
15:13:44.630 [DEBUG] [TestEventLogger] input.get(i): [text2] | |
15:13:44.630 [DEBUG] [TestEventLogger] i: 2 | |
15:13:44.630 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.630 [DEBUG] [TestEventLogger] fieldSchema.alias: [text1] | |
15:13:44.631 [DEBUG] [TestEventLogger] fieldNameToReturn: [text2] | |
15:13:44.631 [DEBUG] [TestEventLogger] input.get(i): [text2] | |
15:13:44.631 [DEBUG] [TestEventLogger] i: 3 | |
15:13:44.631 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.631 [DEBUG] [TestEventLogger] fieldSchema.alias: [text2] | |
15:13:44.632 [DEBUG] [TestEventLogger] fieldNameToReturn: [text2] | |
15:13:44.632 [DEBUG] [TestEventLogger] input.get(i): [and] | |
15:13:44.632 [DEBUG] [TestEventLogger] Matched fieldname text2 with value: and | |
15:13:44.632 [DEBUG] [TestEventLogger] i: 0 | |
15:13:44.633 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.633 [DEBUG] [TestEventLogger] fieldSchema.alias: [fieldName] | |
15:13:44.633 [DEBUG] [TestEventLogger] fieldNameToReturn: [text3] | |
15:13:44.633 [DEBUG] [TestEventLogger] input.get(i): [text3] | |
15:13:44.634 [DEBUG] [TestEventLogger] i: 1 | |
15:13:44.634 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.634 [DEBUG] [TestEventLogger] fieldSchema.alias: [fieldName] | |
15:13:44.634 [DEBUG] [TestEventLogger] fieldNameToReturn: [text3] | |
15:13:44.634 [DEBUG] [TestEventLogger] input.get(i): [text3] | |
15:13:44.635 [DEBUG] [TestEventLogger] i: 2 | |
15:13:44.635 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.635 [DEBUG] [TestEventLogger] fieldSchema.alias: [text1] | |
15:13:44.635 [DEBUG] [TestEventLogger] fieldNameToReturn: [text3] | |
15:13:44.636 [DEBUG] [TestEventLogger] input.get(i): [text3] | |
15:13:44.636 [DEBUG] [TestEventLogger] i: 3 | |
15:13:44.636 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.636 [DEBUG] [TestEventLogger] fieldSchema.alias: [text2] | |
15:13:44.636 [DEBUG] [TestEventLogger] fieldNameToReturn: [text3] | |
15:13:44.637 [DEBUG] [TestEventLogger] input.get(i): [he] | |
15:13:44.637 [DEBUG] [TestEventLogger] i: 4 | |
15:13:44.637 [DEBUG] [TestEventLogger] fieldSchema.byte: [chararray] | |
15:13:44.637 [DEBUG] [TestEventLogger] fieldSchema.alias: [text3] | |
15:13:44.638 [DEBUG] [TestEventLogger] fieldNameToReturn: [text3] | |
15:13:44.638 [DEBUG] [TestEventLogger] input.get(i): [is] | |
15:13:44.638 [DEBUG] [TestEventLogger] Matched fieldname text3 with value: is | |
15:13:47.865 [DEBUG] [TestEventLogger] | |
15:13:47.865 [DEBUG] [TestEventLogger] Gradle test > datafu.test.pig.util.ChooseFieldByValueTest.chooseFieldByValueTest STANDARD_OUT | |
15:13:47.866 [DEBUG] [TestEventLogger] Values for data3: | |
15:13:47.868 [DEBUG] [TestEventLogger] ((text1)) | |
15:13:47.868 [DEBUG] [TestEventLogger] ((you)) | |
15:13:47.869 [DEBUG] [TestEventLogger] ((is)) | |
15:13:47.869 [DEBUG] [TestEventLogger] ((text1)) | |
15:13:47.869 [DEBUG] [TestEventLogger] ((and)) | |
15:13:47.870 [DEBUG] [TestEventLogger] ((is)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment