Skip to content

Instantly share code, notes, and snippets.

@thanoojgithub
Last active December 11, 2015 10:19
Show Gist options
  • Save thanoojgithub/b3509b20e7726ea0e0f0 to your computer and use it in GitHub Desktop.
Save thanoojgithub/b3509b20e7726ea0e0f0 to your computer and use it in GitHub Desktop.
Hive UDF - for Gender function
package com.mapr.hive;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF that prefixes a person's name with a courtesy title.
 *
 * <p>Two overloads are provided:
 * <ul>
 *   <li>{@code evaluate(name, gender, mStatus)} picks the title from the gender code and the
 *       marital status.</li>
 *   <li>{@code evaluate(name)} always uses the male title.</li>
 * </ul>
 *
 * <p>Both overloads write into, and return, the same {@link Text} instance held by this
 * object, so a caller that needs to keep a value across calls must copy it.
 */
public class UDFGender extends UDF {

    private static final String MALE_TITLE = "Mr.";
    private static final String MARRIED_FEMALE_TITLE = "Mrs.";
    private static final String SINGLE_FEMALE_TITLE = "Miss.";
    /** Used for a female whose marital status is missing or is neither married nor single. */
    private static final String NEUTRAL_FEMALE_TITLE = "Ms.";

    private static final String FEMALE_CODE = "F";
    private static final String MARRIED_STATUS = "married";
    private static final String SINGLE_STATUS = "single";

    private static final String TRACE_PREFIX = "UDFGender.evaluate() :: ";

    /** Output holder shared by every call on this instance. */
    private final Text result = new Text();

    /**
     * Builds {@code "<title> <name>"} from a gender code and a marital status.
     *
     * <p>Title selection (codes are compared case-insensitively, ignoring surrounding
     * whitespace):
     * <ul>
     *   <li>gender {@code F} and status {@code married}: {@code "Mrs."}</li>
     *   <li>gender {@code F} and status {@code single}: {@code "Miss."}</li>
     *   <li>gender {@code F} and any other or missing status: {@code "Ms."}</li>
     *   <li>any other gender code: {@code "Mr."}</li>
     * </ul>
     *
     * @param name    the name to prefix; may be {@code null}
     * @param gender  the gender code; may be {@code null}
     * @param mStatus the marital status; may be {@code null}
     * @return the titled name, or {@code null} when {@code name} or {@code gender} is
     *         {@code null}
     */
    public Text evaluate(Text name, Text gender, Text mStatus) {
        // Each call writes the same trace line to stdout twice (once on entry, once in the
        // branch taken); that console output is kept unchanged.
        final String trace = TRACE_PREFIX + name + " " + gender + " " + mStatus;
        System.out.println(trace);
        System.out.println(trace);

        if (name == null || gender == null) {
            return null;
        }

        result.set(titleFor(gender, mStatus) + " " + name);
        return result;
    }

    /**
     * Builds {@code "Mr. <name>"}.
     *
     * @param name the name to prefix; may be {@code null}
     * @return the titled name, or {@code null} when {@code name} is {@code null}
     */
    public Text evaluate(Text name) {
        System.out.println(TRACE_PREFIX + name);
        if (name == null) {
            return null;
        }
        result.set(MALE_TITLE + " " + name);
        return result;
    }

    /** Chooses the courtesy title for a non-null gender code and a possibly-null status. */
    private static String titleFor(Text gender, Text mStatus) {
        if (!matches(gender, FEMALE_CODE)) {
            return MALE_TITLE;
        }
        if (matches(mStatus, MARRIED_STATUS)) {
            return MARRIED_FEMALE_TITLE;
        }
        if (matches(mStatus, SINGLE_STATUS)) {
            return SINGLE_FEMALE_TITLE;
        }
        // A female with an unrecognised or missing status must not fall through to "Mr.".
        return NEUTRAL_FEMALE_TITLE;
    }

    /** Null-safe, case-insensitive, whitespace-tolerant comparison of a Text value. */
    private static boolean matches(Text value, String expected) {
        return value != null && expected.equalsIgnoreCase(value.toString().trim());
    }
}
/* NOTE ::
hive> create table thanooj.employees (ID INT, NAME STRING, GENDER STRING, MSTATUS STRING, DOB STRING, SALARY INT, TITLE STRING, DEPT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';
employees.txt
-------------
10001,sriram,M,married,1989-09-12,30000,tl,d003
10002,seeta,F,married,1989-09-12,30000,tl,d003
10003,lakshman,M,married,1986-08-28,20000,ml,d004
10004,bharatha,M,married,1986-12-01,19000,ml,d004
10005,sethrugna,M,married,1989-09-12,15000,sse,d003
10006,hanuma,M,single,1989-09-12,18000,sse,d003
10007,ahalya,F,single,1989-09-12,18000,sse,d003
hive> LOAD DATA LOCAL INPATH '/home/ubuntu/input/employees.txt' OVERWRITE INTO TABLE THANOOJ.employees;
hive> ADD JAR /home/ubuntu/input/UDFGender.jar;
Added [/home/ubuntu/input/UDFGender.jar] to class path
Added resources: [/home/ubuntu/input/UDFGender.jar]
hive> CREATE TEMPORARY FUNCTION uDFGender AS 'com.mapr.hive.UDFGender';
OK
Time taken: 0.01 seconds
---------------------------------------------------------------------------------------
MISC INFO ::
To use the UDF in Hive, we first need to package the compiled Java class in a JAR file.
You can do this by typing mvn package with the book’s example code. Next, we register
the function in the metastore and give it a name using the CREATE FUNCTION statement:
CREATE FUNCTION strip AS 'com.hadoopbook.hive.Strip'
USING JAR '/path/to/hive-examples.jar';
A UDF must satisfy the following two properties:
  1. A UDF must be a subclass of org.apache.hadoop.hive.ql.exec.UDF.
  2. A UDF must implement at least one evaluate() method.
The evaluate() method is not defined by an interface, since it may take an arbitrary
number of arguments, of arbitrary types, and it may return a value of arbitrary type. Hive
introspects the UDF to find the evaluate() method that matches the Hive function that
was invoked.
When using temporary functions, it may be useful to create a .hiverc file in your home
directory containing the commands to define your UDFs. The file will be automatically
run at the beginning of each Hive session.
*/
/*
OUTPUT ::
hive> select uDFGender(name,gender,mstatus) from thanooj.employees;
OK
UDFGender.evaluate() :: sriram M married
UDFGender.evaluate() :: sriram M married
UDFGender.evaluate() :: seeta F married
UDFGender.evaluate() :: seeta F married
UDFGender.evaluate() :: lakshman M married
UDFGender.evaluate() :: lakshman M married
UDFGender.evaluate() :: bharatha M married
UDFGender.evaluate() :: bharatha M married
UDFGender.evaluate() :: sethrugna M married
UDFGender.evaluate() :: sethrugna M married
UDFGender.evaluate() :: hanuma M single
UDFGender.evaluate() :: hanuma M single
UDFGender.evaluate() :: ahalya F single
UDFGender.evaluate() :: ahalya F single
Mr. sriram
Mrs. seeta
Mr. lakshman
Mr. bharatha
Mr. sethrugna
Mr. hanuma
Miss. ahalya
Time taken: 0.066 seconds, Fetched: 7 row(s)
hive>
*/
@thanoojgithub
Copy link
Author

initial commit

@thanoojgithub
Copy link
Author

added notes

@thanoojgithub
Copy link
Author

add input file and create table and load file - into hive metadataDB

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment