Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Reading a double from Elasticsearch with Hive
#!/bin/bash
#
# Prepares the Elasticsearch side of a Hive read test: recreates the "hread"
# index, maps a string, a double and a float field on type "doc", indexes two
# documents and refreshes the index so they are searchable.
#
# Usage: [ES_CLUSTER=host:port] bash <this script>
#
# my configuration
# elasticsearch-1.0.0
# elasticsearch-hadoop-yarn.jar from 1.3.0.M2
# hadoop-2.2.0-bin
# hive-0.12.0-bin
# pig-0.12.0/ with recompiled pig-0.12.0-withouthadoop.jar for yarn

set -u

# host:port of the Elasticsearch HTTP endpoint. A value already present in the
# environment wins over the default.
ES_CLUSTER="${ES_CLUSTER:-localhost:9200}"

#######################################
# Print a message on stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Send one HTTP request to Elasticsearch and print the response on stdout.
# Globals:   ES_CLUSTER (read)
# Arguments: $1 - HTTP method (DELETE, POST, ...)
#            $2 - path and query string, relative to the cluster root
#            $3 - optional JSON request body
# Returns:   aborts the script when curl itself fails (e.g. connection
#            refused). HTTP error statuses are NOT fatal: curl is run without
#            --fail so the JSON error document stays visible.
#######################################
es_request() {
  local method=$1
  local path=$2
  local url="http://${ES_CLUSTER}/${path}"
  local -a curl_args=(-X"${method}" "${url}")

  if (( $# >= 3 )); then
    # Declare the payload as JSON instead of curl's form-encoded default.
    curl_args+=(-H 'Content-Type: application/json' -d "$3")
  fi

  curl "${curl_args[@]}" || die "request failed: ${method} ${url}"
}

main() {
  # Remove old data. On a first run the index does not exist yet; that only
  # yields an HTTP 404 response, which curl does not report as a failure.
  es_request DELETE 'hread?pretty'

  # Create index with settings
  es_request POST 'hread/?pretty' '{
  "settings" : {
    "index" : {
      "number_of_shards" : 1,
      "number_of_replicas" : 0
    }
  }
}'

  # Define mapping
  es_request POST 'hread/doc/_mapping?pretty' '{
  "doc" : {
    "properties" : {
      "my_id" : {
        "type" : "string"
      },
      "my_double" : {
        "type" : "double"
      },
      "my_float" : {
        "type" : "float"
      }
    }
  }
}'

  # Create Documents
  es_request POST 'hread/doc/?pretty' '{
  "my_id":"doc1",
  "my_double": 1.1,
  "my_float": 1.12
}'
  es_request POST 'hread/doc/?pretty' '{
  "my_id":"doc2",
  "my_double": 2.2,
  "my_float": 2.22
}'

  # Wait for ES to be synced (aka refresh indices)
  es_request POST 'hread/_refresh?pretty'
}

main "$@"

# Stop here: everything after this line is Hive input, not shell.
exit 0
######################################
# HIVE COMMANDS
######################################
# The statements below are Hive input, not shell: the `exit` above ends the
# script before this point, so they have to be run by hand in Hive once the
# "hread" index has been populated. Lines starting with `#` record the output
# that was observed.

# Start from a clean table definition.
DROP TABLE es_read;
# Expose the "hread/doc" Elasticsearch resource as an external Hive table whose
# columns carry the same names as the mapped fields (my_id, my_double, my_float).
# NOTE(review): the empty 'es.query' presumably means "no filter" - confirm
# against the elasticsearch-hadoop configuration documentation.
CREATE EXTERNAL TABLE es_read (my_id STRING, my_double DOUBLE, my_float FLOAT)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES('es.resource' = 'hread/doc', 'es.query' = '');
# Control query: reading only the string and the float column works.
SELECT my_id, my_float FROM es_read;
# OK
# doc1 1.12
# doc2 2.22
# Failing query: as soon as the DOUBLE column is selected, Hive throws. Per the
# trace, the value arrives as org.apache.hadoop.io.DoubleWritable while Hive's
# WritableDoubleObjectInspector casts to org.apache.hadoop.hive.serde2.io.DoubleWritable.
# NOTE(review): looks like a writable-type mismatch between the storage handler
# and this Hive version - confirm against the elasticsearch-hadoop release notes.
SELECT my_id, my_double, my_float FROM es_read;
# Caused by: java.lang.ClassCastException: org.apache.hadoop.io.DoubleWritable cannot be cast to org.apache.hadoop.hive.serde2.io.DoubleWritable
# at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector.get(WritableDoubleObjectInspector.java:35)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.