Skip to content

Instantly share code, notes, and snippets.

@jpparis-orange
Last active August 29, 2015 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpparis-orange/9319913 to your computer and use it in GitHub Desktop.
Save jpparis-orange/9319913 to your computer and use it in GitHub Desktop.
Using Hive to copy one Elasticsearch index to another
#!/bin/bash
# Prepares two Elasticsearch indices for a Hive copy experiment:
#   hread  - source index, seeded with two documents
#   hwrite - empty destination index
#
# Environment this was written against:
#   elasticsearch-1.0.0
#   elasticsearch-hadoop-yarn.jar from 1.3.0.M2
#   hadoop-2.2.0-bin
#   hive-0.12.0-bin
ES_CLUSTER="localhost:9200"

# Send a single HTTP request to the cluster.
#   $1     HTTP method (DELETE, POST, ...)
#   $2     path plus query string, relative to http://${ES_CLUSTER}/
#   $3...  any further curl arguments, e.g. -d '<json body>'
# The function's status is curl's status.
es_call() {
  local verb="$1" target="$2"
  shift 2
  curl -X"${verb}" "http://${ES_CLUSTER}/${target}" "$@"
}

# ---- source index: hread ----

# Drop whatever a previous run left behind.
es_call DELETE "hread?pretty"

# Recreate it with one shard and no replicas.
es_call POST "hread/?pretty" -d '{
"settings" : {
"index" : {
"number_of_shards" : 1,
"number_of_replicas" : 0
}
}
}'

# Mapping for type "doc": the document _id is taken from the my_id field.
es_call POST "hread/doc/_mapping?pretty" -d '
{
"doc" : {
"_id" : { "path" : "my_id" },
"properties" : {
"my_id" : {
"type" : "string"
}
}
}
}'

# Seed two documents.
es_call POST "hread/doc/?pretty" -d '
{
"my_id":"doc1"
}'
es_call POST "hread/doc/?pretty" -d '
{
"my_id":"doc2"
}'

# Refresh the index before moving on to the destination index.
es_call POST "hread/_refresh?pretty"

# ---- destination index: hwrite ----

# Drop whatever a previous run left behind.
es_call DELETE "hwrite?pretty"

# Recreate it with one shard and no replicas.
es_call POST "hwrite/?pretty" -d '{
"settings" : {
"index" : {
"number_of_shards" : 1,
"number_of_replicas" : 0
}
}
}'

# Same mapping as hread, except that my_id is declared not_analyzed here.
es_call POST "hwrite/doc/_mapping?pretty" -d '
{
"doc" : {
"_id" : { "path" : "my_id" },
"properties" : {
"my_id" : {
"type" : "string",
"index" : "not_analyzed"
}
}
}
}'

# Stop here: everything after this line is HiveQL, not shell.
exit
-- ------------------------------------------------------------------
-- HIVE COMMANDS
--
-- The shell script above exits before reaching this section, so these
-- statements are meant to be run in Hive separately. Comments are written
-- as full-line "--" comments so the section is valid HiveQL as-is.
--
-- Goal: copy every my_id from the Elasticsearch resource hread/doc into
-- hwrite/doc through two external tables backed by EsStorageHandler.
-- ------------------------------------------------------------------

-- Attempt 1: both tables are configured through the generic 'es.resource'
-- property.
-- NOTE(review): the remark before attempt 2 implies this variant did not do
-- the expected job before 68cd50e -- confirm against that commit.
DROP TABLE IF EXISTS es_read;
CREATE EXTERNAL TABLE es_read (my_id STRING)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES (
    'es.resource' = 'hread/doc',
    'es.query' = ''
);

DROP TABLE IF EXISTS es_write;
CREATE EXTERNAL TABLE es_write (my_id STRING)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES (
    'es.resource' = 'hwrite/doc',
    'es.mapping.id' = 'my_id'
);

INSERT OVERWRITE TABLE es_write
SELECT my_id
FROM es_read;

-- Attempt 2: the following commands do the expected job after 68cd50e
-- (presumably an elasticsearch-hadoop commit -- confirm).
-- The only difference from attempt 1 is that the source table uses
-- 'es.resource.read' and the destination table uses 'es.resource.write'.
DROP TABLE IF EXISTS es_read;
CREATE EXTERNAL TABLE es_read (my_id STRING)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES (
    'es.resource.read' = 'hread/doc',
    'es.query' = ''
);

DROP TABLE IF EXISTS es_write;
CREATE EXTERNAL TABLE es_write (my_id STRING)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES (
    'es.resource.write' = 'hwrite/doc',
    'es.mapping.id' = 'my_id'
);

INSERT OVERWRITE TABLE es_write
SELECT my_id
FROM es_read;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment