Skip to content

Instantly share code, notes, and snippets.

@barata0
Created June 15, 2016 17:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save barata0/b37cddf70dfb801aa5aed3255d4b543c to your computer and use it in GitHub Desktop.
Save barata0/b37cddf70dfb801aa5aed3255d4b543c to your computer and use it in GitHub Desktop.
Elasticsearch reindex Groovy script. Reads every document from oldServer/oldIndex/oldType and saves it to newServer/newIndex/newType.
/*
* Reindex elasticsearch
* Reads all documents from oldServer/oldIndex/oldType and puts them into newServer/newIndex/newType.
* If a bulk request to newIndex reports errors, the script prints them and stops.
*/
/* Configuration START */
// OLD server config - READ_ONLY: the script only issues search/scroll requests against it
def oldServer = "http://localhost:9200"
def oldIndex = 'filetracker'
def oldType = 'locatedFile'
// NEW server config - WRITE: documents are sent here through the _bulk endpoint
def newServer = "http://localhost:9200"
def newIndex = 'filetracker-2016-06-153'
def newType = 'locatedFile'
// Page size: sent as the `size` query parameter, i.e. how many documents are requested per scroll page
def maxPerPage = 500
// Scroll timeout: sent as the `scroll` query parameter; once it elapses the scroll is no longer available
def scrollTimeout = '1m'
// Bulk operation name written into every action line of the bulk payload.
// NOTE(review): per the Elasticsearch bulk API, "create" reports an error when the document
// already exists, whereas "index" overwrites it - confirm against the target ES version.
def action = "create" // create or index
@Grapes(
@Grab(group='commons-lang', module='commons-lang', version='2.6')
)
@Grapes(
@Grab(group='org.codehaus.groovy.modules.http-builder', module='http-builder', version='0.7.1')
)
import groovyx.net.http.RESTClient
import static groovyx.net.http.ContentType.*
/*
 * Copy loop: pages through oldServer/oldIndex/oldType with the scroll API and
 * pushes every page to newServer through a single _bulk request.
 *
 * Stops when a scroll page comes back empty or when a bulk response reports
 * errors (the failing items are printed first).
 */
def source = new RESTClient(oldServer)
def target = new RESTClient(newServer)

// The initial search opens the scroll and already returns the first page of hits.
def scroll = source.get(path: "/${oldIndex}/${oldType}/_search", query: [scroll: scrollTimeout, size: maxPerPage]).data
def scrollId = scroll._scroll_id
def hits = scroll.hits?.hits ?: []
int total = hits.size() // running count of documents read so far

// Requiring a non-empty page avoids posting an empty _bulk body when the
// source index/type has no documents at all.
while (scrollId && hits) {
    // The bulk API body is newline-delimited: one action line followed by one source line per document.
    def bulk = new StringBuilder()
    hits.each { hit ->
        // Serialise the action line through JsonOutput so ids containing quotes
        // or backslashes are escaped instead of corrupting the payload.
        def actionLine = [(action): [_index: newIndex, _type: newType, _id: hit._id]]
        bulk << groovy.json.JsonOutput.toJson(actionLine) << '\n'
        bulk << groovy.json.JsonOutput.toJson(hit._source) << '\n'
    }

    // Explicit UTF-8 so the bytes sent do not depend on the platform default charset.
    def resp = target.post(path: '/_bulk', body: bulk.toString().getBytes('UTF-8'), requestContentType: BINARY, headers: [Accept: 'application/json']).data
    if (resp.errors) {
        println "Errors found:"
        println resp.items
        println 'no scroll hits or error found. ending...'
        break // stop right away; no point fetching another page after a failed bulk
    }

    // Renew the scroll with the configured timeout (not a hard-coded one) while fetching the next page.
    scroll = source.get(path: '/_search/scroll', query: [scroll: scrollTimeout, scroll_id: scrollId]).data
    hits = scroll.hits?.hits ?: []
    if (!hits) {
        println 'no scroll hits or error found. ending...'
        scrollId = null
    } else {
        scrollId = scroll._scroll_id
        total += hits.size()
        println total
    }
}
println 'end'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment