barata0/elasticsearch-reindex.groovy

## elasticsearch-reindex.groovy
/*
* Reindex elasticsearch
* Read all documents from oldServer/oldIndex/oldType and put them into newServer/newIndex/newType.
* If the bulk write to newIndex reports an error, the script prints it and stops.
*/

/* Configuration START */

// Source side (only read from): server URL, index name and document type
String oldServer = 'http://localhost:9200'
String oldIndex = 'filetracker'
String oldType = 'locatedFile'

// Target side (written to): server URL, index name and document type
String newServer = 'http://localhost:9200'
String newIndex = 'filetracker-2016-06-153'
String newType = 'locatedFile'

// Page size: number of documents requested per scroll HTTP request
int maxPerPage = 500

// Scroll keep-alive; once this interval elapses the scroll is no longer available
String scrollTimeout = '1m'

// Bulk action applied to every document, either 'create' or 'index'.
// 'create' reports an error when the document already exists in the target.
String action = 'create'


@Grapes(
    @Grab(group='commons-lang', module='commons-lang', version='2.6')
)
@Grapes(
    @Grab(group='org.codehaus.groovy.modules.http-builder', module='http-builder', version='0.7.1')
)


import groovyx.net.http.RESTClient
import static groovyx.net.http.ContentType.*


// HTTP clients: `source` is only read from, `target` receives the bulk writes.
def source = new RESTClient(oldServer)
def target = new RESTClient(newServer)

// Open the scroll. The first response already carries the first page of hits.
def scroll = source.get(path: "/${oldIndex}/${oldType}/_search", query: [scroll: scrollTimeout, size: maxPerPage]).data

def scrollId = scroll._scroll_id

// Number of documents read so far; 0 when the response carries no hits.
int total = scroll.hits.hits?.size() ?: 0

if (total == 0) {
    // Nothing to copy: do not send an empty body to the bulk endpoint.
    println 'no scroll hits or error found. ending...'
    scrollId = null
}

while (scrollId) {

    // The bulk body is newline-delimited: one action line followed by one source line per document.
    def bulkBody = new StringBuilder()
    (scroll.hits.hits ?: []).each { hit ->
        // Serialise the action line with JsonOutput so quotes or backslashes
        // in the id, index or type cannot produce invalid JSON.
        def actionLine = [(action): [_index: newIndex, _type: newType, _id: hit._id]]
        bulkBody << groovy.json.JsonOutput.toJson(actionLine) << '\n'
        bulkBody << groovy.json.JsonOutput.toJson(hit._source) << '\n'
    }

    // Encode explicitly as UTF-8 instead of relying on the platform default charset.
    def resp = target.post(path: '/_bulk', body: bulkBody.toString().getBytes('UTF-8'), requestContentType: BINARY, headers: [Accept: 'application/json']).data

    if (resp.errors) {
        println "Errors found:"
        println resp.items
        println 'no scroll hits or error found. ending...'
        // Stop right away; fetching another page would only be discarded.
        break
    }

    // Use the configured keep-alive for the continuation as well (was hard-coded to '1m').
    scroll = source.get(path: '/_search/scroll', query: [scroll: scrollTimeout, scroll_id: scrollId]).data

    int pageSize = scroll.hits.hits?.size() ?: 0
    if (pageSize == 0) {
        println 'no scroll hits or error found. ending...'
        scrollId = null
    } else {
        scrollId = scroll._scroll_id
        total += pageSize
        println total
    }
}

println 'end'
	/*
	* Reindex elasticsearch
	* Read all documents from oldServer/oldIndex/oldType and put them into newServer/newIndex/newType.
	* If the bulk write to newIndex reports an error, the script prints it and stops.
	*/

	/* Configuration START */

	// Source side (only read from): server URL, index name and document type
	String oldServer = 'http://localhost:9200'
	String oldIndex = 'filetracker'
	String oldType = 'locatedFile'

	// Target side (written to): server URL, index name and document type
	String newServer = 'http://localhost:9200'
	String newIndex = 'filetracker-2016-06-153'
	String newType = 'locatedFile'

	// Page size: number of documents requested per scroll HTTP request
	int maxPerPage = 500

	// Scroll keep-alive; once this interval elapses the scroll is no longer available
	String scrollTimeout = '1m'

	// Bulk action applied to every document, either 'create' or 'index'.
	// 'create' reports an error when the document already exists in the target.
	String action = 'create'


	@Grapes(
	@Grab(group='commons-lang', module='commons-lang', version='2.6')
	)
	@Grapes(
	@Grab(group='org.codehaus.groovy.modules.http-builder', module='http-builder', version='0.7.1')
	)


	import groovyx.net.http.RESTClient
	import static groovyx.net.http.ContentType.*


	// HTTP clients: `source` is only read from, `target` receives the bulk writes.
	def source = new RESTClient(oldServer)
	def target = new RESTClient(newServer)

	// Open the scroll. The first response already carries the first page of hits.
	def scroll = source.get(path: "/${oldIndex}/${oldType}/_search", query: [scroll: scrollTimeout, size: maxPerPage]).data

	def scrollId = scroll._scroll_id

	// Number of documents read so far; 0 when the response carries no hits.
	int total = scroll.hits.hits?.size() ?: 0

	if (total == 0) {
	    // Nothing to copy: do not send an empty body to the bulk endpoint.
	    println 'no scroll hits or error found. ending...'
	    scrollId = null
	}

	while (scrollId) {

	    // The bulk body is newline-delimited: one action line followed by one source line per document.
	    def bulkBody = new StringBuilder()
	    (scroll.hits.hits ?: []).each { hit ->
	        // Serialise the action line with JsonOutput so quotes or backslashes
	        // in the id, index or type cannot produce invalid JSON.
	        def actionLine = [(action): [_index: newIndex, _type: newType, _id: hit._id]]
	        bulkBody << groovy.json.JsonOutput.toJson(actionLine) << '\n'
	        bulkBody << groovy.json.JsonOutput.toJson(hit._source) << '\n'
	    }

	    // Encode explicitly as UTF-8 instead of relying on the platform default charset.
	    def resp = target.post(path: '/_bulk', body: bulkBody.toString().getBytes('UTF-8'), requestContentType: BINARY, headers: [Accept: 'application/json']).data

	    if (resp.errors) {
	        println "Errors found:"
	        println resp.items
	        println 'no scroll hits or error found. ending...'
	        // Stop right away; fetching another page would only be discarded.
	        break
	    }

	    // Use the configured keep-alive for the continuation as well (was hard-coded to '1m').
	    scroll = source.get(path: '/_search/scroll', query: [scroll: scrollTimeout, scroll_id: scrollId]).data

	    // The original condition used `\|\|`, a markdown escape that is not valid Groovy.
	    int pageSize = scroll.hits.hits?.size() ?: 0
	    if (pageSize == 0) {
	        println 'no scroll hits or error found. ending...'
	        scrollId = null
	    } else {
	        scrollId = scroll._scroll_id
	        total += pageSize
	        println total
	    }
	}

	println 'end'