Skip to content

Instantly share code, notes, and snippets.

@clowder
Created August 3, 2012 11:08
Show Gist options
  • Save clowder/3246697 to your computer and use it in GitHub Desktop.
Save clowder/3246697 to your computer and use it in GitHub Desktop.
ElasticSearch Reindex
# Gemfile for the reindex script.
#
# Use the explicit HTTPS URL: the :rubygems shorthand is deprecated by Bundler.
source 'https://rubygems.org'

# Client library the script drives (presumably what provides the
# ElasticSearch constant -- confirm against the gem).
gem 'rubberband'
# JSON backend; the script selects it with `MultiJson.engine = :yajl`.
gem 'yajl-ruby'
#!/usr/bin/env ruby
#
# Simple reindexing script for ElasticSearch.
#
# Examples
# ./reindex http://localhost:9200/index_one/tweet http://localhost:9200/index_two/tweet
#
require 'rubygems'
require 'bundler'
require 'set'
# Require the gems declared in the Gemfile's default group.
Bundler.require(:default)
# Ask MultiJson to use the yajl backend for JSON encoding/decoding.
MultiJson.engine = :yajl
# Copies documents from one ElasticSearch client to another: it scrolls
# through every hit on the source and writes them to the destination in
# bulk requests.
#
# Progress is printed to stdout, one character per bulk request:
# '.' when a batch is about to be sent, overwritten (via backspace) with
# 'o' once the batch has been queued and 'O' once the bulk call returned.
class ElasticSearch::Reindex
  # Fallbacks used when the matching option is not given to #initialize.
  DEFAULT_QUERY_BATCH_SIZE = 10
  DEFAULT_INDEX_BATCH_SIZE = 100

  # How long the scroll context is asked to stay alive between requests.
  SCROLL_KEEP_ALIVE = '10h'

  attr_reader :from, :to, :query_batch_size, :index_batch_size

  # from - client to read from (must respond to #search and #scroll).
  # to   - client to write to (must respond to #bulk).
  # args - optional Hash:
  #        :query_batch_size - :size passed to each search/scroll request
  #                            (default 10).
  #        :index_batch_size - number of buffered hits that triggers a bulk
  #                            write (default 100).
  def initialize(from, to, args = {})
    @from = from
    @to = to
    @query_batch_size = args[:query_batch_size] || DEFAULT_QUERY_BATCH_SIZE
    @index_batch_size = args[:index_batch_size] || DEFAULT_INDEX_BATCH_SIZE
  end

  # Runs the copy. Hits are buffered in a Set and written out whenever the
  # buffer reaches index_batch_size; anything still buffered once scrolling
  # stops is written as a final, smaller batch so no hit is dropped.
  #
  # Returns whatever the source client's #scroll call returns.
  def perform
    first_page = from.search('*', scroll_options)
    pending = first_page.to_a.to_set

    last_page = from.scroll(first_page.scroll_id, scroll_options) do |hits|
      # Merge in place rather than building a new Set for every page.
      pending.merge(hits.to_a)
      if pending.size >= index_batch_size
        flush(pending)
        pending = Set.new
      end
    end

    # Without this the tail of the data set (or all of it, when it is
    # smaller than one batch) would never reach the destination.
    flush(pending) unless pending.empty?
    last_page
  end

  private

  # Builds a fresh options Hash for each request so the client is free to
  # modify the one it is given.
  def scroll_options
    { :scroll => SCROLL_KEEP_ALIVE, :size => query_batch_size }
  end

  # Prints the progress marker and sends the buffered hits in one request.
  def flush(hits)
    print '.'
    bulk_index(hits)
  end

  # Queues the _source of every hit inside a single bulk call on the
  # destination client.
  #
  # NOTE(review): only _source is forwarded; whether the destination keeps
  # the original document id/type depends on the client -- confirm.
  def bulk_index(hits)
    to.bulk do |batch|
      hits.each do |hit|
        batch.index(hit._source)
      end
      print "\bo"
    end
    print "\bO"
  end
end
# Command-line entry point. The last two arguments are the source URL and
# the destination URL, in that order.
if ARGV.size < 2
  # Fail early with a hint instead of handing nil URLs to the client.
  abort "Usage: #{$PROGRAM_NAME} FROM_URL TO_URL"
end

# pop(2) removes the last two arguments from ARGV and keeps their order.
from_url, to_url = ARGV.pop(2)

from = ElasticSearch.new(from_url)
to = ElasticSearch.new(to_url)

reindex = ElasticSearch::Reindex.new(from, to, { :index_batch_size => 2000, :query_batch_size => 1000 })
reindex.perform
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment