Skip to content

Instantly share code, notes, and snippets.

@nviennot
Created October 17, 2013 20:52
Show Gist options
  • Save nviennot/7032026 to your computer and use it in GitHub Desktop.
Save nviennot/7032026 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# Gemfile
# source 'https://rubygems.org/'
# gem 'stretcher', git: 'git://github.com/PoseBiz/stretcher.git'
# gem "ruby-progressbar", :require => false
# gem 'multi_json'
# gem 'oj'
# gem 'pry'
# Default Elasticsearch endpoint. `||=` keeps a value the caller already
# exported (e.g. `ELASTICSEARCH_URL=http://localhost:9200 ./script.rb`)
# instead of silently overwriting it.
ENV['ELASTICSEARCH_URL'] ||= 'http://logstash.ec2.crowdtap.com:9200'
require 'bundler'
# Load the gems declared in the Gemfile.
Bundler.require
# Small convenience wrapper around a Stretcher connection to Elasticsearch.
class ES
  # Lifetime requested for the scroll context on every scroll-related call.
  SCROLL_TTL = '5m'

  # Hits requested per page.
  # XXX Size is per shard, not total
  SCAN_PAGE_SIZE = 1000

  # Returns the Stretcher::Server for the current thread, building it from
  # ENV['ELASTICSEARCH_URL'] the first time it is needed.
  def self.server
    Thread.current[:es_connection] ||= Stretcher::Server.new(ENV['ELASTICSEARCH_URL'])
  end

  # Returns the index handle named +index_name+ on the shared server.
  def self.index(index_name)
    server.index(index_name)
  end

  # Walks every hit of +query+ on +index_name+ with a scan/scroll search,
  # handing each page of hits to the block and drawing a progress bar.
  #
  # index_name - name of the index to search.
  # query      - search body passed as-is to the index's +search+ call.
  # block      - called once per non-empty page with the array of hits.
  #
  # Returns nil without yielding when the search reports zero hits.
  # Returns an Enumerator over the pages when no block is given.
  def self.scan_search(index_name, query, &block)
    # Without this guard a block-less call would only fail (NoMethodError on
    # nil) after the search and the first scroll request had already run.
    return enum_for(:scan_search, index_name, query) unless block

    # Loaded lazily: the Gemfile declares this gem with :require => false.
    require 'ruby-progressbar'

    search_options = { :search_type => :scan, :scroll => SCROLL_TTL, :size => SCAN_PAGE_SIZE }
    first_page = index(index_name).search(search_options, query)
    return if first_page.total.zero?

    scroll_id = first_page.raw[:_scroll_id]
    bar = ProgressBar.create(:format => '%t |%b>%i| %c/%C %e', :title => "Scan", :total => first_page.total)

    loop do
      page = server.request(:get, '_search/scroll', :scroll => SCROLL_TTL, :scroll_id => scroll_id)
      hits = page.hits.hits
      # An empty page ends the scan.
      break if hits.empty?

      # Each response carries the id to use for the next request.
      scroll_id = page[:_scroll_id]
      bar.progress += hits.size
      block.call(hits)
    end

    bar.finish
  end
end
# Dump the selected fields of every document in one logstash index to
# output.json as a single pretty-printed JSON array. Documents are written
# page by page as they arrive from the scan, not collected first.
File.open("output.json", 'w') do |f|
  # True once at least one document has been written; controls the separator.
  need_comma = false
  f.puts "["
  ES.scan_search('logstash-2013.10.08',
                 :query => { :match_all => {} },
                 :fields => [:@timestamp, :@message, :@source_host]) do |data|
    data.each do |r|
      # The comma goes before every document except the first, so the array
      # never ends with a trailing comma.
      f.puts "," if need_comma
      f.print MultiJson.dump(r['fields'], :pretty => true)
      need_comma = true
    end
  end
  f.puts "\n]"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment