# michaelkeevildown/scroll.py

## scroll.py
########################################
# Tested on Elasticsearch Version: 2.x #
########################################

from elasticsearch import Elasticsearch
import certifi

# Index and document type to read from (placeholders; replace before running).
index_name = 'index name here'
index_type = 'index type here'

# Connection settings for the cluster (placeholders; replace before running).
elasticsearch_hostname = 'xyz.found.io'
elasticsearch_username = 'username'
elasticsearch_password = 'password'
elasticsearch_port = 1234

# HTTPS client using basic auth; server certificates are verified against the
# CA bundle whose path certifi.where() returns.
es = Elasticsearch(
    [elasticsearch_hostname],
    http_auth=(elasticsearch_username, elasticsearch_password),
    port=elasticsearch_port,
    use_ssl=True,
    verify_certs=True,
    ca_certs=certifi.where(),
)

# Initialize the scroll: match every document, keep the scroll context alive
# for two minutes between requests, and ask for pages of up to 1000 hits.
# NOTE(review): the hits of this first response are never processed below;
# that relies on search_type='scan' returning only a scroll id and the total
# on the initial request (Elasticsearch 2.x behaviour) -- confirm before
# changing search_type.
page = es.search(
    index=index_name,
    doc_type=index_type,
    scroll='2m',
    search_type='scan',
    size=1000,
    body={"query": {"match_all": {}}},
)
sid = page['_scroll_id']
scroll_size = page['hits']['total']

# Start scrolling: keep fetching pages until one comes back empty.
# print(...) with a single argument is used so the script emits the same
# output on Python 2 and also parses on Python 3.
while scroll_size > 0:
    print("Scrolling...")
    page = es.scroll(scroll_id=sid, scroll='2m')
    # Update the scroll ID; each response supplies the id for the next request.
    sid = page['_scroll_id']
    records = page['hits']['hits']
    # Number of results returned by this scroll request; 0 ends the loop.
    scroll_size = len(records)
    print("scroll size: " + str(scroll_size))
    # Do something with the obtained page
    for record in records:
        print(record['_source'])