Skip to content

Instantly share code, notes, and snippets.

@michaelkeevildown
Created July 1, 2016 14:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save michaelkeevildown/a6af16fa4614f0dc9995d210cb226eff to your computer and use it in GitHub Desktop.
Save michaelkeevildown/a6af16fa4614f0dc9995d210cb226eff to your computer and use it in GitHub Desktop.
Elasticsearch Python Scroll Script
########################################
# Tested on Elasticsearch Version: 2.x #
########################################
from elasticsearch import Elasticsearch
import certifi
# Connection settings. These are placeholders: replace them with the real
# index, host and credentials before running the script.
index_name = 'index name here'
index_type = 'index type here'
elasticsearch_hostname = 'xyz.found.io'
elasticsearch_username = 'username'
elasticsearch_password = 'password'
elasticsearch_port = 1234

# HTTPS client using basic auth; server certificates are verified against
# the CA bundle shipped with certifi.
es = Elasticsearch(
    [elasticsearch_hostname],
    http_auth=(elasticsearch_username, elasticsearch_password),
    port=elasticsearch_port,
    use_ssl=True,
    verify_certs=True,
    ca_certs=certifi.where(),
)

# Initialize the scroll: match every document and keep the scroll context
# alive for two minutes between requests.
# NOTE(review): search_type='scan' is an Elasticsearch 2.x feature; per the
# 2.x docs the initial response carries only the scroll id and the total hit
# count (no documents) -- confirm before running against a newer cluster.
page = es.search(
    index=index_name,
    doc_type=index_type,
    scroll='2m',
    search_type='scan',
    size=1000,
    body={"query": {"match_all": {}}},
)
sid = page['_scroll_id']
# Seed the loop condition with the total hit count; inside the loop it is
# replaced by the number of hits in the page just fetched.
scroll_size = page['hits']['total']

# Start scrolling: keep requesting pages until one comes back empty.
try:
    while scroll_size > 0:
        print("Scrolling...")
        page = es.scroll(scroll_id=sid, scroll='2m')
        # Update the scroll ID; the next request must use the id returned
        # by the most recent response.
        sid = page['_scroll_id']
        records = page['hits']['hits']
        # Number of results returned by the last scroll request.
        scroll_size = len(records)
        print("scroll size: " + str(scroll_size))
        # Do something with the obtained page
        for record in records:
            print(record['_source'])
finally:
    # Release the server-side scroll context instead of leaving it open
    # until the 2m keep-alive expires.
    es.clear_scroll(scroll_id=sid)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment