Created
July 1, 2016 14:39
-
-
Save michaelkeevildown/a6af16fa4614f0dc9995d210cb226eff to your computer and use it in GitHub Desktop.
Elasticsearch Python Scroll Script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################
# Tested on Elasticsearch Version: 2.x #
########################################
from elasticsearch import Elasticsearch
import certifi

# Connection settings -- replace these placeholder values before running.
index_name = 'index name here'
index_type = 'index type here'
elasticsearch_hostname = 'xyz.found.io'
elasticsearch_username = 'username'
elasticsearch_password = 'password'
elasticsearch_port = 1234

# Client for the cluster: HTTP basic auth over SSL, with server certificates
# verified against the CA file that certifi.where() points to.
es = Elasticsearch(
    [elasticsearch_hostname],
    http_auth=(elasticsearch_username, elasticsearch_password),
    port=elasticsearch_port,
    use_ssl=True,
    verify_certs=True,
    ca_certs=certifi.where(),
)
# Open the scroll: a match_all query in 'scan' mode, asking for 1000 hits per
# batch and keeping the scroll context alive for two minutes between requests.
page = es.search(
    body={"query": {"match_all": {}}},
    index=index_name,
    doc_type=index_type,
    search_type='scan',
    scroll='2m',
    size=1000,
)

# The scroll id is the cursor handed to every follow-up scroll request.
sid = page['_scroll_id']
# Seed the loop condition with the total hit count reported by the search.
scroll_size = page['hits']['total']
# Start scrolling: keep requesting pages until one comes back with no hits.
# print() is called with a single argument throughout so the output is the
# same whether the script runs under Python 2 or Python 3.
while scroll_size > 0:
    print("Scrolling...")
    page = es.scroll(scroll_id=sid, scroll='2m')
    # Update the scroll ID -- always continue from the most recent one.
    sid = page['_scroll_id']
    # Hits returned by this scroll request; an empty list ends the loop.
    records = page['hits']['hits']
    scroll_size = len(records)
    print("scroll size: " + str(scroll_size))
    # Do something with the obtained page
    for record in records:
        print(record['_source'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment