Skip to content

Instantly share code, notes, and snippets.

@ns-mkusper
Created May 23, 2019 15:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ns-mkusper/cd315c122faeecb4c95629e0152a0fd2 to your computer and use it in GitHub Desktop.
Save ns-mkusper/cd315c122faeecb4c95629e0152a0fd2 to your computer and use it in GitHub Desktop.
Minimal working example of retrieving large Elasticsearch query results in Python
from elasticsearch import Elasticsearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth
import elasticsearch.helpers
import boto3
import datetime
import json
# --- Settings to fill in before running ------------------------------------
service = 'es'  # service name handed to AWS4Auth
host = ''       # becomes the 'host' entry of the Elasticsearch hosts list
region = ''     # region handed to AWS4Auth
index = ''      # index passed to the scan helper further down
query = ''      # optional; only forwarded to the scan when truthy
domain = ''     # not referenced anywhere else in the visible script

# Build the request auth object from the default boto3 session's credentials.
credentials = boto3.Session().get_credentials()
awsauth = AWS4Auth(
    credentials.access_key,
    credentials.secret_key,
    region,
    service,
    session_token=credentials.token,
)

# Client for `host` on port 443, with SSL and certificate verification
# enabled and requests authenticated through `awsauth`.
es = Elasticsearch(
    hosts=[
        {'host': host, 'port': 443},
    ],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)
def run_scan(scan):
    """Return the ``_source`` document of every hit produced by *scan*.

    Args:
        scan: An iterable of hits (dicts carrying a ``_source`` key), such
            as the generator returned by ``elasticsearch.helpers.scan``.

    Returns:
        A list holding one ``_source`` document per hit, in iteration order.
        An empty iterable yields an empty list.

    Raises:
        KeyError: If a hit has no ``_source`` key.
    """
    def scan_runner():
        # Per the elasticsearch-py documentation, helpers.scan yields hits
        # directly, so no leading item is discarded: dropping one would lose
        # a document and raise StopIteration on an empty result set.
        for count, item in enumerate(scan, start=1):
            if count % 1000 == 0:
                # Progress report; str() is needed because concatenating the
                # int counter to a str raises TypeError.
                print("Items read: " + str(count))
            # The JSON round trip hands back a copy detached from the hit.
            yield json.loads(json.dumps(item['_source']))

    return list(scan_runner())
# Keyword arguments for the scan helper; a query is only added when one has
# been configured above.
scan_args = {
    'client': es,
    'index': index,
    'preserve_order': True,
}
if query:
    scan_args.update({'query': query})

# Create the scan generator, drain it into a list and print the result.
scanner = elasticsearch.helpers.scan(**scan_args)
scan_results = run_scan(scanner)
print(scan_results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment