ns-mkusper/es_result_scanning.py

## es_result_scanning.py
from elasticsearch import Elasticsearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth
import elasticsearch.helpers
import boto3
import datetime
import json

# --- Settings: fill these in before running the script ---
host = ''       # used as the 'host' entry of the Elasticsearch connection
region = ''     # passed to AWS4Auth as the signing region
service = 'es'  # passed to AWS4Auth as the signing service name
query = ''      # optional; only forwarded to the scan when non-empty
domain = ''     # not referenced anywhere in the visible code
index = ''      # index handed to the scan helper

# Sign every request with the credentials resolved by the default boto3
# session (session token included).
credentials = boto3.Session().get_credentials()
awsauth = AWS4Auth(
    credentials.access_key, credentials.secret_key, region, service,
    session_token=credentials.token)

# Client talking to `host` on port 443 over SSL with certificate
# verification, using the signed-request auth built above.
es = Elasticsearch(
    hosts=[dict(host=host, port=443)],
    connection_class=RequestsHttpConnection,
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
)


def run_scan(scan):
    """Return the ``_source`` of every hit left in an ES scan iterable.

    The first object produced by ``scan`` is discarded before collection
    starts, so the returned list begins with the second object's
    ``_source``. An empty ``scan`` produces an empty list.

    Args:
        scan: Iterable of hit dicts, each carrying a ``'_source'`` key.

    Returns:
        list: A JSON-round-tripped copy of ``item['_source']`` for each
        remaining hit, in iteration order.

    Raises:
        KeyError: If a remaining hit has no ``'_source'`` key.
    """
    hits = iter(scan)
    # Skip the first scan object to get to the results. The default value
    # keeps an empty scan from leaking StopIteration to the caller.
    # NOTE(review): elasticsearch.helpers.scan appears to yield hits
    # directly, so this skip presumably drops the first document -- confirm
    # that is intended.
    next(hits, None)

    def scan_runner():
        # Counter starts at 1, presumably to account for the skipped object.
        items_read = 1
        for item in hits:
            items_read += 1
            if items_read % 1000 == 0:
                # str() is required: "str" + int raises TypeError.
                print("Items read: " + str(items_read))
            # The JSON round trip hands back a copy detached from the hit.
            yield json.loads(json.dumps(item['_source']))

    return list(scan_runner())


# Keyword arguments for the scan helper; the query is optional and is only
# added when one has been configured.
scan_args = dict(client=es, index=index, preserve_order=True)
if query:
    scan_args.update(query=query)

# Start the scan, collect what run_scan returns, and print the list.
scanner = elasticsearch.helpers.scan(**scan_args)
scan_results = run_scan(scanner)
print(scan_results)
	from elasticsearch import Elasticsearch, RequestsHttpConnection
	from requests_aws4auth import AWS4Auth
	import elasticsearch.helpers
	import boto3
	import datetime
	import json

	# --- Settings: fill these in before running the script ---
	host = ''       # used as the 'host' entry of the Elasticsearch connection
	region = ''     # passed to AWS4Auth as the signing region
	service = 'es'  # passed to AWS4Auth as the signing service name
	query = ''      # optional; only forwarded to the scan when non-empty
	domain = ''     # not referenced anywhere in the visible code
	index = ''      # index handed to the scan helper

	# Sign every request with the credentials resolved by the default boto3
	# session (session token included).
	credentials = boto3.Session().get_credentials()
	awsauth = AWS4Auth(
		credentials.access_key, credentials.secret_key, region, service,
		session_token=credentials.token)

	# Client talking to `host` on port 443 over SSL with certificate
	# verification, using the signed-request auth built above.
	es = Elasticsearch(
		hosts=[dict(host=host, port=443)],
		connection_class=RequestsHttpConnection,
		http_auth=awsauth,
		use_ssl=True,
		verify_certs=True,
	)


	def run_scan(scan):
		"""Return the ``_source`` of every hit left in an ES scan iterable.

		The first object produced by ``scan`` is discarded before collection
		starts, so the returned list begins with the second object's
		``_source``. An empty ``scan`` produces an empty list.

		Args:
			scan: Iterable of hit dicts, each carrying a ``'_source'`` key.

		Returns:
			list: A JSON-round-tripped copy of ``item['_source']`` for each
			remaining hit, in iteration order.

		Raises:
			KeyError: If a remaining hit has no ``'_source'`` key.
		"""
		hits = iter(scan)
		# Skip the first scan object to get to the results. The default value
		# keeps an empty scan from leaking StopIteration to the caller.
		# NOTE(review): elasticsearch.helpers.scan appears to yield hits
		# directly, so this skip presumably drops the first document --
		# confirm that is intended.
		next(hits, None)

		def scan_runner():
			# Counter starts at 1, presumably to account for the skipped object.
			items_read = 1
			for item in hits:
				items_read += 1
				if items_read % 1000 == 0:
					# str() is required: "str" + int raises TypeError.
					print("Items read: " + str(items_read))
				# The JSON round trip hands back a copy detached from the hit.
				yield json.loads(json.dumps(item['_source']))

		return list(scan_runner())


	# Keyword arguments for the scan helper.
	scan_args = {'client': es, 'index': index, 'preserve_order': True}
	if query:
		# The query is optional: only forward it when one is configured.
		scan_args['query'] = query
	scanner = elasticsearch.helpers.scan(**scan_args)

	# Collect what run_scan returns for the scan and print the list.
	scan_results = run_scan(scanner)
	print(scan_results)