Bulk insert data from a file into Elasticsearch (Python)
Usage: python <script.py> data.ndjson
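The input file must be newline-delimited JSON (NDJSON): one complete JSON document per line. An illustrative sample (the field names are arbitrary):

{"id": 1, "title": "first document"}
{"id": 2, "title": "second document"}
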
import sys
import json
import logging
from elasticsearch import Elasticsearch
from elasticsearch.helpers import streaming_bulk
# =======================
# Configuration start
es_hosts = [
    'http://localhost:9200',  # placeholder; point this at your cluster
]
es_api_user = 'elastic'
es_api_password = 'changeme'
index_name = 'my_index'
chunk_size = 10000
errors_before_interrupt = 5
refresh_index_after_insert = False
max_insert_retries = 3
yield_ok = False # if set to False will skip successful documents in the output
# Configuration end
# =======================
logging.basicConfig(level=logging.INFO)  # without this, logging.info() output is suppressed

filename = sys.argv[1]
logging.info('Importing data from {}'.format(filename))
es = Elasticsearch(
    es_hosts,
    http_auth=(es_api_user, es_api_password),
    sniff_on_start=True,  # sniff before doing anything
    sniff_on_connection_fail=True,  # refresh nodes after a node fails to respond
    sniffer_timeout=60,  # and also every 60 seconds
    retry_on_timeout=True,  # should timeout trigger a retry on a different node?
)
def data_generator():
    with open(filename) as f:
        for line in f:
            # merge each source document with the bulk metadata it needs
            yield {**json.loads(line), **{
                "_index": index_name,
            }}
errors_count = 0

for ok, result in streaming_bulk(es, data_generator(), chunk_size=chunk_size, refresh=refresh_index_after_insert,
                                 max_retries=max_insert_retries, yield_ok=yield_ok):
    if ok is not True:
        logging.error('Failed to import data: {}'.format(result))
        errors_count += 1
        if errors_count == errors_before_interrupt:
            logging.fatal('Too many import errors, exiting with error code')
            sys.exit(1)
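
To sanity-check the run, you can compare the index's document count against the number of lines in the input file. A minimal sketch, reusing the es client and index_name from above (the refresh makes freshly indexed documents visible to the count):

es.indices.refresh(index=index_name)
doc_count = es.count(index=index_name)['count']
logging.info('Index {} now contains {} documents'.format(index_name, doc_count))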