Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
ES bulk importer script
from pyelasticsearch import ElasticSearch
import json
import codecs
import glob
import os
# ElasticSearch settings
# Base URL handed to the ElasticSearch client constructor below.
ES_CLUSTER = 'http://localhost:9200/'
# Index name, passed as the first argument to es.bulk_index().
ES_INDEX = 'kb'
# Document type, passed as the second argument to es.bulk_index().
ES_TYPE = 'doc'
# Module-level client object; created when this file is executed/imported and
# used by the bulk-import loop in the __main__ section.
es = ElasticSearch(ES_CLUSTER)
if __name__ == "__main__":
    # Command-line entry point: bulk-index every *.json file found directly
    # inside the directory given as the single positional argument.
    import argparse

    oArgParser = argparse.ArgumentParser()
    oArgParser.add_argument("INPUT_DIR", metavar="DIR",
                            help="Directory with JSON files")
    oArgs = oArgParser.parse_args()

    for sJsonFile in glob.glob(os.path.join(oArgs.INPUT_DIR, "*.json")):
        # The context manager closes the handle even when reading, decoding
        # or JSON parsing raises; with an explicit close() after json.loads()
        # the handle leaked on any such failure.
        with codecs.open(sJsonFile, mode='r', encoding='utf8') as fhFile:
            # Presumably each file holds a JSON array of article objects;
            # verify against the files being imported.
            aArticles = json.loads(fhFile.read())

        # To sanity-check a file, print each article's 'article_dc_title' and
        # the start of its 'text_content' here before indexing.

        # One bulk request per input file. id_field='_id' names the key in
        # each document that is handed to the client as the document ID
        # (see the pyelasticsearch bulk_index documentation).
        es.bulk_index(ES_INDEX, ES_TYPE, aArticles, id_field='_id')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.