Skip to content

Instantly share code, notes, and snippets.

@melvinwevers
Created March 16, 2015 11:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save melvinwevers/39e942fe1b925b067b85 to your computer and use it in GitHub Desktop.
ES bulk importer script
from pyelasticsearch import ElasticSearch
import json
import codecs
import glob
import os
# ElasticSearch settings
ES_CLUSTER = 'http://localhost:9200/'  # base URL of the ES node to index into
ES_INDEX = 'kb'  # target index name
ES_TYPE = 'doc'  # document type passed to bulk_index (pre-ES7 mapping type)
# pyelasticsearch client bound to the cluster above; used by the main loop below.
es = ElasticSearch(ES_CLUSTER)
def load_articles(json_path):
    """Read one UTF-8 encoded JSON file and return the parsed article list.

    Each file is expected to hold a JSON array of article dicts; every
    dict should carry an '_id' field, which the bulk indexer below uses
    as the Elasticsearch document id.
    """
    # Context manager guarantees the file handle is closed even if
    # json.loads raises on malformed input (the original leaked it then).
    with codecs.open(json_path, mode='r', encoding='utf8') as fh:
        return json.loads(fh.read())


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("INPUT_DIR", metavar="DIR",
                        help="Directory with JSON files")
    args = parser.parse_args()

    # Bulk-index every *.json file found in the input directory.
    for json_file in glob.glob(os.path.join(args.INPUT_DIR, "*.json")):
        articles = load_articles(json_file)
        # id_field='_id' tells pyelasticsearch to use each article's
        # '_id' value as the ES document id instead of auto-generating one.
        es.bulk_index(ES_INDEX, ES_TYPE, articles, id_field='_id')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment