# muhammadddev/export_elasticsearch_data.py

## export_elasticsearch_data.py
import json
import time

import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan

# Reference point for the elapsed-time report printed on the last line.
start_time = time.time()

# --- Settings ---------------------------------------------------------------
# Connection URL, search body and index name used by the export below.
# The URL and index name are placeholders; replace them before running.
ELS_CONNECTION_STR = "http://elastic:changeme@localhost:9200/"  # YOUR CONNECTION STRING
QUERY = {"query": {"match_all": {}}}
INDEX_NAME = "YOUR_INDEX_NAME"


def _sources_with_progress(hits):
    """Yield the ``_source`` entry of each hit, printing its running index first."""
    for i, hit in enumerate(hits):
        print(f"{i}")
        yield hit["_source"]


print("Creating client instance of Elasticsearch")
elastic_client = Elasticsearch(
    ELS_CONNECTION_STR, timeout=30, max_retries=10, retry_on_timeout=True
)

print("Making API call to Elasticsearch.")
# for more options and configuration see: https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan
response = scan(elastic_client, query=QUERY, index=INDEX_NAME)

# Gather every hit's `_source` into a list, then build one DataFrame from it.
doc_data = list(_sources_with_progress(response))
docs = pd.DataFrame(doc_data)

print("Exporting Pandas objects to different file types.")
# Output file names are kept exactly as the script has always written them.
docs.to_json("bakcup.json")
docs.to_csv("bakcup.csv")

elapsed_seconds = time.time() - start_time
print("Time elapsed:", elapsed_seconds)
	import json
	import time

	import pandas as pd
	from elasticsearch import Elasticsearch
	from elasticsearch.helpers import scan

	# Reference point for the elapsed-time report printed on the last line.
	start_time = time.time()

	# Connection URL, search body and index name used by the export below.
	# The URL and index name are placeholders; replace them before running.
	ELS_CONNECTION_STR = "http://elastic:changeme@localhost:9200/" # YOUR CONNECTION STRING
	QUERY = {"query": {"match_all": {}}}
	INDEX_NAME = "YOUR_INDEX_NAME"


	print("Creating client instance of Elasticsearch")
	elastic_client = Elasticsearch(
	    ELS_CONNECTION_STR,
	    timeout=30,
	    max_retries=10,
	    retry_on_timeout=True,
	)


	print("Making API call to Elasticsearch.")
	# for more options and configuration see: https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan
	response = scan(elastic_client, query=QUERY, index=INDEX_NAME)

	# Collect the `_source` entry of every hit, printing the running index as we go.
	doc_data = []
	for i, result in enumerate(response):

	    print(f"{i}")

	    doc_data.append(result["_source"])

	# One DataFrame built from the collected `_source` entries.
	docs = pd.DataFrame(doc_data)

	print("Exporting Pandas objects to different file types.")

	# The same DataFrame is written twice: once as JSON, once as CSV.
	docs.to_json("bakcup.json")
	docs.to_csv("bakcup.csv")

	print("Time elapsed:", time.time() - start_time)