Skip to content

Instantly share code, notes, and snippets.

@muhammadddev
Created August 18, 2021 16:48
Show Gist options
  • Save muhammadddev/2901d95af03977968de7731c41678365 to your computer and use it in GitHub Desktop.
Save muhammadddev/2901d95af03977968de7731c41678365 to your computer and use it in GitHub Desktop.
Retrieve all the documents from an Elasticsearch index
import json
import time
import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
# Export every document of an Elasticsearch index to JSON and CSV files.
#
# The script connects to the cluster, streams all hits matching QUERY from
# INDEX_NAME with the `scan` helper, collects each hit's `_source` into a
# pandas DataFrame and writes that DataFrame to disk in two formats.

# Monotonic clock: the elapsed-time figure printed at the end is not affected
# by wall-clock adjustments that happen while the export is running.
start_time = time.perf_counter()

ELS_CONNECTION_STR = "http://elastic:changeme@localhost:9200/"  # YOUR CONNECTION STRING
QUERY = {"query": {"match_all": {}}}
INDEX_NAME = "YOUR_INDEX_NAME"

# Output locations (the original file names were misspelled as "bakcup.*").
JSON_OUTPUT_PATH = "backup.json"
CSV_OUTPUT_PATH = "backup.csv"

# Report progress once per this many documents instead of once per document,
# so a large index does not flood stdout with one line per hit.
PROGRESS_EVERY = 1000

print("Creating client instance of Elasticsearch")
elastic_client = Elasticsearch(
    ELS_CONNECTION_STR,
    timeout=30,
    max_retries=10,
    retry_on_timeout=True,
)

print("Making API call to Elasticsearch.")
# for more options and configuration see: https://elasticsearch-py.readthedocs.io/en/master/helpers.html#scan
response = scan(elastic_client, query=QUERY, index=INDEX_NAME)

# Keep only the document body (`_source`) of every hit yielded by `scan`.
doc_data = []
for count, result in enumerate(response, start=1):
    doc_data.append(result["_source"])
    if count % PROGRESS_EVERY == 0:
        print(f"Retrieved {count} documents")
print(f"Retrieved {len(doc_data)} documents in total")

docs = pd.DataFrame(doc_data)

print("Exporting Pandas objects to different file types.")
docs.to_json(JSON_OUTPUT_PATH)
docs.to_csv(CSV_OUTPUT_PATH)

print("Time elapsed:", time.perf_counter() - start_time)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment