Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save aksinghdce/c048a24b7762cebaca0ec6d544772e6d to your computer and use it in GitHub Desktop.
Save aksinghdce/c048a24b7762cebaca0ec6d544772e6d to your computer and use it in GitHub Desktop.
Move documents from a source Elasticsearch cluster to an AWS Elasticsearch cluster
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import json
# --- Configuration -------------------------------------------------------
port = 9200
timeout = 5000

# Index that is read from and the page size requested per scroll batch.
index = "jmeter"
doc_type = "type"
size = 10000

# Range query on "SampleStartTime": from five minutes ago (inclusive) up to
# the current minute (exclusive), both rounded to the minute.
body = {
    "query": {
        "range": {
            "SampleStartTime": {
                "gte": "now-5m/m",
                "lt": "now/m",
            },
        },
    },
}
# Batch handling: every page of scroll results is pushed through here.
i = 0  # how many batches process_hits() has been called with so far


def process_hits(hits):
    """Bulk-send one batch of search hits to the destination client.

    Increments the module-level batch counter ``i``, prints the counter
    together with the size of this batch, and then hands ``hits`` unchanged
    to ``helpers.bulk`` using ``es_dest``.

    NOTE(review): ``es_dest`` is not defined in the visible part of this
    file; it must exist at module level before this function is called.
    """
    global i
    i = i + 1
    batch_len = len(hits)
    print("documents added:", i, ",:", batch_len)
    helpers.bulk(es_dest, hits)
# Copy every document matched by `body` from the source client `es` to the
# destination, one scroll page at a time.
# NOTE(review): `es` is not defined in the visible part of this file; it must
# be created before this point.

# Stop early when the source index is missing.
if not es.indices.exists(index=index):
    print("Index " + index + " not exists")
    # `exit()` is a helper injected by the `site` module and is not always
    # available; raising SystemExit directly ends the script with the same
    # (zero) exit status.
    raise SystemExit

# NOTE(review): hits are forwarded to process_hits() exactly as the source
# returns them, including their metadata fields — confirm the destination
# accepts all of them (the earlier commented-out code tried to strip
# "doc_type").

# Open the scroll with an initial search; the context is kept alive for two
# minutes between requests.
data = es.search(
    index=index,
    scroll='2m',
    size=size,
    body=body,
)

# Scroll ID used to request the following pages.
sid = data['_scroll_id']
scroll_size = len(data['hits']['hits'])

try:
    # An empty page means the scroll is exhausted.
    while scroll_size > 0:
        print("Scrolling...")
        # Handle the current page before requesting the next one.
        process_hits(data['hits']['hits'])
        data = es.scroll(scroll_id=sid, scroll='2m')
        # The scroll ID may change between requests; always use the latest.
        sid = data['_scroll_id']
        # Number of hits returned by the last scroll request.
        scroll_size = len(data['hits']['hits'])
finally:
    # Release the scroll context on the source cluster instead of leaving it
    # open until its two-minute timeout expires.
    es.clear_scroll(scroll_id=sid)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment