Created
January 4, 2020 08:41
-
-
Save aksinghdce/c048a24b7762cebaca0ec6d544772e6d to your computer and use it in GitHub Desktop.
Move documents from a source Elasticsearch cluster to AWS Elasticsearch.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import sys

from elasticsearch import Elasticsearch
from elasticsearch import helpers
# Define config
# NOTE(review): port, timeout and doc_type are not referenced anywhere in the
# visible part of this script; they are kept in case client setup elsewhere
# relies on them.
port = 9200
timeout = 5000
index = "jmeter"      # index that is searched and scrolled
doc_type = "type"
size = 10000          # number of hits requested per scroll page

# Range query on SampleStartTime: from five minutes ago (rounded to the
# minute, inclusive) up to the current minute (exclusive).
body = {
    "query": {
        "range": {
            "SampleStartTime": {
                "gte": "now-5m/m",
                "lt": "now/m",
            }
        }
    }
}
# Process hits here
i = 0  # number of batches handed to process_hits so far


def process_hits(hits):
    """Bulk-send one batch of search hits to the destination cluster.

    Increments the module-level batch counter ``i``, prints a progress line,
    and passes ``hits`` unchanged to ``helpers.bulk`` as the bulk actions.

    Note that the first number printed after "documents added:" is the
    batch counter, not a document count; the batch size is the last value.

    Args:
        hits: the list found under ``['hits']['hits']`` of a search or
            scroll response.
    """
    # NOTE(review): `es_dest` is not defined in the visible part of this
    # file - confirm the destination client is created before this is called.
    global i
    i += 1
    print("documents added:", i, ",:", len(hits))
    helpers.bulk(es_dest, hits)
# NOTE(review): `es` (the source client) is not defined in the visible part
# of this file - confirm it is created before this point.

# Check index exists
if not es.indices.exists(index=index):
    print("Index " + index + " not exists")
    # sys.exit() rather than the site-provided exit(), which is meant for the
    # interactive interpreter and is not guaranteed to exist in every runtime.
    sys.exit()

# Init scroll by search
data = es.search(
    index=index,
    scroll='2m',
    size=size,
    body=body,
)

# Get the scroll ID
sid = data['_scroll_id']
scroll_size = len(data['hits']['hits'])

try:
    # Scrolling: keep fetching pages until one comes back empty.
    while scroll_size > 0:
        # Before scroll, process current batch of hits
        process_hits(data['hits']['hits'])

        data = es.scroll(scroll_id=sid, scroll='2m')

        # Update the scroll ID
        sid = data['_scroll_id']

        # Get the number of results that returned in the last scroll
        scroll_size = len(data['hits']['hits'])
finally:
    # Release the scroll context explicitly instead of leaving it open on the
    # source cluster until the '2m' keep-alive expires, including when
    # processing a batch fails part-way through.
    es.clear_scroll(scroll_id=sid)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment