Skip to content

Instantly share code, notes, and snippets.

@ruthgrace
Last active November 7, 2021 17:22
Show Gist options
  • Save ruthgrace/7a7689caeb8a8054ad21faa552fe1bf0 to your computer and use it in GitHub Desktop.
Save ruthgrace/7a7689caeb8a8054ad21faa552fe1bf0 to your computer and use it in GitHub Desktop.
# To run this script, first create a directory called "elasticsearch" in the current working directory; the exported data is written there.
# Note that this script won't work if your data is too big for local disk.
import pickle
from elasticsearch import Elasticsearch
### EDIT THE SETTINGS BELOW TO POINT AT THE ELASTICSEARCH CLUSTER YOU ARE MIGRATING FROM ###
ES_HOST = "example.elasticsearch.url.com"
ES_PORT = 9200
ES_INDEX = "my_index_name"
ES_DOC_TYPE = "feed"

# Number of documents requested per page; lower it when individual documents are large.
PAGE_SIZE = 1000

# Search body selecting the documents to migrate.
# NOTE: `some_start_time` and `some_end_time` are placeholders that this
# script does not define -- replace them with real bounds before running.
query = {
    "query": {
        "range": {
            "event_creation_time": {
                "gte": some_start_time,
                "lte": some_end_time,
            },
        },
    },
}
### END OF SETTINGS ###
# Export every document matched by `query` to local pickle files, one file per
# page of results: elasticsearch/elasticsearch_<page number>.pickle.
# Pages are written to disk as they arrive instead of being accumulated in
# memory.
es = Elasticsearch([{'host': ES_HOST, 'port': ES_PORT}])

# Open the scroll. scroll='2m' is the keep-alive sent with every request, and
# sort='_doc' is the sort order requested for the scan.
page = es.search(
    index=ES_INDEX,
    doc_type=ES_DOC_TYPE,
    scroll='2m',
    sort='_doc',
    size=PAGE_SIZE,
    body=query)

counter = 0
while True:
    sid = page['_scroll_id']
    hits = page["hits"]["hits"]

    # pickle produces bytes, so the file must be opened in binary mode ("wb");
    # text mode fails on Python 3. The `with` block closes the file even if
    # the dump raises.
    with open("elasticsearch/elasticsearch_" + str(counter) + ".pickle", "wb") as output:
        pickle.dump(hits, output)

    # Stop on the first empty page. The loop is driven by the hits actually
    # returned rather than by page['hits']['total'], whose type differs between
    # Elasticsearch versions. The empty page has already been written above,
    # so the last file on disk holds an empty list, matching the file sequence
    # this script has always produced.
    if not hits:
        break

    page = es.scroll(scroll_id=sid, scroll='2m')
    counter += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment