Skip to content

Instantly share code, notes, and snippets.

@dlemphers
Created August 12, 2012 17:29
Show Gist options
  • Save dlemphers/3333194 to your computer and use it in GitHub Desktop.
Save dlemphers/3333194 to your computer and use it in GitHub Desktop.
Simple ES export script
#!/usr/bin/python
import requests
import simplejson as json
import logging
import argparse
def make_request(url, payload):
    """Send a GET request with *payload* as its body and return the decoded JSON.

    Args:
        url: Full URL of the endpoint to query.
        payload: Request body, handed to ``requests.get`` as ``data``.

    Returns:
        The response body decoded from JSON.
    """
    response = requests.get(url, data=payload)
    # ``Response.json`` was a property in requests < 1.0 and is a method in
    # later releases. Support both so callers always receive the decoded
    # data rather than a bound method they cannot index into.
    decoded = response.json
    return decoded() if callable(decoded) else decoded
if __name__ == "__main__":
    # Export the documents matched by a match_all query on one index/type to
    # a file, writing one JSON object per line.
    logging.basicConfig(level='DEBUG')

    parser = argparse.ArgumentParser()
    parser.add_argument('--server', type=str, help='Enter the server URL', required=True)
    parser.add_argument('--index', type=str, help='Enter the index', required=True)
    parser.add_argument('--type', type=str, help='Enter the type', required=True)
    parser.add_argument('--size', type=str, help='Enter the scroll size', required=True)
    parser.add_argument('--output', type=str, help='Enter the filename to output to', required=True)
    args = parser.parse_args()

    # Initial scan request. Its response is used only for the scroll id and
    # the total hit count; the documents are fetched by the scroll calls below.
    response = make_request(
        'http://{0}/{1}/{2}/_search?search_type=scan&scroll=10m&size={3}'.format(
            args.server, args.index, args.type, args.size
        ),
        json.dumps({
            "query": {
                "match_all": {}
            }
        })
    )
    scroll_id = response['_scroll_id']
    hits = response['hits']['total']
    logging.info('Scroll id is %s', scroll_id)
    logging.info('Hits = %s', hits)

    # The scroll endpoint is the same for every page, so build it once.
    scroll_url = 'http://{0}/_search/scroll?scroll=10m'.format(args.server)
    records_written = 0

    with open(args.output, 'w') as output:
        response = make_request(scroll_url, scroll_id)
        # An empty page of hits ends the export.
        while response['hits']['hits']:
            page = response['hits']['hits']
            logging.info('Received %s', len(page))
            for hit in page:
                try:
                    record = hit['_source']
                    # NOTE(review): this copies the source document's own
                    # 'id' field into '_id'; the hit-level '_id' is not used.
                    # Confirm that this is the intended identifier.
                    record['_id'] = record['id']
                    output.write('{0}\n'.format(json.dumps(record)))
                    records_written += 1
                except Exception:
                    # Best effort: log the failing record and keep going.
                    # Catching Exception (not a bare except) lets Ctrl-C and
                    # SystemExit still abort the export.
                    logging.exception('Unable to write record')
            logging.info('%s of %s written', records_written, hits)
            # Each response carries the scroll id to use for the next page.
            response = make_request(scroll_url, response['_scroll_id'])
@dlemphers
Copy link
Author

This script was designed for exporting documents from Elasticsearch and then importing them into MongoDB, hence the addition of the `_id` field back into each record.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment