Skip to content

Instantly share code, notes, and snippets.

@jasonthomas
Created November 8, 2012 01:02
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jasonthomas/4035724 to your computer and use it in GitHub Desktop.
Save jasonthomas/4035724 to your computer and use it in GitHub Desktop.
Elasticsearch backup script
#!/bin/env python
import tarfile
import urllib2
import json
import os
import sys
import datetime
from optparse import OptionParser
# Base URL of the Elasticsearch HTTP API; every request in this script is
# built by appending a path to it.
ES_HTTP = 'http://localhost:9200'
# Root of the Elasticsearch data directory. backup() archives
# <ES_DIR>/<cluster name>/nodes/0/indices/<index>.
ES_DIR = '/var/lib/elasticsearch'
# Directory that receives the mapping (.json) and index archive (.tar.gz)
# files; backup() creates it if it does not exist.
BACKUP_DIR = '/var/lib/elasticsearch-backup'
def get_clustername():
    """Return the 'cluster_name' field of the cluster health response.

    Issues a GET against ``<ES_HTTP>/_cluster/health`` and decodes the body
    as JSON.
    """
    health_url = "%s/_cluster/health" % ES_HTTP
    health = json.loads(urllib2.urlopen(health_url).read())
    return health['cluster_name']
def get_mapping(index, display=False):
    """Fetch the mapping of *index* as the raw (undecoded) response text.

    When *display* is false the text is returned to the caller. When it is
    true the text is printed instead and the function returns None.
    """
    mapping_url = "%s/%s/_mapping" % (ES_HTTP, index)
    raw_mapping = urllib2.urlopen(mapping_url).read()
    if not display:
        return raw_mapping
    print(raw_mapping)
def get_index(alias):
    """Resolve *alias* to the single index name it refers to.

    Requests ``<ES_HTTP>/<alias>/_aliases`` and treats the top-level keys of
    the JSON response as index names.

    Returns the index name when exactly one key is present. Otherwise an
    explanatory message is printed and the process exits with status 1:
    either no key came back, or several did, in which case there is no
    single index to act on.
    """
    aliases_url = "%s/%s/_aliases" % (ES_HTTP, alias)
    response = urllib2.urlopen(aliases_url)
    indices = list(json.loads(response.read()))

    if len(indices) == 1:
        return indices[0]

    if not indices:
        print('Index not found')
    else:
        # Previously this case fell through and returned None, which callers
        # then interpolated into URLs and paths as the literal "None".
        print('Alias %s matches more than one index: %s'
              % (alias, ', '.join(sorted(indices))))
    sys.exit(1)
def disable_translog_flush(index, state='false'):
    """Set ``index.translog.disable_flush`` on *index* to *state*.

    *state* is sent verbatim as the setting's value: backup() passes 'true'
    before copying files and relies on the default 'false' to switch
    flushing back on afterwards.

    The settings are PUT to ``<ES_HTTP>/<index>/_settings``. If the decoded
    response does not carry a truthy 'ok' field, a message is printed and
    the process exits with status 1.
    """
    settings_url = "%s/%s/_settings" % (ES_HTTP, index)
    payload = json.dumps({'index': {'translog': {'disable_flush': state}}})

    request = urllib2.Request(settings_url, data=payload)
    request.add_header('Content-Type', 'application/json; charset=UTF-8')
    # urllib2 derives the verb from the presence of a body (POST here);
    # override it so the request goes out as a PUT.
    request.get_method = lambda: 'PUT'

    opener = urllib2.build_opener(urllib2.HTTPHandler)
    status = json.loads(opener.open(request).read())

    # A body without an 'ok' key (or one that is not an object at all) must
    # end in the error message below rather than a KeyError/TypeError.
    acknowledged = isinstance(status, dict) and status.get('ok')
    if not acknowledged:
        print('Could not verify translog return value')
        sys.exit(1)
def cleanup(path):
    """Delete the file at *path* if it exists; otherwise do nothing."""
    if not os.path.exists(path):
        return
    os.remove(path)
def backup(index):
    """Back up one index: save its mapping and archive its data directory.

    *index* may be an index name or an alias; it is resolved through
    get_index(). Two files are written to BACKUP_DIR, both named
    ``<index>_<YYYYmmddHHMM>``: a ``.json`` file with the mapping and a
    ``.tar.gz`` archive of
    ``<ES_DIR>/<cluster name>/nodes/0/indices/<index>``.

    Translog flushing is disabled on the index while the files are copied
    and re-enabled afterwards. On any failure the error is printed, flushing
    is re-enabled, partially written backup files are removed, and the
    process exits with status 1.
    """
    clustername = get_clustername()
    rindex = get_index(index)
    date = datetime.datetime.now().strftime("%Y%m%d%H%M")
    es_index_dir = "%s/%s/nodes/0/indices/%s" % (ES_DIR, clustername, rindex)
    backup_file = "%s/%s_%s.tar.gz" % (BACKUP_DIR, rindex, date)
    backup_json_file = "%s/%s_%s.json" % (BACKUP_DIR, rindex, date)

    # create backup dir if it doesn't exist
    if not os.path.exists(BACKUP_DIR):
        os.mkdir(BACKUP_DIR, 0o755)

    # let people know that it's going down
    print('Creating backup for index %s' % rindex)

    try:
        # before we start disable translog
        disable_translog_flush(rindex, 'true')

        # get mapping and write to file
        with open(backup_json_file, 'w') as fp:
            mapping = get_mapping(rindex)
            # NOTE(review): get_mapping() returns the raw response text, so
            # json.dump() stores it as one JSON-encoded string rather than
            # as an object. Left as-is because a restore tool may rely on
            # this on-disk format -- confirm before changing.
            json.dump(mapping, fp)

        # create tar of index dir; always release the handle, even when
        # adding the directory fails part-way through
        tar = tarfile.open(backup_file, "w:gz")
        try:
            tar.add(es_index_dir)
        finally:
            tar.close()

        # re enable translog flush
        disable_translog_flush(rindex)
    except BaseException:
        # Deliberately broad: flushing must be switched back on even for
        # Ctrl-C or a sys.exit() raised by a helper.
        failure = sys.exc_info()
        # Report first, so the root cause is not lost if the recovery steps
        # below fail as well.
        print("something went wrong: %s" % (failure,))
        try:
            disable_translog_flush(rindex)
        finally:
            # Remove partial output even when re-enabling flush failed.
            cleanup(backup_json_file)
            cleanup(backup_file)
        sys.exit(1)
def main():
    """Parse the command line and run the requested action.

    Exactly one positional argument (an index name or alias) is required,
    together with one of:

    * ``-b`` / ``--backup``  -- back up the index via backup()
    * ``-m`` / ``--mapping`` -- print the index mapping

    If both flags are given, the backup takes precedence. A wrong argument
    count or a missing action is reported through ``parser.error``, which
    prints the usage text and exits.
    """
    parser = OptionParser(usage="usage: %prog [options] index")
    parser.add_option("-b", "--backup",
                      action="store_true",
                      help="Backup specified index")
    parser.add_option("-m", "--mapping",
                      action="store_true",
                      help="Print mapping of specified index")
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error("wrong number of arguments")

    target = args[0]
    if options.backup:
        backup(target)
    elif options.mapping:
        get_mapping(get_index(target), True)
    else:
        # Without this branch the script exited 0 having done nothing.
        parser.error("no action given: use -b/--backup or -m/--mapping")


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment