Skip to content

Instantly share code, notes, and snippets.

@arioch
Created August 5, 2013 15:09
Show Gist options
  • Save arioch/6156667 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# elasticsearch-backup-index.sh
#
# Push logstash index from yesterday to s3 with an accompanying restore script.
# http://logstash.net
# http://www.elasticsearch.org
# https://github.com/s3tools/s3cmd | http://s3tools.org/s3cmd
#
# Inspiration:
# http://tech.superhappykittymeow.com/?p=296
#
# Must run on an elasticsearch node, and expects to find the index on this node.
# Print usage/help text for this script on stdout.
usage()
{
# The here-doc delimiter is unquoted, so the text is subject to shell
# expansion; the trailing backslashes in the EXAMPLES section act as line
# continuations and join those lines in the emitted output.
cat << EOF
elasticsearch-backup-index.sh
Create a restorable backup of an elasticsearch index (assumes Logstash format
indexes), and upload it to an existing S3 bucket. The default backs up an
index from yesterday. Note that this script itself does not restart
elasticsearch - the restore script that is generated for each backup will
restart elasticsearch after restoring an archived index.
USAGE: ./elasticsearch-backup-index.sh -b S3_BUCKET -i INDEX_DIRECTORY [OPTIONS]
OPTIONS:
-h Show this message
-b S3 path for backups (Required)
-i Elasticsearch index directory (Required)
-d Backup a specific date (format: YYYY.mm.dd)
-c Command for s3cmd (default: s3cmd put)
-t Temporary directory for archiving (default: /tmp)
-p Persist local backups, by default backups are not kept locally
-s Shards (default: 5)
-r Replicas (default: 0)
-e Elasticsearch URL (default: http://localhost:9200)
-n How nice tar must be (default: 19)
-u Restart command for elastic search (default 'service elasticsearch restart')
EXAMPLES:
./elasticsearch-backup-index.sh -b "s3://someBucket" \
-i "/usr/local/elasticsearch/data/node/0/indices"
This uses http://localhost:9200 to connect to elasticsearch and backs up
the index from yesterday (based on system time, be careful with timezones)
./elasticsearch-backup-index.sh -b "s3://bucket" -i "/mnt/es/data/node/0/indices" \
-d "2013.05.21" -c "/usr/local/bin/s3cmd put" -t "/mnt/es/backups" \
-u "service es restart" -e "http://127.0.0.1:9200" -p
Connect to elasticsearch using 127.0.0.1 instead of localhost, backup the
index from 2013.05.21 instead of yesterday, use the s3cmd in /usr/local/bin
explicitly, store the archive and restore script in /mnt/es/backups (and
persist them) and use 'service es restart' to restart elastic search.
EOF
}
# Refuse to run as a non-root user: reading the index directory and the
# restart command baked into the generated restore script both need root.
if [ "$USER" != 'root' ] && [ "$LOGNAME" != 'root' ]; then
  # I don't want to troubleshoot the permissions of others
  echo "This script must be run as root."
  exit 1
fi

# Defaults (all overridable via the options below).
S3CMD="s3cmd put"                         # -c: command used to push files to S3
TMP_DIR="/tmp"                            # -t: staging area for archive + restore script
SHARDS=5                                  # -s: number_of_shards baked into restore settings
REPLICAS=0                                # -r: number_of_replicas baked into restore settings
ELASTICSEARCH="http://localhost:9200"     # -e: node used to fetch the index mapping
NICE=19                                   # -n: niceness applied to the tar step
RESTART="service elasticsearch restart"   # -u: restart command baked into restore script

# Integer-only pattern used to validate -s/-r/-n values.
RE_D="^[0-9]+$"

while getopts ":b:i:d:c:t:ps:r:e:n:u:h" flag
do
  case "$flag" in
    h)
      usage
      exit 0
      ;;
    b)
      S3_BASE=$OPTARG
      ;;
    i)
      INDEX_DIR=$OPTARG
      ;;
    d)
      DATE=$OPTARG
      ;;
    c)
      S3CMD=$OPTARG
      ;;
    t)
      TMP_DIR=$OPTARG
      ;;
    p)
      PERSIST=1
      ;;
    s)
      if [[ $OPTARG =~ $RE_D ]]; then
        SHARDS=$OPTARG
      else
        ERROR="${ERROR}Shards must be an integer.\n"
      fi
      ;;
    r)
      if [[ $OPTARG =~ $RE_D ]]; then
        REPLICAS=$OPTARG
      else
        ERROR="${ERROR}Replicas must be an integer.\n"
      fi
      ;;
    e)
      ELASTICSEARCH=$OPTARG
      ;;
    n)
      # If nice is not an integer, silently keep the default.
      if [[ $OPTARG =~ $RE_D ]]; then
        NICE=$OPTARG
      fi
      ;;
    u)
      RESTART=$OPTARG
      ;;
    ?)
      usage
      exit 1
      ;;
  esac
done

# We need an S3 base path
if [ -z "$S3_BASE" ]; then
  ERROR="${ERROR}Please provide an s3 bucket and path with -b.\n"
fi
# We need an elasticsearch index directory.
# BUG FIX: the original tested the literal string "INDEX_DIR" (missing $),
# which is always non-empty, so a missing -i was never reported.
if [ -z "$INDEX_DIR" ]; then
  ERROR="${ERROR}Please provide an Elasticsearch index directory with -i.\n"
fi
# If we have errors, show the errors with usage data and exit.
if [ -n "$ERROR" ]; then
  # Quote $ERROR so the message's whitespace survives word-splitting.
  echo -e "$ERROR"
  usage
  exit 1
fi
# Default logstash index naming is hardcoded, as are YYYY-mm container directories.
if [ -n "$DATE" ]; then
  INDEX="logstash-$DATE"
  YEARMONTH=${DATE//\./-}      # 2013.05.21 -> 2013-05-21
  YEARMONTH=${YEARMONTH:0:7}   # keep only YYYY-mm
else
  INDEX=$(date --date='yesterday' +"logstash-%Y.%m.%d")
  YEARMONTH=$(date +"%Y-%m")
fi
S3_TARGET="$S3_BASE/$YEARMONTH"

# Make sure there is an index
if ! [ -d "$INDEX_DIR/$INDEX" ]; then
  echo "The index $INDEX_DIR/$INDEX does not appear to exist."
  exit 1
fi

# Get metadata from elasticsearch; it is baked into the restore script so the
# index can be recreated with identical settings/mappings before extraction.
INDEX_MAPPING=$(curl -s -XGET "$ELASTICSEARCH/$INDEX/_mapping")
SETTINGS="{\"settings\":{\"number_of_shards\":$SHARDS,\"number_of_replicas\":$REPLICAS},\"mappings\":$INDEX_MAPPING}"

# Make the tmp directory if it does not already exist.
if ! [ -d "$TMP_DIR" ]; then
  mkdir -p "$TMP_DIR"
fi

# Tar and gzip the index directory.
cd "$INDEX_DIR" || exit 1
nice -n "$NICE" tar czf "$TMP_DIR/$INDEX.tgz" "$INDEX"
cd - > /dev/null

# Create a restore script for elasticsearch.
# BUG FIX: use '>' (truncate) instead of '>>' (append) so re-running a backup
# does not concatenate a second copy onto an existing restore script.
# In this unquoted here-doc, bare $VARS and $(..) expand NOW (values are baked
# in); escaped \$ and \` are evaluated when the restore script runs.
cat << EOF > "$TMP_DIR/${INDEX}-restore.sh"
#!/bin/bash
#
# ${INDEX}-restore.sh - restores elasticsearch index: $INDEX to elasticsearch
# instance at $ELASTICSEARCH. This script expects to run in the same
# directory as the $INDEX.tgz file.
# Make sure this index does not exist already
TEST=\`curl -XGET "$ELASTICSEARCH/$INDEX/_status" 2> /dev/null | grep error\`
if [ -z "\$TEST" ]; then
  echo "Index: $INDEX already exists on this elasticsearch node."
  exit 1
fi
curl -XPUT '$ELASTICSEARCH/$INDEX/' -d '$SETTINGS' > /dev/null 2>&1
# Extract index files
# BUG FIX: pwd and DOWNLOAD_DIR are now escaped so they are evaluated at
# restore time; previously they expanded during backup (DOWNLOAD_DIR was
# unset there), so the generated script looked for /$INDEX.tgz in /.
DOWNLOAD_DIR=\`pwd\`
cd $INDEX_DIR
if [ -f "\$DOWNLOAD_DIR/$INDEX.tgz" ]; then
  tar xzf "\$DOWNLOAD_DIR/$INDEX.tgz"
else
  echo "Unable to locate archive file \$DOWNLOAD_DIR/$INDEX.tgz."
  exit 1
fi
# restart elasticsearch to allow it to open the new dir and file data
$RESTART
exit 0
EOF

# Put archive and restore script in s3.
# $S3CMD is intentionally unquoted: it may contain a command plus arguments
# (e.g. "/usr/local/bin/s3cmd put") and must word-split.
$S3CMD "$TMP_DIR/$INDEX.tgz" "$S3_TARGET/$INDEX.tgz"
$S3CMD "$TMP_DIR/$INDEX-restore.sh" "$S3_TARGET/$INDEX-restore.sh"

# cleanup tmp files unless -p (persist) was given
if [ -z "$PERSIST" ]; then
  rm "$TMP_DIR/$INDEX.tgz"
  rm "$TMP_DIR/$INDEX-restore.sh"
fi
exit 0
#!/bin/bash
# elasticsearch-remove-old-indices.sh
#
# Delete logstash format indices from elasticsearch maintaining only a
# specified number.
# http://logstash.net
# http://www.elasticsearch.org
#
# Inspiration:
# http://tech.superhappykittymeow.com/?p=296
#
# Must have access to the specified elasticsearch node.
# Print usage/help text for this script on stdout.
usage()
{
# Unquoted here-doc delimiter: text is emitted after shell expansion; the
# trailing backslash in the second example joins that line with the next.
cat << EOF
elasticsearch-remove-old-indices.sh
Compares the current list of indices to a configured value and deletes any
indices surpassing that value. Sort is lexicographical; the first n of a 'sort
-r' list are kept, all others are deleted.
USAGE: ./elasticsearch-remove-old-indices.sh [OPTIONS]
OPTIONS:
-h Show this message
-i Indices to keep (default: 14)
-e Elasticsearch URL (default: http://localhost:9200)
-g Consistent index name (default: logstash)
-o Output actions to a specified file
EXAMPLES:
./elasticsearch-remove-old-indices.sh
Connect to http://localhost:9200 and get a list of indices matching
'logstash'. Keep the top lexicographical 14 indices, delete any others.
./elasticsearch-remove-old-indices.sh -e "http://es.example.com:9200" \
-i 28 -g my-logs -o /mnt/es/logfile.log
Connect to http://es.example.com:9200 and get a list of indices matching
'my-logs'. Keep the top 28 indices, delete any others. When using a custom
index naming scheme be sure that a 'sort -r' places the indices you want to
keep at the top of the list. Output index deletes to /mnt/es/logfile.log.
EOF
}
# Defaults (all overridable via the options below).
ELASTICSEARCH="http://localhost:9200"   # -e: node to query and delete from
KEEP=14                                 # -i: number of indices to retain
GREP="logstash"                         # -g: substring identifying our indices
# Integer-only pattern used to validate the -i value.
RE_D="^[0-9]+$"
while getopts ":i:e:g:o:h" flag
do
  case "$flag" in
    h)
      usage
      exit 0
      ;;
    i)
      if [[ $OPTARG =~ $RE_D ]]; then
        KEEP=$OPTARG
      else
        ERROR="${ERROR}Indexes to keep must be an integer.\n"
      fi
      ;;
    e)
      ELASTICSEARCH=$OPTARG
      ;;
    g)
      GREP=$OPTARG
      ;;
    o)
      LOGFILE=$OPTARG
      ;;
    ?)
      usage
      exit 1
      ;;
  esac
done
# If we have errors, show the errors with usage data and exit.
if [ -n "$ERROR" ]; then
  # Quote $ERROR so the message's whitespace survives word-splitting.
  echo -e "$ERROR"
  usage
  exit 1
fi
# Get the indices from elasticsearch.
# NOTE(review): this scrapes the pretty-printed _status JSON with grep/awk and
# assumes index names appear as the first quoted string on lines matching
# $GREP — true for the old (0.x/1.x) _status format this was written against.
INDICES_TEXT=$(curl -s "$ELASTICSEARCH/_status?pretty=true" | grep -- "$GREP" | grep -v index | sort -r | awk -F\" '{print $2}')
if [ -z "$INDICES_TEXT" ]; then
  echo "No indices returned containing '$GREP' from $ELASTICSEARCH."
  exit 1
fi
# If we are logging, make sure we have a logfile TODO - handle errors here
if [ -n "$LOGFILE" ] && ! [ -e "$LOGFILE" ]; then
  touch "$LOGFILE"
fi
# Delete indices: 'sort -r' above put the newest names first, so keep the
# first $KEEP entries and delete the rest.
# Intentional word-splitting: one array element per whitespace-separated name.
declare -a INDEX=($INDICES_TEXT)
if [ "${#INDEX[@]}" -gt "$KEEP" ]; then
  for index in "${INDEX[@]:$KEEP}"; do
    # We don't want to accidentally delete everything
    if [ -n "$index" ]; then
      if [ -z "$LOGFILE" ]; then
        curl -s -XDELETE "$ELASTICSEARCH/$index/" > /dev/null
      else
        echo "$(date "+[%Y-%m-%d %H:%M] ")Deleting index: $index." >> "$LOGFILE"
        curl -s -XDELETE "$ELASTICSEARCH/$index/" >> "$LOGFILE"
      fi
    fi
  done
fi
exit 0
#!/bin/bash
#
# elasticsearch-restore-index.sh
#
# Retrieve a specified logstash index from s3 and restore with an accompanying
# restore script.
# http://logstash.net
# http://www.elasticsearch.org
# https://github.com/s3tools/s3cmd | http://s3tools.org/s3cmd
#
# Inspiration:
# http://tech.superhappykittymeow.com/?p=296
#
# Must run on an elasticsearch node with data, the restore script restarts
# elasticsearch.
# Print usage/help text for this script on stdout.
usage()
{
# Unquoted here-doc delimiter: text is emitted after shell expansion; the
# trailing backslash in the example joins that line with the next.
cat << EOF
elasticsearch-restore-index.sh
USAGE: ./elasticsearch-restore-index.sh -b S3_BUCKET [OPTIONS]
OPTIONS:
-h Show this message
-b S3 path for backups (Required)
-i Elasticsearch index directory (Required)
-d Date to retrieve (Required, format: YYYY.mm.dd)
-t Temporary directory for download and extract (default: /tmp)
-c Command for s3cmd (default: s3cmd get)
-e Elasticsearch URL (default: http://localhost:9200)
-n How nice tar must be (default: 19)
EXAMPLES:
./elasticsearch-restore-index.sh -b "s3://someBucket" -i /mnt/es/data/nodes/0/indices \
-d "2013.05.01"
Get the backup and restore script for the 2013.05.01 index from this s3
bucket and restore the index to the provided elasticsearch index directory.
EOF
}
# Refuse to run as a non-root user: restoring index files and restarting
# elasticsearch (done by the downloaded restore script) require root.
if [ "$USER" != 'root' ] && [ "$LOGNAME" != 'root' ]; then
  # I don't want to troubleshoot the permissions of others
  echo "This script must be run as root."
  exit 1
fi
# Defaults (all overridable via the options below).
S3CMD="s3cmd get"                       # -c: command used to fetch files from S3
ELASTICSEARCH="http://localhost:9200"   # -e: node URL (informational for -e option)
NICE=19                                 # -n: niceness value
TMP_DIR="/tmp"                          # -t: where archive + restore script land
# BUG FIX: RE_D was never defined in this script, so the -n validation below
# compared against an empty regex, which matches ANY value.
RE_D="^[0-9]+$"
while getopts ":b:i:t:d:c:e:n:h" flag
do
  case "$flag" in
    h)
      usage
      exit 0
      ;;
    b)
      S3_BASE=$OPTARG
      ;;
    i)
      INDEX_DIR=$OPTARG
      ;;
    t)
      TMP_DIR=$OPTARG
      ;;
    d)
      DATE=$OPTARG
      ;;
    c)
      S3CMD=$OPTARG
      ;;
    e)
      ELASTICSEARCH=$OPTARG
      ;;
    n)
      # If nice is not an integer, silently keep the default.
      if [[ $OPTARG =~ $RE_D ]]; then
        NICE=$OPTARG
      fi
      ;;
    ?)
      usage
      exit 1
      ;;
  esac
done
# We need an S3 base path.
# BUG FIX: this comment was corrupted to the bare command 'n S3 base path',
# which executed (and failed) at runtime.
if [ -z "$S3_BASE" ]; then
  ERROR="${ERROR}Please provide an s3 bucket and path with -b.\n"
fi
# We need an elasticsearch index directory.
# BUG FIX: the original tested the literal string "INDEX_DIR" (missing $),
# which is always non-empty, so a missing -i was never reported.
if [ -z "$INDEX_DIR" ]; then
  ERROR="${ERROR}Please provide an Elasticsearch index directory with -i.\n"
fi
# We need a date to restore
if [ -z "$DATE" ]; then
  ERROR="${ERROR}Please provide a date for restoration with -d.\n"
fi
# If we have errors, show the errors with usage data and exit.
if [ -n "$ERROR" ]; then
  # Quote $ERROR so the message's whitespace survives word-splitting.
  echo -e "$ERROR"
  usage
  exit 1
fi
# Default logstash index naming is hardcoded, as are YYYY-mm container directories.
INDEX="logstash-$DATE"
YEARMONTH=${DATE//\./-}      # 2013.05.01 -> 2013-05-01
YEARMONTH=${YEARMONTH:0:7}   # keep only YYYY-mm
S3_TARGET="$S3_BASE/$YEARMONTH"
# Get archive and execute the restore script. TODO check file existence first
# $S3CMD is intentionally unquoted: it may be a command plus arguments
# (e.g. "/usr/local/bin/s3cmd get") and must word-split.
$S3CMD "$S3_TARGET/$INDEX.tgz" "$TMP_DIR/$INDEX.tgz"
$S3CMD "$S3_TARGET/$INDEX-restore.sh" "$TMP_DIR/$INDEX-restore.sh"
if [ -f "$TMP_DIR/$INDEX-restore.sh" ]; then
  chmod 750 "$TMP_DIR/$INDEX-restore.sh"
  # BUG FIX: the generated restore script locates the archive relative to its
  # working directory, so run it from $TMP_DIR (where the .tgz was downloaded)
  # instead of the caller's arbitrary cwd. Subshell keeps our own cwd intact.
  ( cd "$TMP_DIR" && "./$INDEX-restore.sh" )
  # cleanup tmp files
  rm "$TMP_DIR/$INDEX.tgz"
  rm "$TMP_DIR/$INDEX-restore.sh"
else
  echo "Unable to find restore script, does that backup exist?"
  exit 1
fi
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment