Created
August 5, 2013 15:09
-
-
Save arioch/6156667 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# | |
# elasticsearch-backup-index.sh | |
# | |
# Push logstash index from yesterday to s3 with an accompanying restore script. | |
# http://logstash.net | |
# http://www.elasticsearch.org | |
# https://github.com/s3tools/s3cmd | http://s3tools.org/s3cmd | |
# | |
# Inspiration: | |
# http://tech.superhappykittymeow.com/?p=296 | |
# | |
# Must run on an elasticsearch node, and expects to find the index on this node. | |
# Print the help/usage text to stdout.
# Delimiter is quoted: the text contains no expansions and must be literal.
usage() {
  cat <<'EOF'
elasticsearch-backup-index.sh
Create a restorable backup of an elasticsearch index (assumes Logstash format
indexes), and upload it to an existing S3 bucket. The default backs up an
index from yesterday. Note that this script itself does not restart
elasticsearch - the restore script that is generated for each backup will
restart elasticsearch after restoring an archived index.
USAGE: ./elasticsearch-backup-index.sh -b S3_BUCKET -i INDEX_DIRECTORY [OPTIONS]
OPTIONS:
-h Show this message
-b S3 path for backups (Required)
-i Elasticsearch index directory (Required)
-d Backup a specific date (format: YYYY.mm.dd)
-c Command for s3cmd (default: s3cmd put)
-t Temporary directory for archiving (default: /tmp)
-p Persist local backups, by default backups are not kept locally
-s Shards (default: 5)
-r Replicas (default: 0)
-e Elasticsearch URL (default: http://localhost:9200)
-n How nice tar must be (default: 19)
-u Restart command for elastic search (default 'service elasticsearch restart')
EXAMPLES:
./elasticsearch-backup-index.sh -b "s3://someBucket" \
-i "/usr/local/elasticsearch/data/node/0/indices"
This uses http://localhost:9200 to connect to elasticsearch and backs up
the index from yesterday (based on system time, be careful with timezones)
./elasticsearch-backup-index.sh -b "s3://bucket" -i "/mnt/es/data/node/0/indices" \
-d "2013.05.21" -c "/usr/local/bin/s3cmd put" -t "/mnt/es/backups" \
-u "service es restart" -e "http://127.0.0.1:9200" -p
Connect to elasticsearch using 127.0.0.1 instead of localhost, backup the
index from 2013.05.21 instead of yesterday, use the s3cmd in /usr/local/bin
explicitly, store the archive and restore script in /mnt/es/backups (and
persist them) and use 'service es restart' to restart elastic search.
EOF
}
# Refuse to run as a non-root user: the script reads the elasticsearch data
# directory directly, and the author prefers not to debug other users' perms.
if [[ "$USER" != 'root' && "$LOGNAME" != 'root' ]]; then
  echo "This script must be run as root."
  exit 1
fi
# Default settings; each may be overridden by a command-line option below.
S3CMD="s3cmd put"                         # -c: upload command
TMP_DIR="/tmp"                            # -t: scratch space for the archive
SHARDS=5                                  # -s
REPLICAS=0                                # -r
ELASTICSEARCH="http://localhost:9200"     # -e
NICE=19                                   # -n: niceness for tar
RESTART="service elasticsearch restart"   # -u
# Regex used to validate integer option values (-s, -r, -n).
RE_D="^[0-9]+$"
# Parse command-line options; see usage() for the meaning of each flag.
while getopts ":b:i:d:c:t:ps:r:e:n:u:h" opt; do
  case "$opt" in
    h) usage; exit 0 ;;
    b) S3_BASE=$OPTARG ;;
    i) INDEX_DIR=$OPTARG ;;
    d) DATE=$OPTARG ;;
    c) S3CMD=$OPTARG ;;
    t) TMP_DIR=$OPTARG ;;
    p) PERSIST=1 ;;
    s)
      if [[ $OPTARG =~ $RE_D ]]; then
        SHARDS=$OPTARG
      else
        ERROR="${ERROR}Shards must be an integer.\n"
      fi
      ;;
    r)
      if [[ $OPTARG =~ $RE_D ]]; then
        REPLICAS=$OPTARG
      else
        ERROR="${ERROR}Replicas must be an integer.\n"
      fi
      ;;
    e) ELASTICSEARCH=$OPTARG ;;
    n)
      # If nice is not an integer, just use the default silently.
      if [[ $OPTARG =~ $RE_D ]]; then
        NICE=$OPTARG
      fi
      ;;
    u) RESTART=$OPTARG ;;
    # Catch-all covers both invalid options (?) and missing arguments (:),
    # matching the original single-character wildcard behavior.
    *) usage; exit 1 ;;
  esac
done
# Validate required arguments, accumulating messages so the user sees every
# problem at once.
# We need an S3 base path
if [ -z "$S3_BASE" ]; then
  ERROR="${ERROR}Please provide an s3 bucket and path with -b.\n"
fi
# We need an elasticsearch index directory.
# BUG FIX: the original tested the literal string "INDEX_DIR" (always
# non-empty), so a missing -i was never reported. Test the variable instead.
if [ -z "$INDEX_DIR" ]; then
  ERROR="${ERROR}Please provide an Elasticsearch index directory with -i.\n"
fi
# If we have errors, show the errors with usage data and exit.
if [ -n "$ERROR" ]; then
  # Quoted so the accumulated \n sequences render via -e as intended.
  echo -e "$ERROR"
  usage
  exit 1
fi
# Default logstash index naming is hardcoded, as are YYYY-mm container
# directories. With -d we back up that specific day; otherwise yesterday.
if [ -n "$DATE" ]; then
  INDEX="logstash-$DATE"
  YEARMONTH=${DATE//./-}      # e.g. 2013.05.21 -> 2013-05-21
  YEARMONTH=${YEARMONTH:0:7}  # keep YYYY-mm
else
  # NOTE(review): 'date --date' is GNU-specific; this script targets Linux.
  INDEX=$(date --date='yesterday' +"logstash-%Y.%m.%d")
  YEARMONTH=$(date +"%Y-%m")
fi
S3_TARGET="$S3_BASE/$YEARMONTH"
# Make sure there is an index (paths quoted: they may contain spaces).
if ! [ -d "$INDEX_DIR/$INDEX" ]; then
  echo "The index $INDEX_DIR/$INDEX does not appear to exist."
  exit 1
fi
# Get the index mapping from elasticsearch so the restore script can recreate
# the index with the same mappings plus our shard/replica settings.
INDEX_MAPPING=$(curl -s -XGET "$ELASTICSEARCH/$INDEX/_mapping")
SETTINGS="{\"settings\":{\"number_of_shards\":$SHARDS,\"number_of_replicas\":$REPLICAS},\"mappings\":$INDEX_MAPPING}"
# Make the tmp directory if it does not already exist.
if ! [ -d "$TMP_DIR" ]; then
  mkdir -p "$TMP_DIR"
fi
# Tar and gzip the index directory. Abort if we cannot enter the index dir,
# otherwise tar would archive the wrong directory.
cd "$INDEX_DIR" || exit 1
nice -n "$NICE" tar czf "$TMP_DIR/$INDEX.tgz" "$INDEX"
cd - > /dev/null
# Create a restore script for elasticsearch.
# BUG FIX: use '>' (truncate) instead of '>>' (append) so a re-run does not
# leave a corrupt script with duplicated content.
cat << EOF > "$TMP_DIR/${INDEX}-restore.sh"
#!/bin/bash
#
# ${INDEX}-restore.sh - restores elasticsearch index: $INDEX to elasticsearch
# instance at $ELASTICSEARCH. This script expects to run in the same
# directory as the $INDEX.tgz file.
# Make sure this index does not exist already
TEST=\`curl -XGET "$ELASTICSEARCH/$INDEX/_status" 2> /dev/null | grep error\`
if [ -z "\$TEST" ]; then
  echo "Index: $INDEX already exists on this elasticsearch node."
  exit 1
fi
curl -XPUT '$ELASTICSEARCH/$INDEX/' -d '$SETTINGS' > /dev/null 2>&1
# Extract index files
# BUG FIX: pwd and \$DOWNLOAD_DIR are escaped so they are evaluated when the
# restore script runs, not when this backup script generates it (previously
# \$DOWNLOAD_DIR expanded to an empty string at generation time).
DOWNLOAD_DIR=\$(pwd)
cd $INDEX_DIR
if [ -f "\$DOWNLOAD_DIR/$INDEX.tgz" ]; then
  tar xzf "\$DOWNLOAD_DIR/$INDEX.tgz"
else
  echo "Unable to locate archive file \$DOWNLOAD_DIR/$INDEX.tgz."
  exit 1
fi
# restart elasticsearch to allow it to open the new dir and file data
$RESTART
exit 0
EOF
# Put archive and restore script in s3.
# $S3CMD is deliberately unquoted: it is a command plus arguments
# (e.g. "s3cmd put") and relies on word splitting.
$S3CMD "$TMP_DIR/$INDEX.tgz" "$S3_TARGET/$INDEX.tgz"
$S3CMD "$TMP_DIR/$INDEX-restore.sh" "$S3_TARGET/$INDEX-restore.sh"
# Clean up tmp files unless -p (persist) was given.
if [ -z "$PERSIST" ]; then
  rm -f -- "$TMP_DIR/$INDEX.tgz" "$TMP_DIR/$INDEX-restore.sh"
fi
exit 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# elasticsearch-remove-old-indices.sh | |
# | |
# Delete logstash format indices from elasticsearch maintaining only a | |
# specified number. | |
# http://logstash.net | |
# http://www.elasticsearch.org | |
# | |
# Inspiration: | |
# http://tech.superhappykittymeow.com/?p=296 | |
# | |
# Must have access to the specified elasticsearch node. | |
# Print the help/usage text to stdout.
# Delimiter is quoted: the text contains no expansions and must be literal.
usage() {
  cat <<'EOF'
elasticsearch-remove-old-indices.sh
Compares the current list of indices to a configured value and deletes any
indices surpassing that value. Sort is lexicographical; the first n of a 'sort
-r' list are kept, all others are deleted.
USAGE: ./elasticsearch-remove-old-indices.sh [OPTIONS]
OPTIONS:
-h Show this message
-i Indices to keep (default: 14)
-e Elasticsearch URL (default: http://localhost:9200)
-g Consistent index name (default: logstash)
-o Output actions to a specified file
EXAMPLES:
./elasticsearch-remove-old-indices.sh
Connect to http://localhost:9200 and get a list of indices matching
'logstash'. Keep the top lexicographical 14 indices, delete any others.
./elasticsearch-remove-old-indices.sh -e "http://es.example.com:9200" \
-i 28 -g my-logs -o /mnt/es/logfile.log
Connect to http://es.example.com:9200 and get a list of indices matching
'my-logs'. Keep the top 28 indices, delete any others. When using a custom
index naming scheme be sure that a 'sort -r' places the indices you want to
keep at the top of the list. Output index deletes to /mnt/es/logfile.log.
EOF
}
# Default settings; each may be overridden by a command-line option below.
ELASTICSEARCH="http://localhost:9200"   # -e
KEEP=14                                 # -i: number of newest indices to keep
GREP="logstash"                         # -g: substring identifying indices
# Regex used to validate the numeric -i value.
RE_D="^[0-9]+$"
# Parse command-line options; see usage() for the meaning of each flag.
while getopts ":i:e:g:o:h" flag; do
  case "$flag" in
    h) usage; exit 0 ;;
    i)
      if [[ $OPTARG =~ $RE_D ]]; then
        KEEP=$OPTARG
      else
        ERROR="${ERROR}Indexes to keep must be an integer.\n"
      fi
      ;;
    e) ELASTICSEARCH=$OPTARG ;;
    g) GREP=$OPTARG ;;
    o) LOGFILE=$OPTARG ;;
    # Catch-all covers both invalid options (?) and missing arguments (:).
    *) usage; exit 1 ;;
  esac
done
# If we have errors, show the errors with usage data and exit.
if [ -n "$ERROR" ]; then
  # Quoted so the accumulated \n sequences render via -e as intended
  # (unquoted, word splitting would collapse the message formatting).
  echo -e "$ERROR"
  usage
  exit 1
fi
# Get the indices from elasticsearch.
# NOTE(review): scraping _status output with grep/awk is fragile (and the
# endpoint is removed in later elasticsearch releases); a JSON-aware tool
# such as jq would be more robust. Original behavior preserved here.
INDICES_TEXT=$(curl -s "$ELASTICSEARCH/_status?pretty=true" | grep -- "$GREP" | grep -v index | sort -r | awk -F\" '{print $2}')
if [ -z "$INDICES_TEXT" ]; then
  echo "No indices returned containing '$GREP' from $ELASTICSEARCH."
  exit 1
fi
# If we are logging, make sure we have a logfile TODO - handle errors here
if [ -n "$LOGFILE" ] && ! [ -e "$LOGFILE" ]; then
  touch "$LOGFILE"
fi
# Delete every index past the newest $KEEP (list is sorted newest-first).
# Word-splitting of $INDICES_TEXT into array elements is intentional:
# index names are whitespace-free.
declare -a INDEX=($INDICES_TEXT)
if [ ${#INDEX[@]} -gt "$KEEP" ]; then
  for index in "${INDEX[@]:$KEEP}"; do
    # We don't want to accidentally delete everything
    if [ -n "$index" ]; then
      if [ -z "$LOGFILE" ]; then
        curl -s -XDELETE "$ELASTICSEARCH/$index/" > /dev/null
      else
        echo "$(date "+[%Y-%m-%d %H:%M] ") Deleting index: $index." >> "$LOGFILE"
        curl -s -XDELETE "$ELASTICSEARCH/$index/" >> "$LOGFILE"
      fi
    fi
  done
fi
exit 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# | |
# elasticsearch-restore-index.sh | |
# | |
# Retrieve a specified logstash index from s3 and restore with an accompanying | |
# restore script. | |
# http://logstash.net | |
# http://www.elasticsearch.org | |
# https://github.com/s3tools/s3cmd | http://s3tools.org/s3cmd | |
# | |
# Inspiration: | |
# http://tech.superhappykittymeow.com/?p=296 | |
# | |
# Must run on an elasticsearch node with data, the restore script restarts | |
# elasticsearch. | |
# Print the help/usage text to stdout.
# Delimiter is quoted: the text contains no expansions and must be literal.
usage() {
  cat <<'EOF'
elasticsearch-restore-index.sh
USAGE: ./elasticsearch-restore-index.sh -b S3_BUCKET [OPTIONS]
OPTIONS:
-h Show this message
-b S3 path for backups (Required)
-i Elasticsearch index directory (Required)
-d Date to retrieve (Required, format: YYYY.mm.dd)
-t Temporary directory for download and extract (default: /tmp)
-c Command for s3cmd (default: s3cmd get)
-e Elasticsearch URL (default: http://localhost:9200)
-n How nice tar must be (default: 19)
EXAMPLES:
./elasticsearch-restore-index.sh -b "s3://someBucket" -i /mnt/es/data/nodes/0/indices \
-d "2013.05.01"
Get the backup and restore script for the 2013.05.01 index from this s3
bucket and restore the index to the provided elasticsearch index directory.
EOF
}
# Refuse to run as a non-root user: restoring into the elasticsearch data
# directory and restarting the service require root privileges.
if [[ "$USER" != 'root' && "$LOGNAME" != 'root' ]]; then
  echo "This script must be run as root."
  exit 1
fi
# Defaults; each may be overridden by a command-line option below.
S3CMD="s3cmd get"
ELASTICSEARCH="http://localhost:9200"
NICE=19
TMP_DIR="/tmp"
# BUG FIX: RE_D was never defined in this script, so the -n integer check in
# the option loop matched everything (an unset var yields an empty regex,
# which matches any string). Define it as in the companion backup script.
RE_D="^[0-9]+$"
# Parse command-line options; see usage() for the meaning of each flag.
while getopts ":b:i:t:d:c:e:n:h" opt; do
  case "$opt" in
    h) usage; exit 0 ;;
    b) S3_BASE=$OPTARG ;;
    i) INDEX_DIR=$OPTARG ;;
    t) TMP_DIR=$OPTARG ;;
    d) DATE=$OPTARG ;;
    c) S3CMD=$OPTARG ;;
    e) ELASTICSEARCH=$OPTARG ;;
    n)
      # If nice is not an integer, just use the default silently.
      # NOTE(review): relies on RE_D being defined in the defaults section.
      if [[ $OPTARG =~ $RE_D ]]; then
        NICE=$OPTARG
      fi
      ;;
    # Catch-all covers both invalid options (?) and missing arguments (:),
    # matching the original single-character wildcard behavior.
    *) usage; exit 1 ;;
  esac
done
# Validate required arguments, accumulating messages so the user sees every
# problem at once. (Restored the comment below, truncated in the original.)
# We need an S3 base path
if [ -z "$S3_BASE" ]; then
  ERROR="${ERROR}Please provide an s3 bucket and path with -b.\n"
fi
# We need an elasticsearch index directory.
# BUG FIX: the original tested the literal string "INDEX_DIR" (always
# non-empty), so a missing -i was never reported. Test the variable instead.
if [ -z "$INDEX_DIR" ]; then
  ERROR="${ERROR}Please provide an Elasticsearch index directory with -i.\n"
fi
# We need a date to restore
if [ -z "$DATE" ]; then
  ERROR="${ERROR}Please provide a date for restoration with -d.\n"
fi
# If we have errors, show the errors with usage data and exit.
if [ -n "$ERROR" ]; then
  # Quoted so the accumulated \n sequences render via -e as intended.
  echo -e "$ERROR"
  usage
  exit 1
fi
# Default logstash index naming is hardcoded, as are YYYY-mm container dirs.
INDEX="logstash-$DATE"
YEARMONTH=${DATE//./-}      # e.g. 2013.05.01 -> 2013-05-01
YEARMONTH=${YEARMONTH:0:7}  # keep YYYY-mm
S3_TARGET="$S3_BASE/$YEARMONTH"
# Get archive and execute the restore script. TODO check file existence first
# $S3CMD is deliberately unquoted: it is a command plus arguments
# (e.g. "s3cmd get") and relies on word splitting.
$S3CMD "$S3_TARGET/$INDEX.tgz" "$TMP_DIR/$INDEX.tgz"
$S3CMD "$S3_TARGET/$INDEX-restore.sh" "$TMP_DIR/$INDEX-restore.sh"
if [ -f "$TMP_DIR/$INDEX-restore.sh" ]; then
  chmod 750 "$TMP_DIR/$INDEX-restore.sh"
  # The restore script states it "expects to run in the same directory as
  # the .tgz file" (it resolves the archive via pwd), so run it from there.
  cd "$TMP_DIR" || exit 1
  "./$INDEX-restore.sh"
  # cleanup tmp files
  rm -f -- "$TMP_DIR/$INDEX.tgz" "$TMP_DIR/$INDEX-restore.sh"
else
  echo "Unable to find restore script, does that backup exist?"
  exit 1
fi
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment