nherment/backup.sh

## backup.sh
#!/bin/bash
# Back up today's logstash index from Elasticsearch to S3.
#
# This script should run at like 6pm or something, after logstash rotates to a
# new ES index and there is no new data coming in to the old one. Steps:
#   1. fetch the index mapping and prepend the hardcoded index settings to it,
#   2. tar+gzip the on-disk index directory,
#   3. generate a restore script with the mapping embedded in it,
#   4. upload the tarball and the restore script to S3.
# Any failed step aborts the run with a non-zero exit status, so a broken or
# partial backup is never uploaded.
set -u

# Print an error to stderr and abort the backup.
die() {
  echo "FAILED: $*" >&2
  exit 1
}

TODAY=$(date +"%Y.%m.%d")
INDEXNAME="logstash-$TODAY" # this had better match the index name in ES
INDEXDIR="/usr/local/elasticsearch/data/logstash/nodes/0/indices/"
# Kept as an array so the command and its options survive quoting intact.
BACKUPCMD=(/usr/local/backupTools/s3cmd --config=/usr/local/backupTools/s3cfg put)
BACKUPDIR="/mnt/es-backups/"
YEARMONTH=$(date +"%Y-%m")
S3TARGET="s3://backups/elasticsearch/$YEARMONTH/$INDEXNAME"
RESTORESCRIPT="$BACKUPDIR/$INDEXNAME-restore.sh"

# Private scratch directory instead of fixed names in /tmp: leftovers from an
# earlier run can no longer leak into this one, and it is removed on any exit.
WORKDIR=$(mktemp -d) || die "could not create temporary directory"
trap 'rm -rf -- "$WORKDIR"' EXIT
MAPPING="$WORKDIR/mapping"
MAPPOST="$WORKDIR/mappost"

# create mapping file with index settings. this metadata is required by ES to use index file data
echo -n "Backing up metadata… "
# -f makes curl fail on an HTTP error instead of saving the error body as if
# it were the mapping.
curl -sSf -XGET -o "$MAPPING" "http://localhost:9200/$INDEXNAME/_mapping?pretty=true" \
  || die "could not fetch mapping for $INDEXNAME"
# Prepend the hardcoded settings metadata to the index-specific metadata,
# dropping the first two lines of the mapping returned by ES.
{
  echo '{"settings":{"number_of_shards":5,"number_of_replicas":1},"mappings":{'
  tail -n +3 "$MAPPING"
} > "$MAPPOST" || die "could not assemble index metadata"
echo "DONE!"

# now lets tar up our data files. these are huge, so lets be nice
echo -n "Backing up data files (this may take some time)… "
mkdir -p "$BACKUPDIR" || die "could not create $BACKUPDIR"
# Without this check a failed cd would make tar archive from the wrong place.
cd "$INDEXDIR" || die "could not cd to $INDEXDIR"
nice -n 19 tar czf "$BACKUPDIR/$INDEXNAME.tar.gz" "$INDEXNAME" \
  || die "could not archive $INDEXNAME"
echo "DONE!"

echo -n "Creating restore script… "
# The here-document is expanded NOW, at backup time: every variable and the
# contents of $MAPPOST are baked into the generated script as literal text.
# The file is overwritten (not appended to) so re-running the backup on the
# same day does not produce a restore script with duplicated contents.
cat << EOF > "$RESTORESCRIPT" || die "could not write $RESTORESCRIPT"
#!/bin/bash
# this script requires $INDEXNAME.tar.gz and will restore it into elasticsearch
# it is ESSENTIAL that the index you are restoring does NOT exist in ES. delete it
# if it does BEFORE trying to restore data.

# create index and mapping
echo -n "Creating index and mappings… "
curl -XPUT 'http://localhost:9200/$INDEXNAME/' -d '$(cat "$MAPPOST")' > /dev/null 2>&1
echo "DONE!"

# extract our data files into place
echo -n "Restoring index (this may take a while)… "
cd $INDEXDIR
tar xzf $BACKUPDIR/$INDEXNAME.tar.gz
echo "DONE!"

# restart ES to allow it to open the new dir and file data
echo -n "Restarting Elasticsearch… "
/etc/init.d/es restart
echo "DONE!"
EOF
echo "DONE!" # restore script done

# push both tar.gz and restore script to s3
echo -n "Saving to S3 (this may take some time)… "
"${BACKUPCMD[@]}" "$BACKUPDIR/$INDEXNAME.tar.gz" "$S3TARGET.tar.gz" \
  || die "could not upload $INDEXNAME.tar.gz"
"${BACKUPCMD[@]}" "$RESTORESCRIPT" "$S3TARGET-restore.sh" \
  || die "could not upload $INDEXNAME-restore.sh"
echo "DONE!"

# tmp files are cleaned up by the EXIT trap set above

## restore.sh
#!/bin/bash
# Performs 'rotation' of ES indices. Maintains only 8 indices (1 week) of
# logstash logs; this script is to be run at midnight daily and removes the
# oldest one (as well as any 1970s-era log indices, as these are a product of
# timestamp fail).
#
# Safety: a DELETE against "http://localhost:9200//" (empty index name) is
# aimed at everything, so every delete goes through delete_index, which
# refuses empty or wildcard-like names.

# Print the log index names found in the ES status output, one per line, in
# sorted order. Prints nothing if ES cannot be reached.
list_log_indices() {
  # The URL is quoted so the '?' is never treated as a glob character.
  curl -s 'localhost:9200/_status?pretty=true' \
    | grep index | grep log | sort | uniq | awk -F\" '{print $4}'
}

# Delete a single index by name.
# Arguments: $1 - index name
# Returns:   non-zero (without contacting ES) if the name is empty or could
#            address more than one index.
delete_index() {
  local name=$1
  case "$name" in
    '' | _all | *[*,/]*)
      echo "refusing to delete suspicious index name '$name'" >&2
      return 1
      ;;
  esac
  curl -s -XDELETE "http://localhost:9200/$name/"
}

# Before we do anything, let's get rid of any nasty 1970s-era indices we have
# floating around. Test for actual matches rather than a count from wc, which
# is never an empty string.
BADINDICES=$(list_log_indices | grep 1970)
if [ -n "$BADINDICES" ]; then
  while IFS= read -r line; do
    echo "Indices with screwed-up timestamps found; removing"
    echo -n "Deleting index $line: "
    if delete_index "$line"; then
      echo "DONE!"
    fi
  done <<< "$BADINDICES"
fi

# Get list of indices; should we rotate? The list is fetched once and reused
# for both the count and the oldest name, so the two can never disagree.
INDICES=$(list_log_indices)
INDEXCOUNT=$(printf '%s' "$INDICES" | grep -c '')
if [ "$INDEXCOUNT" -lt 9 ]; then
  echo "Less than 8 indices, bailing with no action"
  exit 0
fi
echo "More than 8 indices, time to do some cleaning"

# Let's do some cleaning! The list is sorted, so the first entry is the one
# that sorts lowest, which for date-stamped names is the oldest.
OLDESTLOG=$(printf '%s\n' "$INDICES" | head -n1)
echo -n "Deleting oldest index, $OLDESTLOG: "
delete_index "$OLDESTLOG" || exit 1
echo "DONE!"
	#!/bin/bash
	# Back up today's logstash index from Elasticsearch to S3.
	#
	# This script should run at like 6pm or something, after logstash rotates to a
	# new ES index and there is no new data coming in to the old one. Steps:
	#   1. fetch the index mapping and prepend the hardcoded index settings to it,
	#   2. tar+gzip the on-disk index directory,
	#   3. generate a restore script with the mapping embedded in it,
	#   4. upload the tarball and the restore script to S3.
	# Any failed step aborts the run with a non-zero exit status, so a broken or
	# partial backup is never uploaded.
	set -u

	# Print an error to stderr and abort the backup.
	die() {
	  echo "FAILED: $*" >&2
	  exit 1
	}

	TODAY=$(date +"%Y.%m.%d")
	INDEXNAME="logstash-$TODAY" # this had better match the index name in ES
	INDEXDIR="/usr/local/elasticsearch/data/logstash/nodes/0/indices/"
	# Kept as an array so the command and its options survive quoting intact.
	BACKUPCMD=(/usr/local/backupTools/s3cmd --config=/usr/local/backupTools/s3cfg put)
	BACKUPDIR="/mnt/es-backups/"
	YEARMONTH=$(date +"%Y-%m")
	S3TARGET="s3://backups/elasticsearch/$YEARMONTH/$INDEXNAME"
	RESTORESCRIPT="$BACKUPDIR/$INDEXNAME-restore.sh"

	# Private scratch directory instead of fixed names in /tmp: leftovers from an
	# earlier run can no longer leak into this one, and it is removed on any exit.
	WORKDIR=$(mktemp -d) || die "could not create temporary directory"
	trap 'rm -rf -- "$WORKDIR"' EXIT
	MAPPING="$WORKDIR/mapping"
	MAPPOST="$WORKDIR/mappost"

	# create mapping file with index settings. this metadata is required by ES to use index file data
	echo -n "Backing up metadata… "
	# -f makes curl fail on an HTTP error instead of saving the error body as if
	# it were the mapping.
	curl -sSf -XGET -o "$MAPPING" "http://localhost:9200/$INDEXNAME/_mapping?pretty=true" \
	  || die "could not fetch mapping for $INDEXNAME"
	# Prepend the hardcoded settings metadata to the index-specific metadata,
	# dropping the first two lines of the mapping returned by ES.
	{
	  echo '{"settings":{"number_of_shards":5,"number_of_replicas":1},"mappings":{'
	  tail -n +3 "$MAPPING"
	} > "$MAPPOST" || die "could not assemble index metadata"
	echo "DONE!"

	# now lets tar up our data files. these are huge, so lets be nice
	echo -n "Backing up data files (this may take some time)… "
	mkdir -p "$BACKUPDIR" || die "could not create $BACKUPDIR"
	# Without this check a failed cd would make tar archive from the wrong place.
	cd "$INDEXDIR" || die "could not cd to $INDEXDIR"
	nice -n 19 tar czf "$BACKUPDIR/$INDEXNAME.tar.gz" "$INDEXNAME" \
	  || die "could not archive $INDEXNAME"
	echo "DONE!"

	echo -n "Creating restore script… "
	# The here-document is expanded NOW, at backup time: every variable and the
	# contents of $MAPPOST are baked into the generated script as literal text.
	# '<<-' is required because this block is tab-indented: it strips the leading
	# tabs from each line and lets the tab-indented EOF terminate the document.
	# The file is overwritten (not appended to) so re-running the backup on the
	# same day does not produce a restore script with duplicated contents.
	cat <<-EOF > "$RESTORESCRIPT" || die "could not write $RESTORESCRIPT"
	#!/bin/bash
	# this script requires $INDEXNAME.tar.gz and will restore it into elasticsearch
	# it is ESSENTIAL that the index you are restoring does NOT exist in ES. delete it
	# if it does BEFORE trying to restore data.

	# create index and mapping
	echo -n "Creating index and mappings… "
	curl -XPUT 'http://localhost:9200/$INDEXNAME/' -d '$(cat "$MAPPOST")' > /dev/null 2>&1
	echo "DONE!"

	# extract our data files into place
	echo -n "Restoring index (this may take a while)… "
	cd $INDEXDIR
	tar xzf $BACKUPDIR/$INDEXNAME.tar.gz
	echo "DONE!"

	# restart ES to allow it to open the new dir and file data
	echo -n "Restarting Elasticsearch… "
	/etc/init.d/es restart
	echo "DONE!"
	EOF
	echo "DONE!" # restore script done

	# push both tar.gz and restore script to s3
	echo -n "Saving to S3 (this may take some time)… "
	"${BACKUPCMD[@]}" "$BACKUPDIR/$INDEXNAME.tar.gz" "$S3TARGET.tar.gz" \
	  || die "could not upload $INDEXNAME.tar.gz"
	"${BACKUPCMD[@]}" "$RESTORESCRIPT" "$S3TARGET-restore.sh" \
	  || die "could not upload $INDEXNAME-restore.sh"
	echo "DONE!"

	# tmp files are cleaned up by the EXIT trap set above
	#!/bin/bash
	# Performs 'rotation' of ES indices. Maintains only 8 indices (1 week) of
	# logstash logs; this script is to be run at midnight daily and removes the
	# oldest one (as well as any 1970s-era log indices, as these are a product of
	# timestamp fail).
	#
	# Safety: a DELETE against "http://localhost:9200//" (empty index name) is
	# aimed at everything, so every delete goes through delete_index, which
	# refuses empty or wildcard-like names.

	# Print the log index names found in the ES status output, one per line, in
	# sorted order. Prints nothing if ES cannot be reached.
	list_log_indices() {
	  # Real pipes here: a backslash-escaped '\|' is handed to curl as a literal
	  # argument and no filtering happens at all. The URL is quoted so the '?'
	  # is never treated as a glob character.
	  curl -s 'localhost:9200/_status?pretty=true' \
	    | grep index | grep log | sort | uniq | awk -F\" '{print $4}'
	}

	# Delete a single index by name.
	# Arguments: $1 - index name
	# Returns:   non-zero (without contacting ES) if the name is empty or could
	#            address more than one index.
	delete_index() {
	  local name=$1
	  case "$name" in
	    '' | _all | *[*,/]*)
	      echo "refusing to delete suspicious index name '$name'" >&2
	      return 1
	      ;;
	  esac
	  curl -s -XDELETE "http://localhost:9200/$name/"
	}

	# Before we do anything, let's get rid of any nasty 1970s-era indices we have
	# floating around. Test for actual matches rather than a count from wc, which
	# is never an empty string.
	BADINDICES=$(list_log_indices | grep 1970)
	if [ -n "$BADINDICES" ]; then
	  while IFS= read -r line; do
	    echo "Indices with screwed-up timestamps found; removing"
	    echo -n "Deleting index $line: "
	    if delete_index "$line"; then
	      echo "DONE!"
	    fi
	  done <<< "$BADINDICES"
	fi

	# Get list of indices; should we rotate? The list is fetched once and reused
	# for both the count and the oldest name, so the two can never disagree.
	INDICES=$(list_log_indices)
	INDEXCOUNT=$(printf '%s' "$INDICES" | grep -c '')
	if [ "$INDEXCOUNT" -lt 9 ]; then
	  echo "Less than 8 indices, bailing with no action"
	  exit 0
	fi
	echo "More than 8 indices, time to do some cleaning"

	# Let's do some cleaning! The list is sorted, so the first entry is the one
	# that sorts lowest, which for date-stamped names is the oldest.
	OLDESTLOG=$(printf '%s\n' "$INDICES" | head -n1)
	echo -n "Deleting oldest index, $OLDESTLOG: "
	delete_index "$OLDESTLOG" || exit 1
	echo "DONE!"