Skip to content

Instantly share code, notes, and snippets.

@gakhov
Created October 29, 2012 10:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gakhov/3972904 to your computer and use it in GitHub Desktop.
Save gakhov/3972904 to your computer and use it in GitHub Desktop.
Dump items from ES index to json files
#!/bin/bash
# Script for backing up an ES index to the filesystem
#
# This script gets all records indexed on the specified day
# on SERVER/INDEXNAME and saves them on the filesystem
# as a JSON file INDEXNAME/BACKUPDIR/BACKUPFILE.
# The script also checks whether the mapping and index settings are
# already saved and dumps them if not.
#
# The script accepts one parameter in the format YYYY-MM-DD,
# which specifies the backup date; without it, yesterday is used.
################# CONFIGURATION ########################
# Elasticsearch endpoint (host:port), index and document type to dump.
SERVER="localhost:9200"
INDEXNAME="social"
DOCTYPE="item"
# Timestamp field of the documents on which the backup window is expressed.
DATEFIELD="index_updated"
# Local directory that receives the dumps; named after the index.
INDEXDIR="${INDEXNAME}"

# Backup date: the single YYYY-MM-DD argument, or yesterday when the script
# is not called with exactly one argument.
if [ "$#" -ne 1 ]; then
  BACKUPDATE=$(date --date='yesterday' +"%Y-%m-%d")
else
  read -r Y M D <<< "${1//-/ }"
  # Let date(1) validate and normalise the value: a malformed date would
  # otherwise produce a broken range query that silently matches nothing.
  if ! BACKUPDATE=$(date -d "${Y}-${M}-${D}" +"%Y-%m-%d" 2> /dev/null); then
    echo "Invalid backup date '$1' (expected YYYY-MM-DD)" >&2
    exit 1
  fi
fi

# Day after the backup date; used as the exclusive upper bound of the window.
if ! BACKUPNEXTDATE=$(date -d "${BACKUPDATE} next day" +"%Y-%m-%d"); then
  echo "Cannot compute the day after ${BACKUPDATE}" >&2
  exit 1
fi

# Dumps are stored as INDEXDIR/<backup date>/day.json.
BACKUPDIR="${BACKUPDATE}"
BACKUPFILE="day.json"
################# FUNCTIONS ############################
#######################################
# Dump all documents matching a query from Elasticsearch into a file.
# First asks the server how many documents match, then requests that many
# hits in one search and stores the raw response.
# Globals:
#   DOCTYPE (read) - document type appended to the index URL
# Arguments:
#   $1 - host:port/index of the index to query (without the http:// scheme)
#   $2 - path of the file that receives the search response
#   $3 - JSON body of the search query
# Outputs:
#   Progress messages on stdout, error messages on stderr.
# Returns:
#   0 when the dump was written or nothing matched, 1 on any failure.
#######################################
dump_items_from_es() {
  local url=$1
  local filepath=$2
  local query=$3
  local python_bin response count

  # The hit count is extracted with Python's json module; either interpreter
  # name is accepted because many hosts ship only one of them.
  if ! python_bin=$(command -v python3 || command -v python); then
    echo "python is required to parse the Elasticsearch response" >&2
    return 1
  fi

  echo "Calculating number of documents ... "
  # -f makes curl fail on HTTP errors instead of handing back an error body.
  if ! response=$(curl -f -XGET "http://${url}/${DOCTYPE}/_search?search_type=count" -d "${query}"); then
    echo "Count request to ${url} failed" >&2
    return 1
  fi

  # print() with a single argument behaves the same under Python 2 and 3.
  count=$("${python_bin}" -c 'import json,sys; obj=json.load(sys.stdin); print(obj.get("hits",{}).get("total", 0))' <<< "${response}" 2> /dev/null)

  # Anything but a plain non-negative integer means the response was not
  # what we expected; do not mistake that for "nothing to back up".
  case "${count}" in
    '' | *[!0-9]*)
      echo "Unexpected response to count request: ${response}" >&2
      return 1
      ;;
  esac

  if [ "${count}" -gt 0 ]; then
    echo "Requesting ${count} items ... "
    if ! curl -f -XGET -o "${filepath}" "http://${url}/${DOCTYPE}/_search?search_type=query_and_fetch&size=${count}" -d "${query}"; then
      echo "Download of ${count} items failed" >&2
      return 1
    fi
    echo "DONE"
  else
    echo "No items for this period"
  fi
}
################# MAIN ############################
# Make sure the directory for this index exists; nothing can be saved without it.
if [ ! -d "${INDEXDIR}" ]; then
  printf 'Creating an index dir ... '
  if ! mkdir -p -- "${INDEXDIR}"; then
    echo "FAILED"
    exit 1
  fi
  echo "DONE"
fi

# Save the index settings once; an existing file is never overwritten.
if [ ! -e "${INDEXDIR}/settings.json" ]; then
  printf 'Backing up index settings ... '
  if curl -f -XGET -o "${INDEXDIR}/settings.json" "http://${SERVER}/${INDEXNAME}/_settings?pretty=true" > /dev/null 2>&1; then
    echo "DONE"
  else
    # Remove any partial file so that the next run tries again.
    rm -f -- "${INDEXDIR}/settings.json"
    echo "FAILED"
  fi
fi

# Save the index mapping once; an existing file is never overwritten.
if [ ! -e "${INDEXDIR}/mapping.json" ]; then
  printf 'Backing up metadata ... '
  if curl -f -XGET -o "${INDEXDIR}/mapping.json" "http://${SERVER}/${INDEXNAME}/_mapping?pretty=true" > /dev/null 2>&1; then
    echo "DONE"
  else
    # Remove any partial file so that the next run tries again.
    rm -f -- "${INDEXDIR}/mapping.json"
    echo "FAILED"
  fi
fi

# Make sure the per-day backup directory exists.
if [ ! -d "${INDEXDIR}/${BACKUPDIR}" ]; then
  printf 'Creating a backup dir ... '
  if ! mkdir -p -- "${INDEXDIR}/${BACKUPDIR}"; then
    echo "FAILED"
    exit 1
  fi
  echo "DONE"
fi

# Range query selecting BACKUPDATE 00:00 (inclusive) up to the next day 00:00
# (exclusive) on the configured date field.
# NOTE(review): the bounds carry a "Z" (UTC) suffix while the dates come from
# date(1) without -u, i.e. local time - confirm the host runs in UTC.
QUERY="{\"query\":{\"range\":{\"${DATEFIELD}\":{\"gte\":\"${BACKUPDATE}T00:00:00.000000Z\",\"lt\":\"${BACKUPNEXTDATE}T00:00:00.000000Z\"}}}}"

if ! dump_items_from_es "${SERVER}/${INDEXNAME}" "${INDEXDIR}/${BACKUPDIR}/${BACKUPFILE}" "${QUERY}"; then
  echo "FAILED" >&2
  exit 1
fi
echo "COMPLETED"
#!/bin/bash
# Backup script for dumping an ES index to the filesystem
#
# This script gets all records indexed during the previous full clock hour
# on SERVER/INDEXNAME and saves them on the filesystem
# as a JSON file INDEXNAME/BACKUPDIR/BACKUPFILE.
# The script also checks whether the mapping and index settings are
# already saved and dumps them if not.
################# CONFIGURATION ########################
# Elasticsearch endpoint (host:port), index and document type to dump.
SERVER="localhost:9200"
INDEXNAME="social"
DOCTYPE="item"
# Timestamp field of the documents on which the backup window is expressed.
DATEFIELD="index_updated"
# Local directory that receives the dumps; named after the index.
INDEXDIR="${INDEXNAME}"

# Read the clock once so that every value below describes the same instant;
# separate date(1) calls could straddle an hour boundary and yield a
# two-hour window.
BACKUPNOW=$(date +%s)
BACKUPPREV=$((BACKUPNOW - 3600))

# Backup window: start of the previous hour (inclusive) up to the start of
# the current hour (exclusive).
BACKUPSTART=$(date -d "@${BACKUPPREV}" +"%Y-%m-%dT%H:00:00")
BACKUPEND=$(date -d "@${BACKUPNOW}" +"%Y-%m-%dT%H:00:00")

# Dumps are stored as INDEXDIR/<date>/<hour>.<date>.json, where date and hour
# are those of the window start.
BACKUPDATE=$(date -d "@${BACKUPPREV}" +"%Y-%m-%d")
BACKUPDIR="${BACKUPDATE}"
BACKUPFILE=$(date -d "@${BACKUPPREV}" +"%H.%Y-%m-%d.json")
################# FUNCTIONS ############################
#######################################
# Dump all documents matching a query from Elasticsearch into a file.
# First asks the server how many documents match, then requests that many
# hits in one search and stores the raw response.
# Globals:
#   DOCTYPE (read) - document type appended to the index URL
# Arguments:
#   $1 - host:port/index of the index to query (without the http:// scheme)
#   $2 - path of the file that receives the search response
#   $3 - JSON body of the search query
# Outputs:
#   Progress messages on stdout, error messages on stderr.
# Returns:
#   0 when the dump was written or nothing matched, 1 on any failure.
#######################################
dump_items_from_es() {
  local url=$1
  local filepath=$2
  local query=$3
  local python_bin response count

  # The hit count is extracted with Python's json module; either interpreter
  # name is accepted because many hosts ship only one of them.
  if ! python_bin=$(command -v python3 || command -v python); then
    echo "python is required to parse the Elasticsearch response" >&2
    return 1
  fi

  echo "Calculating number of documents ... "
  # -f makes curl fail on HTTP errors instead of handing back an error body.
  if ! response=$(curl -f -XGET "http://${url}/${DOCTYPE}/_search?search_type=count" -d "${query}"); then
    echo "Count request to ${url} failed" >&2
    return 1
  fi

  # print() with a single argument behaves the same under Python 2 and 3.
  count=$("${python_bin}" -c 'import json,sys; obj=json.load(sys.stdin); print(obj.get("hits",{}).get("total", 0))' <<< "${response}" 2> /dev/null)

  # Anything but a plain non-negative integer means the response was not
  # what we expected; do not mistake that for "nothing to back up".
  case "${count}" in
    '' | *[!0-9]*)
      echo "Unexpected response to count request: ${response}" >&2
      return 1
      ;;
  esac

  if [ "${count}" -gt 0 ]; then
    echo "Requesting ${count} items ... "
    if ! curl -f -XGET -o "${filepath}" "http://${url}/${DOCTYPE}/_search?search_type=query_and_fetch&size=${count}" -d "${query}"; then
      echo "Download of ${count} items failed" >&2
      return 1
    fi
    echo "DONE"
  else
    echo "No items for this period"
  fi
}
################# MAIN ############################
# Make sure the directory for this index exists; nothing can be saved without it.
if [ ! -d "${INDEXDIR}" ]; then
  printf 'Creating an index dir ... '
  if ! mkdir -p -- "${INDEXDIR}"; then
    echo "FAILED"
    exit 1
  fi
  echo "DONE"
fi

# Save the index settings once; an existing file is never overwritten.
if [ ! -e "${INDEXDIR}/settings.json" ]; then
  printf 'Backing up index settings ... '
  if curl -f -XGET -o "${INDEXDIR}/settings.json" "http://${SERVER}/${INDEXNAME}/_settings?pretty=true" > /dev/null 2>&1; then
    echo "DONE"
  else
    # Remove any partial file so that the next run tries again.
    rm -f -- "${INDEXDIR}/settings.json"
    echo "FAILED"
  fi
fi

# Save the index mapping once; an existing file is never overwritten.
if [ ! -e "${INDEXDIR}/mapping.json" ]; then
  printf 'Backing up metadata ... '
  if curl -f -XGET -o "${INDEXDIR}/mapping.json" "http://${SERVER}/${INDEXNAME}/_mapping?pretty=true" > /dev/null 2>&1; then
    echo "DONE"
  else
    # Remove any partial file so that the next run tries again.
    rm -f -- "${INDEXDIR}/mapping.json"
    echo "FAILED"
  fi
fi

# Make sure the per-day backup directory exists.
if [ ! -d "${INDEXDIR}/${BACKUPDIR}" ]; then
  printf 'Creating a backup dir ... '
  if ! mkdir -p -- "${INDEXDIR}/${BACKUPDIR}"; then
    echo "FAILED"
    exit 1
  fi
  echo "DONE"
fi

# Range query selecting BACKUPSTART (inclusive) up to BACKUPEND (exclusive)
# on the configured date field.
# NOTE(review): the bounds carry a "Z" (UTC) suffix while the timestamps come
# from date(1) without -u, i.e. local time - confirm the host runs in UTC.
QUERY="{\"query\":{\"range\":{\"${DATEFIELD}\":{\"gte\":\"${BACKUPSTART}.000000Z\",\"lt\":\"${BACKUPEND}.000000Z\"}}}}"

if ! dump_items_from_es "${SERVER}/${INDEXNAME}" "${INDEXDIR}/${BACKUPDIR}/${BACKUPFILE}" "${QUERY}"; then
  echo "FAILED" >&2
  exit 1
fi
echo "COMPLETED"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment