Created
October 29, 2012 10:43
-
-
Save gakhov/3972904 to your computer and use it in GitHub Desktop.
Dump items from ES index to json files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Script for backing up an ES index to the filesystem.
#
# This script gets all records indexed on the specified day
# on SERVER/INDEXNAME and saves them on the filesystem
# as a json file INDEXNAME/BACKUPDIR/BACKUPFILE.
# The script also checks whether the mapping and index settings
# are already saved and dumps them if not.
#
# The script accepts one parameter in the format YYYY-MM-DD,
# which specifies the backup date. When called with any other
# number of arguments it backs up yesterday's records.
#
# Requires GNU date (relies on --date / -d).
################# CONFIGURATION ########################
SERVER="localhost:9200"
INDEXNAME="social"
DOCTYPE="item"
DATEFIELD="index_updated"
INDEXDIR="${INDEXNAME}"

# NOTE(review): the dates below are computed in the local time zone, while
# the range query built later labels them with a "Z" suffix -- confirm the
# host clock is UTC.
if [ "$#" -ne 1 ]; then
  BACKUPDATE=$(date --date='yesterday' +"%Y-%m-%d")
else
  # Only a numeric Y-M-D argument is accepted; date(1) then rejects
  # impossible dates (e.g. 2012-02-31) and zero-pads the result. Without
  # this check a bad argument left BACKUPNEXTDATE empty and produced a
  # malformed query.
  if ! [[ "$1" =~ ^[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}$ ]] \
      || ! BACKUPDATE=$(date -d "$1" +"%Y-%m-%d" 2>/dev/null); then
    echo "Invalid backup date '$1' (expected YYYY-MM-DD)" >&2
    exit 1
  fi
fi

# Exclusive upper bound of the backup window.
BACKUPNEXTDATE=$(date -d "${BACKUPDATE} next day" +"%Y-%m-%d")
BACKUPDIR="${BACKUPDATE}"
BACKUPFILE="day.json"
################# FUNCTIONS ############################
#######################################
# Dump every document matching a query from Elasticsearch into a file.
#
# First asks the server how many documents match, then requests all of
# them in a single search whose size equals that count.
#
# Globals:   DOCTYPE (read) - type segment used in the search URL
# Arguments: $1 - host:port/index ("http://" is prepended)
#            $2 - path of the output file
#            $3 - JSON request body sent with both requests
# Outputs:   progress messages on stdout, error messages on stderr
# Returns:   0 if the dump was written or nothing matched,
#            1 if a request failed or the count could not be read
#######################################
dump_items_from_es(){
  local url=$1
  local filepath=$2
  local query=$3
  local response count py

  # Accept either interpreter name; the snippet below is valid in both
  # Python 2 and Python 3 (the old print statement was Python 2 only).
  if ! py=$(command -v python || command -v python3); then
    echo "ERROR: no python interpreter found" >&2
    return 1
  fi

  echo "Calculating number of documents ... "
  # -f turns HTTP errors into a non-zero exit instead of an error body.
  if ! response=$(curl -sS -f -XGET "http://${url}/${DOCTYPE}/_search?search_type=count" -d "${query}"); then
    echo "ERROR: count request failed" >&2
    return 1
  fi

  # The traceback is suppressed on purpose; the check below reports the failure.
  count=$(printf '%s' "${response}" \
    | "${py}" -c 'import json,sys; obj=json.load(sys.stdin); print(obj.get("hits",{}).get("total", 0))' 2>/dev/null)
  if ! [[ "${count}" =~ ^[0-9]+$ ]]; then
    echo "ERROR: could not read the document count from the response" >&2
    return 1
  fi

  if [ "${count}" -gt 0 ]; then
    echo "Requesting ${count} items ... "
    # Download next to the target and rename on success, so a failed
    # request never leaves a truncated or error-body file as the backup.
    if curl -f -XGET -o "${filepath}.part" "http://${url}/${DOCTYPE}/_search?search_type=query_and_fetch&size=${count}" -d "${query}" \
        && mv -- "${filepath}.part" "${filepath}"; then
      echo "DONE"
    else
      rm -f -- "${filepath}.part"
      echo "ERROR: dump request failed" >&2
      return 1
    fi
  else
    echo "No items for this period"
  fi
}
################# MAIN ############################
# check for index directory
if [ ! -d "${INDEXDIR}" ]; then
  echo -n "Creating an index dir ... "
  # Nothing below can work without the directory, so stop on failure.
  if mkdir -p -- "${INDEXDIR}"; then
    echo "DONE"
  else
    echo "FAILED"
    exit 1
  fi
fi

# check for index settings file
if [ ! -e "${INDEXDIR}/settings.json" ]; then
  echo -n "Backing up index settings ... "
  # Fetch to a temp name and rename on success: a saved error body or a
  # partial file would otherwise block every later attempt, because the
  # file would then exist. A failure here is reported but not fatal.
  if curl -sf -XGET -o "${INDEXDIR}/settings.json.part" "http://${SERVER}/${INDEXNAME}/_settings?pretty=true" > /dev/null 2>&1 \
      && mv -- "${INDEXDIR}/settings.json.part" "${INDEXDIR}/settings.json"; then
    echo "DONE"
  else
    rm -f -- "${INDEXDIR}/settings.json.part"
    echo "FAILED"
  fi
fi

# check for index mapping file
if [ ! -e "${INDEXDIR}/mapping.json" ]; then
  echo -n "Backing up metadata ... "
  # Same temp-then-rename approach as for the settings file.
  if curl -sf -XGET -o "${INDEXDIR}/mapping.json.part" "http://${SERVER}/${INDEXNAME}/_mapping?pretty=true" > /dev/null 2>&1 \
      && mv -- "${INDEXDIR}/mapping.json.part" "${INDEXDIR}/mapping.json"; then
    echo "DONE"
  else
    rm -f -- "${INDEXDIR}/mapping.json.part"
    echo "FAILED"
  fi
fi

# check for backup directory
if [ ! -d "${INDEXDIR}/${BACKUPDIR}" ]; then
  echo -n "Creating a backup dir ... "
  if mkdir -p -- "${INDEXDIR}/${BACKUPDIR}"; then
    echo "DONE"
  else
    echo "FAILED"
    exit 1
  fi
fi

# Half-open range [BACKUPDATE, BACKUPNEXTDATE) on the configured date field
# (DATEFIELD was previously defined but the field name was hard-coded here).
QUERY="{\"query\":{\"range\":{\"${DATEFIELD}\":{\"gte\":\"${BACKUPDATE}T00:00:00.000000Z\",\"lt\":\"${BACKUPNEXTDATE}T00:00:00.000000Z\"}}}}"
if dump_items_from_es "${SERVER}/${INDEXNAME}" "${INDEXDIR}/${BACKUPDIR}/${BACKUPFILE}" "${QUERY}"; then
  echo "COMPLETED"
else
  echo "FAILED" >&2
  exit 1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Backup script for saving an ES index to the filesystem.
#
# This script gets all records indexed during the previous clock hour
# on SERVER/INDEXNAME and saves them on the filesystem
# as a json file INDEXNAME/BACKUPDIR/BACKUPFILE.
# The script also checks whether the mapping and index settings
# are already saved and dumps them if not.
#
# Requires GNU date (relies on -d "@<epoch>").
################# CONFIGURATION ########################
SERVER="localhost:9200"
INDEXNAME="social"
DOCTYPE="item"
DATEFIELD="index_updated"
INDEXDIR="${INDEXNAME}"

# Read the clock once. Deriving start and end from two separate date calls
# could straddle an hour boundary and yield a two-hour window.
BACKUPNOW=$(date +%s)
BACKUPPREV=$((BACKUPNOW - 3600))

# Window is [start of previous hour, start of current hour).
# NOTE(review): these are local-time values, while the range query built
# later labels them with a "Z" suffix -- confirm the host clock is UTC.
BACKUPSTART=$(date -d "@${BACKUPPREV}" +"%Y-%m-%dT%H:00:00")
BACKUPEND=$(date -d "@${BACKUPNOW}" +"%Y-%m-%dT%H:00:00")
BACKUPDATE=$(date -d "@${BACKUPPREV}" +"%Y-%m-%d")
BACKUPDIR="${BACKUPDATE}"
BACKUPFILE=$(date -d "@${BACKUPPREV}" +"%H.%Y-%m-%d.json")
################# FUNCTIONS ############################
#######################################
# Dump every document matching a query from Elasticsearch into a file.
#
# First asks the server how many documents match, then requests all of
# them in a single search whose size equals that count.
#
# Globals:   DOCTYPE (read) - type segment used in the search URL
# Arguments: $1 - host:port/index ("http://" is prepended)
#            $2 - path of the output file
#            $3 - JSON request body sent with both requests
# Outputs:   progress messages on stdout, error messages on stderr
# Returns:   0 if the dump was written or nothing matched,
#            1 if a request failed or the count could not be read
#######################################
dump_items_from_es(){
  local url=$1
  local filepath=$2
  local query=$3
  local response count py

  # Accept either interpreter name; the snippet below is valid in both
  # Python 2 and Python 3 (the old print statement was Python 2 only).
  if ! py=$(command -v python || command -v python3); then
    echo "ERROR: no python interpreter found" >&2
    return 1
  fi

  echo "Calculating number of documents ... "
  # -f turns HTTP errors into a non-zero exit instead of an error body.
  if ! response=$(curl -sS -f -XGET "http://${url}/${DOCTYPE}/_search?search_type=count" -d "${query}"); then
    echo "ERROR: count request failed" >&2
    return 1
  fi

  # The traceback is suppressed on purpose; the check below reports the failure.
  count=$(printf '%s' "${response}" \
    | "${py}" -c 'import json,sys; obj=json.load(sys.stdin); print(obj.get("hits",{}).get("total", 0))' 2>/dev/null)
  if ! [[ "${count}" =~ ^[0-9]+$ ]]; then
    echo "ERROR: could not read the document count from the response" >&2
    return 1
  fi

  if [ "${count}" -gt 0 ]; then
    echo "Requesting ${count} items ... "
    # Download next to the target and rename on success, so a failed
    # request never leaves a truncated or error-body file as the backup.
    if curl -f -XGET -o "${filepath}.part" "http://${url}/${DOCTYPE}/_search?search_type=query_and_fetch&size=${count}" -d "${query}" \
        && mv -- "${filepath}.part" "${filepath}"; then
      echo "DONE"
    else
      rm -f -- "${filepath}.part"
      echo "ERROR: dump request failed" >&2
      return 1
    fi
  else
    echo "No items for this period"
  fi
}
################# MAIN ############################
# check for index directory
if [ ! -d "${INDEXDIR}" ]; then
  echo -n "Creating an index dir ... "
  # Nothing below can work without the directory, so stop on failure.
  if mkdir -p -- "${INDEXDIR}"; then
    echo "DONE"
  else
    echo "FAILED"
    exit 1
  fi
fi

# check for index settings file
if [ ! -e "${INDEXDIR}/settings.json" ]; then
  echo -n "Backing up index settings ... "
  # Fetch to a temp name and rename on success: a saved error body or a
  # partial file would otherwise block every later attempt, because the
  # file would then exist. A failure here is reported but not fatal.
  if curl -sf -XGET -o "${INDEXDIR}/settings.json.part" "http://${SERVER}/${INDEXNAME}/_settings?pretty=true" > /dev/null 2>&1 \
      && mv -- "${INDEXDIR}/settings.json.part" "${INDEXDIR}/settings.json"; then
    echo "DONE"
  else
    rm -f -- "${INDEXDIR}/settings.json.part"
    echo "FAILED"
  fi
fi

# check for index mapping file
if [ ! -e "${INDEXDIR}/mapping.json" ]; then
  echo -n "Backing up metadata ... "
  # Same temp-then-rename approach as for the settings file.
  if curl -sf -XGET -o "${INDEXDIR}/mapping.json.part" "http://${SERVER}/${INDEXNAME}/_mapping?pretty=true" > /dev/null 2>&1 \
      && mv -- "${INDEXDIR}/mapping.json.part" "${INDEXDIR}/mapping.json"; then
    echo "DONE"
  else
    rm -f -- "${INDEXDIR}/mapping.json.part"
    echo "FAILED"
  fi
fi

# check for backup directory
if [ ! -d "${INDEXDIR}/${BACKUPDIR}" ]; then
  echo -n "Creating a backup dir ... "
  if mkdir -p -- "${INDEXDIR}/${BACKUPDIR}"; then
    echo "DONE"
  else
    echo "FAILED"
    exit 1
  fi
fi

# Half-open range [BACKUPSTART, BACKUPEND) on the configured date field.
QUERY="{\"query\":{\"range\":{\"${DATEFIELD}\":{\"gte\":\"${BACKUPSTART}.000000Z\",\"lt\":\"${BACKUPEND}.000000Z\"}}}}"
if dump_items_from_es "${SERVER}/${INDEXNAME}" "${INDEXDIR}/${BACKUPDIR}/${BACKUPFILE}" "${QUERY}"; then
  echo "COMPLETED"
else
  echo "FAILED" >&2
  exit 1
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment