Skip to content

Instantly share code, notes, and snippets.

@mbreksopuro
Last active September 26, 2023 18:28
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mbreksopuro/df4d8f11f1452608befa0d55e14d30e4 to your computer and use it in GitHub Desktop.
Save mbreksopuro/df4d8f11f1452608befa0d55e14d30e4 to your computer and use it in GitHub Desktop.
Reindex API from Remote ElasticSearch to Local ElasticSearch Servers (for up to ES 6.x)
#!/bin/bash
###################################################
## Start ElasticSearch Reindex Same Indices
##
## Created by : Hartfordfive - 2019-02-01
## Modified by : Marjono B. Reksopuro - 2019-03-11
##
##
###################################################
#
# Load the user's ~/.bashrc unless it has already been loaded.
# Note: BASHRC_LOADED needs to be set in the .bashrc itself.
# This must run before any other variable is set, because the .bashrc
# could otherwise overwrite values assigned by this script.
if [[ -f ~/.bashrc && -z "$BASHRC_LOADED" ]]; then
  . ~/.bashrc
fi

# General variables (exported so that sourced override files and child
# processes see them).
ACCOUNT=$(whoami)
# Parameter expansion instead of `basename $0`: it survives a script path
# containing whitespace and a $0 that starts with '-'.
PGM=${0##*/}
SERVER=$(hostname)
DATE=$(date +%Y%m%d%H%M)
OVERDIR="/opt/elasticsearch/scripts"
TERM=linux
# Blank out the proxy settings for this script and its child processes.
http_proxy=
https_proxy=
ftp_proxy=
export ACCOUNT PGM SERVER DATE OVERDIR TERM http_proxy https_proxy ftp_proxy
#######################################
# Source optional override files so that generic scripts can be tuned per
# environment (test, dev, ...) or per server, which also accommodates
# upgrades.
# Globals:
#   OVERDIR (read) - directory holding the override files
#   ENV     (read) - environment name; selects overrides.<ENV>
#   SERVER  (read) - host name; selects overrides.<SERVER>
# Arguments:
#   None
# Notes:
#   The server-level file is sourced last, so its settings win over the
#   environment-level ones. Missing files are skipped silently.
#   NOTE(review): nothing in the visible part of this script calls
#   OVERRIDES - confirm whether a call is intended.
#######################################
OVERRIDES() {
  # Paths are quoted so that an OVERDIR/ENV/SERVER value containing
  # whitespace breaks neither the file test nor the source command.
  if [[ -f "$OVERDIR/overrides.${ENV}" ]]; then
    . "$OVERDIR/overrides.${ENV}"
  fi
  if [[ -f "$OVERDIR/overrides.$SERVER" ]]; then
    . "$OVERDIR/overrides.$SERVER"
  fi
}
# Argument-count guard: the first five positional arguments are required.
# With fewer than five, print the usage text and two example invocations,
# then stop with exit status 1.
if (( $# < 5 )); then
  printf '%s\n' \
    "Usage: $PGM <sourcehost> <sourceport> <user> <pwd> <file_indices_list> <desthost> <destport>" \
    " Example: $PGM elasticsearch-dev.local.domain 9200 elastic xxxx to_list_reindex_indices elasticsearch-dev.local.domain 9200" \
    " Example: $PGM elasticsearch-dev.local.domain 9200 elastic xxxx to_list_reindex_indices elasticsearch-sandbox.local.domain 19200"
  exit 1
fi
# Turn the positional arguments into exported settings for this run.
ESJSON=tmp_reindex.json   # file that receives the JSON request body
REMOTE_HOST="$1:$2"       # source cluster as host:port
ESUSER="$3"
ESPASSWORD="$4"
PATTERN="$5"              # path of the file listing the indices to process

# When no destination host is supplied, the source cluster is also used as
# the destination.
# NOTE(review): if $6 is given without $7, LOCAL_HOST ends up as "host:"
# with an empty port - confirm whether that case should be rejected.
if [[ -n "$6" ]]; then
  LOCAL_HOST="$6:$7"
else
  LOCAL_HOST="$1:$2"
fi
export ESJSON REMOTE_HOST ESUSER ESPASSWORD PATTERN LOCAL_HOST
# Clear the terminal and print the prerequisites for the destination
# cluster (remote whitelist entry, index template), then wait five seconds
# before the script carries on.
clear
printf '%s\n' \
  "---------------------------- NOTICE ----------------------------------" \
  "You must ensure you have the following setting in your local ES host's:" \
  "elasticsearch.yml config (the one re-indexing to):" \
  " reindex.remote.whitelist: $REMOTE_HOST" \
  "Also, if an index template is necessary for this data, you must create" \
  "locally before you start the re-indexing process" \
  "----------------------------------------------------------------------"
sleep 5
# Disabled alternative: discover the indices on the remote host by pattern.
##INDICES=$(curl -H'Content-Type: application/json' --silent "$REMOTE_HOST/_cat/indices/$PATTERN?h=index")
#
# Re-index every index named in the list file $PATTERN from $REMOTE_HOST into
# "<index>-r" on $LOCAL_HOST, compare the document counts of source and
# destination, and print a summary at the end.
#
# Reads:  PATTERN, REMOTE_HOST, LOCAL_HOST, ESUSER, ESPASSWORD, ESJSON
# Writes: INDICES, TOTAL_INDICES, TOTAL_INCOMPLETE_INDICES, TOTAL_DURATION,
#         INCOMPLETE_INDICES and the request body file $ESJSON
# Exits:  1 when the index list file cannot be read

# Escape backslashes and double quotes so $1 can sit inside a JSON string.
# (Control characters are not handled.)
_json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  printf '%s' "$text"
}

# Print the _reindex request body for source index $1 and destination index
# $2, using $3 as the remote password. Passing a placeholder as $3 yields a
# copy that is safe to show on the terminal.
_reindex_body() {
  printf '%s\n' \
    '{' \
    '"conflicts": "proceed",' \
    '"source": {' \
    '"remote": {'
  printf '"host": "http://%s",\n' "$(_json_escape "$REMOTE_HOST")"
  printf '"username": "%s",\n' "$(_json_escape "$ESUSER")"
  printf '"password": "%s"\n' "$(_json_escape "$3")"
  printf '%s\n' \
    '},' \
    '"index": ['
  printf '"%s"\n' "$(_json_escape "$1")"
  printf '%s\n' \
    ']' \
    '},' \
    '"dest": {'
  printf '"index": "%s"\n' "$(_json_escape "$2")"
  printf '%s\n' \
    '},' \
    '"script": {' \
    '"lang": "painless",' \
    ' "source": " ctx._source.group.B2BLog.type = ctx._type;\n ctx._type = \"logs\";"' \
    '}' \
    '}'
}

# Print the docs.count of index $2 on host $1 with all whitespace removed.
# Credentials go through -u rather than the URL so that characters such as
# '@', ':' or '/' in the password cannot corrupt the URL.
_docs_count() {
  local raw
  raw=$(curl -H 'Content-Type: application/json' --silent \
    -u "${ESUSER}:${ESPASSWORD}" "http://$1/_cat/indices/$2?h=docs.count")
  printf '%s' "${raw//[[:space:]]/}"
}

if [[ ! -f "$PATTERN" || ! -r "$PATTERN" ]]; then
  echo "ERROR: cannot read index list file '$PATTERN'" >&2
  exit 1
fi
INDICES=$(cat "$PATTERN")
export INDICES

# Split the list on whitespace (as the unquoted for-loop used to do) but
# without pathname expansion, so a name containing '*' or '?' stays literal.
INDEX_LIST=()
read -r -d '' -a INDEX_LIST <<< "$INDICES"

export TOTAL_INCOMPLETE_INDICES=0
export TOTAL_INDICES=0
export TOTAL_DURATION=0
INCOMPLETE_INDICES=()
_NUM_RE='^[0-9]+$'

for INDEX in "${INDEX_LIST[@]}"; do
  # The data is written to "<index>-r", so that is the index to verify.
  DEST_INDEX="${INDEX}-r"

  TOTAL_DOCS_REMOTE=$(_docs_count "$REMOTE_HOST" "$INDEX")
  echo "Attempting to re-indexing ${INDEX} (${TOTAL_DOCS_REMOTE} docs total) from remote ES server..."
  SECONDS=0

  echo "Creating Input File called ${ESJSON} ........."
  _reindex_body "$INDEX" "$DEST_INDEX" "$ESPASSWORD" > "$ESJSON"
  # Show the request with the password masked instead of dumping the file.
  _reindex_body "$INDEX" "$DEST_INDEX" '********'

  echo curl -H 'Content-Type: application/json' -XPOST -u "${ESUSER}:********" "http://${LOCAL_HOST}/_reindex?wait_for_completion=true&pretty=true" --data-binary "@${ESJSON}"
  if ! curl -H 'Content-Type: application/json' -XPOST \
    -u "${ESUSER}:${ESPASSWORD}" \
    "http://${LOCAL_HOST}/_reindex?wait_for_completion=true&pretty=true" \
    --data-binary "@${ESJSON}"; then
    echo "WARNING: re-index request for ${INDEX} could not be completed by curl" >&2
  fi
  duration=${SECONDS}

  LOCAL_INDEX_EXISTS=$(curl -H 'Content-Type: application/json' -o /dev/null \
    --silent --head --write-out '%{http_code}' \
    -u "${ESUSER}:${ESPASSWORD}" "http://${LOCAL_HOST}/${DEST_INDEX}")
  if [[ "$LOCAL_INDEX_EXISTS" == "200" ]]; then
    TOTAL_DOCS_REINDEXED=$(_docs_count "$LOCAL_HOST" "$DEST_INDEX")
  else
    TOTAL_DOCS_REINDEXED=0
  fi

  echo " Re-indexing results:"
  echo " -> Time taken: $((duration / 60)) minutes and $((duration % 60)) seconds"
  echo " -> Docs indexed: $TOTAL_DOCS_REINDEXED out of $TOTAL_DOCS_REMOTE"
  echo ""
  TOTAL_DURATION=$((TOTAL_DURATION + duration))

  # A count that is not a plain number (failed request, error payload) is
  # treated as incomplete rather than letting the comparison error out and
  # count the index as complete.
  if [[ ! "$TOTAL_DOCS_REMOTE" =~ $_NUM_RE || ! "$TOTAL_DOCS_REINDEXED" =~ $_NUM_RE ]] \
    || [ "$TOTAL_DOCS_REMOTE" -ne "$TOTAL_DOCS_REINDEXED" ]; then
    TOTAL_INCOMPLETE_INDICES=$((TOTAL_INCOMPLETE_INDICES + 1))
    INCOMPLETE_INDICES+=("$INDEX")
  fi
  TOTAL_INDICES=$((TOTAL_INDICES + 1))
done

echo "---------------------- STATS --------------------------"
echo "Total Duration of Re-Indexing Process: $((TOTAL_DURATION / 60))m $((TOTAL_DURATION % 60))s"
echo "Total Indices: $TOTAL_INDICES"
echo "Total Incomplete Re-Indexed Indices: $TOTAL_INCOMPLETE_INDICES"
if (( TOTAL_INCOMPLETE_INDICES != 0 )); then
  printf '%s\n' "${INCOMPLETE_INDICES[@]}"
fi
echo "-------------------------------------------------------"
echo ""
@jinnabaalu
Copy link

jinnabaalu commented Jun 12, 2019

Can you provide a pattern example?
What is the expected value of to_list_reindex_indices?

@sc7565
Copy link

sc7565 commented Aug 6, 2019

It is a flat file with one index name per line.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment