Last active
September 9, 2017 11:27
-
-
Save fbettag/0dd8c2dd7cb23ca9bec204c25112ef03 to your computer and use it in GitHub Desktop.
SOLR dump and restore script using plain sh and bc, does pagination and dumps to file, also forks for insertion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#
# Dumps the documents of one or more Solr cores page by page and loads them
# into the cores of the same name on another Solr instance.
#
# Arguments:
#   $1   - base URL of the source Solr instance
#   $2   - base URL of the target Solr instance
#   $3   - page size: number of documents requested per query (rows=)
#   $4.. - names of the cores to copy (at least one is required)
#

if [ $# -lt 4 ]; then
  # At least one core is mandatory, so it is not shown in brackets.
  echo "Usage: $0 <source> <target> <limit> <core1> [core2...]" >&2
  echo " $0 http://source.solr:8983/solr http://target.solr:8983/solr 100000 core1 core2" >&2
  exit 1
fi

SOURCE_URL=$1
TARGET_URL=$2
LIMIT=$3

# LIMIT is later used as a divisor for pagination and as the rows= parameter,
# so reject anything that is not a positive decimal integer. Leading zeros are
# rejected too, because shell arithmetic would read them as octal.
case $LIMIT in
  '' | 0* | *[!0-9]*)
    echo "$0: <limit> must be a positive integer, got '$LIMIT'" >&2
    exit 1
    ;;
esac
# Pull one page of documents from a source core and import it into a target
# core. The import runs as a background job so the caller can already pull the
# next page while this one is being uploaded.
#
# NOTE: inside this script the function name shadows the external sync command.
#
# Globals:
#   LIMIT - number of rows requested per page (read)
# Arguments:
#   $1 - URL of the source core
#   $2 - URL of the target core
#   $3 - core name (used in log lines and in the dump file name)
#   $4 - offset of the first document of this page (start=)
#   $5 - page number (used in log lines and in the dump file name)
#   $6 - comma-separated list of fields to request (fl=)
# Outputs:
#   Progress lines on stdout. Writes solr-dump.<core>.<page>.json into the
#   current directory; the file is removed after the import attempt.
# Returns:
#   0 if the page was pulled and the import job was started,
#   1 if pulling the page failed (nothing is imported in that case).
sync()
{
  _source=$1
  _target=$2
  _core=$3
  _offset=$4
  _page=$5
  _fields=$6
  _outfile="solr-dump.${_core}.${_page}.json"
  _rawfile="${_outfile}.raw"

  # Dump the results of the query as json.
  _url="${_source}/select?q=*:*&wt=json&fl=${_fields}&start=${_offset}&rows=${LIMIT}"

  # Download to a file first: in a "curl | sed" pipeline only sed's exit
  # status is visible, so a failed download would be reported as success.
  # -f turns HTTP error responses into a non-zero exit status.
  # The sed step strips everything around the "docs" array of the response.
  if curl -fs "${_url}" > "${_rawfile}" &&
    sed -e 's;^.*,"docs":;;' -e 's;]}}$;];' "${_rawfile}" > "${_outfile}"; then
    rm -f -- "${_rawfile}"
    echo "[${_core}] [Page ${_page}] Pulling data from source... ok"
  else
    rm -f -- "${_rawfile}" "${_outfile}"
    echo "[${_core}] [Page ${_page}] Pulling data from source... failed!"
    return 1
  fi

  # Import the dump into the other solr core in a background subshell, to
  # speed things up. The subshell works on its own copy of the variables, so
  # later calls to this function cannot change what it posts or deletes.
  (
    # post to target and report result
    if curl -qfsm 900 "${_target}/update/json?commit=true" \
      --data-binary "@${_outfile}" \
      -H 'Content-type:application/json; charset=utf-8' > /dev/null; then
      echo "[${_core}] [Page ${_page}] Storing data in target... ok"
    else
      echo "[${_core}] [Page ${_page}] Storing data in target... failed"
    fi
    # clean up outfile
    rm -f -- "${_outfile}"
  ) &
}
# Drop <source>, <target> and <limit>; the remaining arguments are the core
# names. The usage check at the top of the script guarantees that there are at
# least four arguments, so this shift cannot run out of range.
shift 3

_failed=0
for _core in "$@"; do
  # count the number of documents for pagination
  _docs=$(curl -fs "$SOURCE_URL/${_core}/select?q=*:*&rows=0&wt=json" \
    | grep -Eo '"numFound":[0-9]+' | head -n 1 | sed -e 's;^.*:;;')

  # Without a numeric count the page arithmetic below cannot work (request
  # failed, unknown core, unexpected response), so skip this core.
  case $_docs in
    '' | *[!0-9]*)
      echo "[${_core}] could not determine the number of documents, skipping" >&2
      _failed=1
      continue
      ;;
  esac

  # get field list and exclude _version_
  _fields=$(curl -fs "$SOURCE_URL/${_core}/select?q=*:*&fl=*&wt=csv&rows=0" \
    | sed -e 's/,_version_,/,/' -e 's/,_version_//' -e 's/_version_,//')

  # Round up, so that a count that is an exact multiple of LIMIT (including
  # an empty core) does not produce a trailing empty page.
  _pages=$(((_docs + LIMIT - 1) / LIMIT))
  echo "[${_core}] ${_docs} docs pages: ${_pages}"

  _page=0
  while [ "${_page}" -lt "${_pages}" ]; do
    # offsets are zero-based, the page numbers passed on are one-based
    _offset=$((_page * LIMIT))
    _page=$((_page + 1))
    sync "$SOURCE_URL/${_core}" "$TARGET_URL/${_core}" "${_core}" "${_offset}" "${_page}" "${_fields}" \
      || _failed=1
  done
done

# sync starts its imports as background jobs; wait for them so the script
# does not return while uploads are still running.
wait

[ "${_failed}" -eq 0 ] || exit 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment