Skip to content

Instantly share code, notes, and snippets.

@johnhpatton
Created February 17, 2022 02:18
Show Gist options
  • Save johnhpatton/b24ade7fbf5cdbbe56c123377b0287e7 to your computer and use it in GitHub Desktop.
Save johnhpatton/b24ade7fbf5cdbbe56c123377b0287e7 to your computer and use it in GitHub Desktop.
Updates keyval configured in nginx-plus with IP address blocks for a whitelist.
#!/usr/bin/env bash
# update-googlebot-whitelist-keyval.sh
#
#
# Cron daily with this format:
# 0 0 * * * /usr/local/bin/update-googlebot-whitelist-keyval.sh &>/dev/null
# Check for dependencies, this process requires curl and jq. Every missing
# tool is reported (on stderr) before exiting so that a single run tells the
# operator everything that has to be installed.
missing_deps=0
if ! type -P curl &>/dev/null; then
  echo "ERROR: install curl to retrieve google IP address list" >&2
  missing_deps=1
fi
if ! type -P jq &>/dev/null; then
  echo "ERROR: install jq to parse json" >&2
  missing_deps=1
fi
if (( missing_deps )); then
  exit 1
fi
unset missing_deps
# Update the default values below to point at the nginx instance that acts as
# the API endpoint for the nginx cluster, or set the environment variables
# shown here to the associated values. The default is: https://127.0.0.1:9443
# Example: https://10.0.0.1:9443
# export NGINX_CLUSTER_API_SCHEME="https"
# export NGINX_CLUSTER_API_SERVER="10.0.0.1"
# export NGINX_CLUSTER_API_PORT="9443"
# Example: If an internal domain is configured for API access through a load
# balancer listening on port 9443:
# export NGINX_CLUSTER_API_SCHEME="https"
# export NGINX_CLUSTER_API_SERVER="api.prod.int.example.com"
# export NGINX_CLUSTER_API_PORT="9443"
# Assemble the base API URL from the optional NGINX_CLUSTER_API_SCHEME,
# NGINX_CLUSTER_API_SERVER and NGINX_CLUSTER_API_PORT environment variables,
# falling back to https://127.0.0.1:9443 when they are unset or empty.
NGINX_CLUSTER_API_URL="${NGINX_CLUSTER_API_SCHEME:-https}://"
NGINX_CLUSTER_API_URL+="${NGINX_CLUSTER_API_SERVER:-127.0.0.1}:"
NGINX_CLUSTER_API_URL+="${NGINX_CLUSTER_API_PORT:-9443}/api/"
# Add the API version for the installed nginx: the last element of the array
# returned by the API root. -k skips TLS certificate verification.
NGINX_API_VERSION=$(curl -sk "${NGINX_CLUSTER_API_URL}" | jq -r '.[-1]')
# Stop here when the endpoint is unreachable or answers with something other
# than a version list; otherwise every later request would target a bogus URL
# and fail with a misleading error.
if ! [[ "${NGINX_API_VERSION}" =~ ^[0-9]+$ ]]; then
  echo "ERROR: unable to determine the nginx-plus API version from ${NGINX_CLUSTER_API_URL}" >&2
  exit 1
fi
NGINX_CLUSTER_API_URL+="${NGINX_API_VERSION}"
echo " INFO: Nginx Plus API URL:"
echo " ${NGINX_CLUSTER_API_URL}"
# capture_differences SOURCE_NAME DEST_NAME RESULT_NAME
#
# Appends to the array named RESULT_NAME every element of the array named
# SOURCE_NAME that has no exact string match in the array named DEST_NAME.
# All three arrays are passed by NAME, not by value. Elements already present
# in the result array are kept.
#
# NOTE: bash < 4.3 friendly (indirect expansion + eval instead of namerefs).
#
# Returns: 0 on success, 2 when RESULT_NAME is not a valid variable name.
capture_differences() {
  # Locals carry a __cd_ prefix so they cannot shadow a caller's array name.
  local __cd_src_ref="$1[@]"
  local __cd_dst_ref="$2[@]"
  local __cd_out="$3"
  local __cd_item __cd_other __cd_missing

  # The result name is spliced into an eval below, so refuse anything that is
  # not a plain identifier.
  if ! [[ "${__cd_out}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
    echo "ERROR: capture_differences: invalid array name '${__cd_out}'" >&2
    return 2
  fi

  # Snapshot both input arrays through indirect expansion.
  local -a __cd_src=("${!__cd_src_ref}")
  local -a __cd_dst=("${!__cd_dst_ref}")

  for __cd_item in "${__cd_src[@]}"; do
    __cd_missing=1
    for __cd_other in "${__cd_dst[@]}"; do
      if [[ "${__cd_other}" == "${__cd_item}" ]]; then
        __cd_missing=0
        break
      fi
    done
    if (( __cd_missing )); then
      # The escaped \$ defers expansion of the element to eval time, so its
      # value is appended verbatim and never re-parsed as shell code.
      eval "${__cd_out}+=(\"\${__cd_item}\")"
    fi
  done
  return 0
}
# Global work arrays used by update_google_whitelist, all starting out empty:
#   KEYVAL - keys read from the nginx-plus keyval zone
#   SOURCE - IP address blocks downloaded from the published lists
#   ADD    - entries found in SOURCE but not in KEYVAL
#   REMOVE - entries found in KEYVAL but not in SOURCE
declare -a KEYVAL=() SOURCE=() ADD=() REMOVE=()
# update_google_whitelist ZONE
#
# Synchronizes the nginx-plus keyval zone ZONE with the IP address blocks
# published in Google's googlebot.json and goog.json lists: blocks missing
# from the zone are added with the value "1", keys no longer present in the
# lists are removed.
#
# Globals:   NGINX_CLUSTER_API_URL (read)
#            KEYVAL, SOURCE, ADD, REMOVE (reset and written)
# Arguments: $1 - name of the keyval zone to manage
# Outputs:   "Added: <block>" / "Removed: <block>" per change on stdout,
#            error messages on stderr
# Exits:     1 when the zone does not answer with HTTP 200, or when one of
#            the published lists yields no address blocks
update_google_whitelist() {
  local zone="$1"
  local ngx_uri="${NGINX_CLUSTER_API_URL}/http/keyvals/${zone}/"
  local zone_status keyval_json wl_uri block
  local -a fetched=()

  zone_status=$(curl -w '%{http_code}' -sk -o /dev/null "${ngx_uri}")
  if [[ "${zone_status}" != "200" ]]; then
    echo "ERROR: ${zone} is not configured, unable to manage this zone." >&2
    exit 1
  fi

  # Start from a clean slate so repeated calls do not accumulate stale entries.
  KEYVAL=()
  SOURCE=()
  ADD=()
  REMOVE=()

  # Populate KEYVAL array with the keys currently stored in the keyval zone.
  # The response is fetched once and only parsed when it is valid JSON.
  keyval_json=$(curl -sk "${ngx_uri}")
  if jq empty <<<"${keyval_json}"; then
    IFS=$'\n' read -d "" -ra KEYVAL < <(jq -r 'keys[]' <<<"${keyval_json}")
  fi

  # Populate SOURCE array with all IP address block values to whitelist.
  for wl_uri in \
    "https://www.gstatic.com/ipranges/goog.json" \
    "https://developers.google.com/search/apis/ipranges/googlebot.json"; do
    fetched=()
    IFS=$'\n' read -d "" -ra fetched \
      < <(curl -s "${wl_uri}" | jq -r '.prefixes[] | .[]')
    # An empty result means the download or the parsing failed. Going on
    # would schedule every existing key for removal and wipe the whitelist.
    if (( ${#fetched[@]} == 0 )); then
      echo "ERROR: no IP address blocks retrieved from ${wl_uri}, leaving ${zone} unchanged." >&2
      exit 1
    fi
    SOURCE+=("${fetched[@]}")
  done

  # capture all values in SOURCE that are missing in KEYVAL in the ADD array
  capture_differences "SOURCE" "KEYVAL" "ADD"
  # capture all values in KEYVAL that are missing in SOURCE in the REMOVE array
  capture_differences "KEYVAL" "SOURCE" "REMOVE"

  # POST creates the new key in the zone.
  for block in "${ADD[@]}"; do
    curl -ks "${NGINX_CLUSTER_API_URL}/http/keyvals/${zone}" \
      -d "{\"${block}\": \"1\"}"
    echo "Added: ${block}"
  done

  # PATCH with a null value deletes the key from the zone.
  for block in "${REMOVE[@]}"; do
    curl -ks -X PATCH "${NGINX_CLUSTER_API_URL}/http/keyvals/${zone}" \
      -d "{\"${block}\": null}"
    echo "Removed: ${block}"
  done
}
# Entry point: run the whitelist update against the "googlebot_cidrs" keyval zone.
update_google_whitelist "googlebot_cidrs"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment