Created
February 17, 2022 02:18
-
-
Save johnhpatton/b24ade7fbf5cdbbe56c123377b0287e7 to your computer and use it in GitHub Desktop.
Updates keyval configured in nginx-plus with IP address blocks for a whitelist.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# update-googlebot-whitelist-keyval.sh
#
#
# Run daily from cron with an entry in this format:
# 0 0 * * * /usr/local/bin/update-googlebot-whitelist-keyval.sh &>/dev/null
# Check for dependencies; this process requires curl and jq.
# `type -P` only searches PATH, so a shell function or alias with the same
# name cannot hide a missing executable. Diagnostics are written to stderr so
# they are never mistaken for (or piped along with) normal output.
if ! type -P curl &>/dev/null; then
  echo "ERROR: install curl to retrieve google IP address list" >&2
  exit 1
elif ! type -P jq &>/dev/null; then
  echo "ERROR: install jq to parse json" >&2
  exit 1
fi
# Build the URL of the nginx instance that acts as the API endpoint for the
# nginx cluster. Either edit the defaults below or export the environment
# variables shown in the examples. With nothing exported the base URL is:
#   https://127.0.0.1:9443
#
# Example: https://10.0.0.1:9443
#   export NGINX_CLUSTER_API_SCHEME="https"
#   export NGINX_CLUSTER_API_SERVER="10.0.0.1"
#   export NGINX_CLUSTER_API_PORT="9443"
#
# Example: an internal domain is configured for API access through a load
# balancer listening on port 9443:
#   export NGINX_CLUSTER_API_SCHEME="https"
#   export NGINX_CLUSTER_API_SERVER="api.prod.int.example.com"
#   export NGINX_CLUSTER_API_PORT="9443"
NGINX_CLUSTER_API_URL="${NGINX_CLUSTER_API_SCHEME:-https}://"
NGINX_CLUSTER_API_URL+="${NGINX_CLUSTER_API_SERVER:-127.0.0.1}:"
NGINX_CLUSTER_API_URL+="${NGINX_CLUSTER_API_PORT:-9443}/api/"

# Add the API version for the installed nginx: the last element of the JSON
# array returned by the API root is appended to the URL.
# curl -k skips TLS certificate verification (kept from the original script).
nginx_api_version=$(curl -sk "${NGINX_CLUSTER_API_URL}" | jq -r '.[-1]')

# An unreachable endpoint or an unexpected reply yields an empty string or
# "null"; stop here instead of building a URL that can never work.
if ! [[ "${nginx_api_version}" =~ ^[0-9]+$ ]]; then
  echo "ERROR: unable to determine the nginx API version from ${NGINX_CLUSTER_API_URL}" >&2
  exit 1
fi
NGINX_CLUSTER_API_URL+="${nginx_api_version}"

echo " INFO: Nginx Plus API URL:"
echo " ${NGINX_CLUSTER_API_URL}"
#######################################
# Captures differences between two arrays: every element of the array named
# by $1 that does not occur in the array named by $2 is appended to the array
# named by $3.
# NOTE: bash < 4.3 friendly (indirect expansion instead of namerefs).
# Arguments:
#   $1 - name of the array whose elements are examined
#   $2 - name of the array to compare against
#   $3 - name of the result array; it is appended to, never cleared
# Returns:
#   0 on success, 2 if any argument is not a valid variable name
#######################################
capture_differences() {
  local _cd_nsource="$1"
  local _cd_ndest="$2"
  local _cd_nret="$3"

  # The result name is handed to eval below, so only plain identifiers are
  # accepted for all three names.
  local _cd_name
  for _cd_name in "${_cd_nsource}" "${_cd_ndest}" "${_cd_nret}"; do
    if ! [[ "${_cd_name}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
      echo "ERROR: capture_differences: invalid array name '${_cd_name}'" >&2
      return 2
    fi
  done

  # Copy the caller's arrays through indirect expansion.
  local _cd_rsource="${_cd_nsource}[@]"
  local _cd_rdest="${_cd_ndest}[@]"
  local -a _cd_source=("${!_cd_rsource}")
  local -a _cd_dest=("${!_cd_rdest}")

  local _cd_item _cd_other _cd_missing
  for _cd_item in "${_cd_source[@]}"; do
    _cd_missing=1
    for _cd_other in "${_cd_dest[@]}"; do
      if [[ "${_cd_other}" == "${_cd_item}" ]]; then
        _cd_missing=0
        break
      fi
    done
    if (( _cd_missing )); then
      # Only the validated array name is interpolated; the element itself is
      # expanded by eval inside double quotes, so its content is never parsed
      # as shell code.
      eval "${_cd_nret}+=(\"\${_cd_item}\")"
    fi
  done
  return 0
}
# Working sets filled in by update_google_whitelist:
#   KEYVAL - keys currently stored in the keyval zone
#   SOURCE - IP address blocks published by Google
#   ADD    - entries of SOURCE that are missing from KEYVAL
#   REMOVE - entries of KEYVAL that are missing from SOURCE
declare -a KEYVAL=()
declare -a SOURCE=()
declare -a ADD=()
declare -a REMOVE=()

#######################################
# Prints every prefix value found in a Google IP-ranges JSON document, one
# per line.
# Arguments:
#   $1 - URL of the JSON document
# Returns:
#   non-zero if the download fails or the document cannot be parsed
#######################################
_fetch_google_prefixes() {
  local url="$1"
  local body
  # -f turns HTTP error statuses into a failure instead of parsing an error page.
  body=$(curl -fs "${url}") || return 1
  jq -r '.prefixes[] | .[]' <<<"${body}"
}

#######################################
# Synchronises a keyval zone with the IP address blocks published by Google:
# blocks missing from the zone are added with the value "1", keys no longer
# published are removed.
# Globals:
#   NGINX_CLUSTER_API_URL (read); KEYVAL, SOURCE, ADD, REMOVE (rewritten)
# Arguments:
#   $1 - name of the keyval zone to manage
# Outputs:
#   "Added: ..." / "Removed: ..." lines and the API replies on stdout,
#   errors on stderr
# Returns:
#   exits the script with status 1 when the zone is not available, its
#   content cannot be read, or the Google lists cannot be retrieved
#######################################
update_google_whitelist() {
  local zone="$1"
  local zone_uri="${NGINX_CLUSTER_API_URL}/http/keyvals/${zone}"
  local ngx_uri="${zone_uri}/"

  local zone_status
  zone_status=$(curl -w '%{http_code}' -sk -o /dev/null "${ngx_uri}")
  if [[ "${zone_status}" != "200" ]]; then
    echo "ERROR: ${zone} is not configured, unable to manage this zone." >&2
    exit 1
  fi

  # Start from a clean slate so a second call never reuses stale entries.
  KEYVAL=()
  SOURCE=()
  ADD=()
  REMOVE=()

  # Populate KEYVAL array with current values from keyval
  local keys line
  if ! keys=$(curl -sk "${ngx_uri}" | jq -r 'keys[]'); then
    echo "ERROR: unable to read the current content of ${zone}." >&2
    exit 1
  fi
  while IFS= read -r line; do
    if [[ -n "${line}" ]]; then
      KEYVAL+=("${line}")
    fi
  done <<<"${keys}"

  # Populate SOURCE array with all IP address block values to whitelist.
  # An empty or failed download must abort the run: continuing would place
  # every existing key in REMOVE and wipe the whitelist.
  local wl_uri prefixes
  for wl_uri in \
    "https://www.gstatic.com/ipranges/goog.json" \
    "https://developers.google.com/search/apis/ipranges/googlebot.json"; do
    if ! prefixes=$(_fetch_google_prefixes "${wl_uri}") || [[ -z "${prefixes}" ]]; then
      echo "ERROR: unable to retrieve IP ranges from ${wl_uri}; leaving ${zone} unchanged." >&2
      exit 1
    fi
    while IFS= read -r line; do
      if [[ -n "${line}" ]]; then
        SOURCE+=("${line}")
      fi
    done <<<"${prefixes}"
  done

  # capture all values in SOURCE that are missing in KEYVAL
  # in the ADD array
  capture_differences "SOURCE" "KEYVAL" "ADD"

  # capture all values in KEYVAL that are missing in SOURCE
  # in the REMOVE array
  capture_differences "KEYVAL" "SOURCE" "REMOVE"

  # jq builds the request bodies so keys are always correctly JSON-escaped.
  local a r body
  for a in "${ADD[@]}"; do
    body=$(jq -cn --arg key "${a}" '{($key): "1"}')
    if curl -ks "${zone_uri}" -d "${body}"; then
      echo "Added: $a"
    else
      echo "ERROR: failed to add ${a} to ${zone}" >&2
    fi
  done

  for r in "${REMOVE[@]}"; do
    # PATCH with a null value is the request the script uses to drop a key.
    body=$(jq -cn --arg key "${r}" '{($key): null}')
    if curl -ks -X PATCH "${zone_uri}" -d "${body}"; then
      echo "Removed: $r"
    else
      echo "ERROR: failed to remove ${r} from ${zone}" >&2
    fi
  done
}

update_google_whitelist "googlebot_cidrs"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment