Retrieves MX and A records for 'Alexa Top 1 Million' hosts and prints them as pretty formatted JSON objects to stdout.
#!/usr/bin/env bash
#
# Retrieves MX and A records for 'Alexa Top 1 Million' hosts
# and prints them as pretty formatted JSON objects to stdout.
#
# *Optional* parallelism support with GNU Parallel (recommended):
# $ sudo apt-get install parallel
#
# Authors: Aaron Zauner <azet@azet.org>
# License: CC0 1.0 (https://creativecommons.org/publicdomain/zero/1.0)
#
# Abort on the first failing command, including a failure in any stage of a
# pipeline.
set -eo pipefail

# Download URL of the zipped ranking list. The archive and CSV file names are
# derived from it so the three values cannot drift apart.
readonly top1m_s3l="https://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
readonly top1m_zip="${top1m_s3l##*/}" # last path component: top-1m.csv.zip
readonly top1m_csv="${top1m_zip%.*}"  # archive name minus ".zip": top-1m.csv
#######################################
# Prints the host column of the ranking CSV as one space-separated list.
# Globals:   top1m_csv (read) - path of the CSV file; the host is taken to be
#            whatever follows the last comma on each line
# Arguments: none
# Outputs:   every host followed by a single space, on stdout
# Returns:   non-zero if the CSV file cannot be opened
#######################################
function hosts() {
  local line host

  # Read line by line instead of word-splitting the whole file, so that no
  # field is ever subjected to pathname expansion. The `|| [[ -n ... ]]` part
  # keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n ${line} ]]; do
    host=${line##*,}
    host=${host//[[:space:]]/} # drops stray blanks and a CR from CRLF files
    [[ -n ${host} ]] || continue
    printf '%s ' "${host}"
  done < "${top1m_csv}"
}
# Escapes backslashes and double quotes so that ${1} can be embedded in a
# JSON string literal; prints the result without a trailing newline.
function _json_escape() {
  local s=${1}
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '%s' "${s}"
}

#######################################
# Builds a JSON object for one hostname that maps each of its MX records to
# an IPv4 address of that mail exchanger, with tab-indented formatting:
#
#   {
#     "<host>": {
#       "mx_records": {
#         "<mx name>":   "<ipv4>",
#         ...
#       }
#     }
#   }
#
# Arguments: $1 - hostname to look up
# Outputs:   the JSON object on stdout
# Notes:     - lookups go through `dig` (MX) and `getent ahostsv4` (address);
#              only IPv4 is resolved and only the first address is reported
#            - a failed or empty MX lookup yields an empty "mx_records"
#              object; an unresolvable exchanger is reported with ""
#######################################
function get_mx() {
  local host=${1}
  local line mx ip sep=''
  local -a mx_records=()

  # `dig +short` prints "<preference> <exchanger>" per record; keep the last
  # field only. Lines starting with ';' are dig diagnostics (e.g. timeouts),
  # not records, so they are skipped.
  while IFS= read -r line || [[ -n ${line} ]]; do
    if [[ -z ${line} || ${line} == ';'* ]]; then
      continue
    fi
    mx_records+=("${line##* }")
  done < <(dig +short +nosearch +keepopen +time=2 mx "${host}")

  printf '{\n\t"%s": {\n\t\t"mx_records": {\n' "$(_json_escape "${host}")"
  for mx in "${mx_records[@]}"; do
    # First field of the first getent line is the address; `read` fails on
    # empty output, which simply leaves ip empty.
    ip=''
    read -r ip _ < <(getent ahostsv4 "${mx}") || true
    # The separator goes *before* every entry but the first, so the last
    # member carries no trailing comma (which JSON forbids).
    printf '%s\t\t\t"%s": \t"%s"' "${sep}" "$(_json_escape "${mx}")" "${ip}"
    sep=$',\n'
  done
  if ((${#mx_records[@]} > 0)); then
    printf '\n'
  fi
  printf '\t\t}\n\t}\n}\n'
}
# main

# EXIT handler of a full run: removes the downloaded archive and the
# extracted CSV, then logs a completion timestamp to stderr.
function finish() {
  rm -f -- "${top1m_zip}" "${top1m_csv}"
  printf '\n\n<< finished run. >> %s \n' "$(date --rfc-3339=ns)" >&2
}

#######################################
# Entry point.
#   <script> get_mx <host>  worker mode: print the JSON object for one host
#   <script>                full run: fetch the list if it is missing, then
#                           look up every host (via GNU Parallel if installed)
# Globals:   top1m_s3l, top1m_zip, top1m_csv (read)
# Outputs:   JSON objects on stdout, progress/status messages on stderr
#######################################
function main() {
  local host hosts_out self
  local -a host_list=()

  # Worker mode is what the parallel branch below invokes. It is dispatched
  # before anything else so a worker never installs the cleanup trap (which
  # would delete the CSV the parent run is still reading), and it always
  # exits here instead of falling through into a full run on failure.
  if [[ ${1:-} == "get_mx" ]]; then
    get_mx "${2:-}"
    exit "$?"
  fi

  # Installed before the work starts so the files are also removed when the
  # run aborts half-way.
  trap finish EXIT

  if [[ ! -e ${top1m_csv} ]]; then
    if ! wget -q -O "${top1m_zip}" "${top1m_s3l}"; then
      printf 'error: could not download %s\n' "${top1m_s3l}" >&2
      exit 1
    fi
    if ! unzip -o -q "${top1m_zip}"; then
      printf 'error: could not extract %s\n' "${top1m_zip}" >&2
      exit 1
    fi
  fi

  if command -v parallel > /dev/null; then
    printf '<< parallel mode >>\n\n' >&2
    # parallel hands the command string to a shell, so the script path is
    # shell-quoted; {2} is the second CSV column (the host).
    printf -v self '%q' "${0}"
    parallel --progress --colsep ',' "${self} get_mx {2}" :::: "${top1m_csv}"
  else
    printf '<< sequential mode (slow! install `parallel`.) >>\n\n' >&2
    # hosts prints one space-separated line; split it into an array without
    # exposing the words to pathname expansion.
    hosts_out=$(hosts)
    read -r -a host_list <<< "${hosts_out}"
    for host in "${host_list[@]}"; do
      get_mx "${host}"
    done
  fi
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment