Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Retrieves MX and A records for 'Alexa Top 1 Million' hosts and prints them as pretty formatted JSON objects to stdout.
#!/usr/bin/env bash
#
# Retrieves MX and A records for 'Alexa Top 1 Million' hosts
# and prints them as pretty formatted JSON objects to stdout.
#
# *Optional* parallelism support with GNU Parallel (recommended):
# $ sudo apt-get install parallel
#
# Authors: Aaron Zauner <azet@azet.org>
# License: CC0 1.0 (https://creativecommons.org/publicdomain/zero/1.0)
#
# Abort on the first failing command, including a failure inside a pipeline.
set -o errexit -o pipefail

# Download URL of the zipped list; the archive name and the CSV name are
# both derived from it by parameter expansion.
declare -r top1m_s3l='https://s3.amazonaws.com/alexa-static/top-1m.csv.zip'
declare -r top1m_zip="${top1m_s3l##*/}" # URL basename: top-1m.csv.zip
declare -r top1m_csv="${top1m_zip%.*}"  # archive name minus ".zip": top-1m.csv
#######################################
# Prints the hostname column of the top-1m CSV, each name followed by a
# single space, all on one line (no trailing newline).
# Globals:   top1m_csv (read) - path of the CSV file to read
# Arguments: none
# Outputs:   space-separated hostnames on stdout
#######################################
function hosts() {
  local line host
  # Read line by line rather than word-splitting $(<file): the file content
  # is then never subject to pathname expansion, and a final line without a
  # trailing newline is still processed.
  while IFS= read -r line || [[ -n ${line} ]]; do
    line=${line%$'\r'}  # tolerate CRLF line endings
    host=${line/*,/}    # drop everything up to and including the last comma
    [[ -n ${host} ]] || continue
    printf '%s ' "${host}"
  done < "${top1m_csv}"
}
#######################################
# Builds a JSON object that maps each MX host of a domain to the first
# IPv4 address getent reports for that host.
#
# The records are emitted as members of a JSON object, separated by commas.
# The earlier layout put "key": "value" pairs inside [ ] with a trailing
# comma after every entry, which no JSON parser accepts.
#
# Arguments: $1 - domain name to look up
# Outputs:   one pretty-printed JSON object on stdout
# Returns:   0; lookup failures yield an empty record set or an empty
#            address string instead of aborting the run
#######################################
function get_mx() {
  local domain=${1}
  local -a mx_records=()
  local line mx ip sep=""

  # dig +short prints "<preference> <exchange>" per record; keep only the
  # text after the last space. Lines starting with ';' are dig diagnostics
  # (e.g. timeouts), not records, so they are skipped.
  while IFS= read -r line || [[ -n ${line} ]]; do
    if [[ -z ${line} || ${line} == ";"* ]]; then
      continue
    fi
    mx_records+=("${line##* }")
  done < <(dig +short +nosearch +keepopen +time=2 mx "${domain}")

  printf '{\n\t"%s": {\n\t\t"mx_records": {\n' "${domain}"
  for mx in "${mx_records[@]}"; do
    # in our case, v4 suffices. An unresolvable host gives an empty address.
    ip=$(getent ahostsv4 "${mx}") || ip=""
    ip=${ip%%[[:space:]]*} # first field of the first line
    # The separator goes in front of every entry but the first, so the
    # last entry is never followed by a comma.
    printf '%s\t\t\t"%s": \t"%s"' "${sep}" "${mx}" "${ip}"
    sep=$',\n'
  done
  if (( ${#mx_records[@]} > 0 )); then
    printf '\n'
  fi
  printf '\t\t}\n\t}\n}\n'
}
# main
#
# Invocation modes:
#   <script>                 look up every host in the list
#   <script> get_mx <host>   look up one host; this is how GNU Parallel
#                            re-invokes the script for each CSV row

# Worker mode is handled first: get_mx does not read the CSV, so a single
# lookup needs neither the download nor the cleanup trap set up below.
if [[ ${1} == "get_mx" ]]; then
  get_mx "${2}"
  exit 0
fi

# Fetch and unpack the list unless a CSV is already present.
if [[ ! -e ${top1m_csv} ]]; then
  wget "${top1m_s3l}" &> /dev/null || {
    printf 'error: failed to download %s\n' "${top1m_s3l}" >&2
    exit 1
  }
  unzip "${top1m_zip}" &> /dev/null || {
    printf 'error: failed to unpack %s\n' "${top1m_zip}" >&2
    exit 1
  }
fi

if command -v parallel > /dev/null; then
  printf "<< parallel mode >>\n\n" >&2
  # --colsep splits each CSV row into columns; {2} is the hostname column.
  parallel --progress --colsep ',' "${0} get_mx {2}" :::: "${top1m_csv}"
else
  printf "<< sequential mode (slow! install \`parallel\`.) >>\n\n" >&2
  # hosts prints one space-separated line, so word splitting is intended.
  for host in $(hosts); do
    get_mx "${host}"
  done
fi

# Removes the downloaded files and reports completion on stderr.
# Written as a function because the earlier inline trap body had no ';'
# before its closing brace, which made it a syntax error when the trap
# fired, so nothing was ever cleaned up.
finish() {
  rm -f -- "${top1m_zip}" "${top1m_csv}"
  printf '\n\n<< finished run. >> %s \n' "$(date --rfc-3339=ns)" >&2
}
# Registered only after the work is done: a run that aborts earlier keeps
# the CSV on disk, so the next run can skip the download.
trap finish EXIT
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.