Skip to content

Instantly share code, notes, and snippets.

@azet
Last active September 6, 2017 08:55
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save azet/e81d7bc54332ba6ebffb to your computer and use it in GitHub Desktop.
Save azet/e81d7bc54332ba6ebffb to your computer and use it in GitHub Desktop.
Retrieves MX and A records for 'Alexa Top 1 Million' hosts and prints them as pretty formatted JSON objects to stdout.
#!/usr/bin/env bash
#
# Retrieves MX and A records for 'Alexa Top 1 Million' hosts
# and prints them as pretty formatted JSON objects to stdout.
#
# *Optional* parallelism support with GNU Parallel (recommended):
# $ sudo apt-get install parallel
#
# Authors: Aaron Zauner <azet@azet.org>
# License: CC0 1.0 (https://creativecommons.org/publicdomain/zero/1.0)
#
# Abort on the first failing command and treat a failure in any pipeline
# stage as a failure of the whole pipeline.
set -o errexit -o pipefail

# Download location of the zipped host list.
declare -r top1m_s3l='https://s3.amazonaws.com/alexa-static/top-1m.csv.zip'
# Local archive name: the last path component of the URL.
declare -r top1m_zip="${top1m_s3l##*/}"
# Local CSV name: the archive name with its final extension removed.
declare -r top1m_csv="${top1m_zip%.*}"
#######################################
# Prints the host column of the CSV host list as one space-terminated list.
# Globals:   top1m_csv (read) - path to the CSV file to read
# Arguments: none
# Outputs:   each host followed by a single space, on stdout
# Returns:   non-zero if the CSV file cannot be opened
#######################################
function hosts() {
  local line host
  # Stream the file line by line: nothing is word-split or glob-expanded,
  # and the (large) list is never held in memory as a whole. The extra
  # test keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n ${line} ]]; do
    line=${line%$'\r'}   # tolerate CRLF line endings
    host=${line##*,}     # keep only what follows the last comma
    # Lines without a host (blank line, empty last field) produce nothing.
    if [[ -n ${host} ]]; then
      printf '%s ' "${host}"
    fi
  done < "${top1m_csv}"
}
# Escapes backslashes and double quotes so ${1} can sit inside a JSON string.
function _json_escape() {
  local text=${1} bs='\' dq='"'
  text=${text//"${bs}"/"${bs}${bs}"}
  text=${text//"${dq}"/"${bs}${dq}"}
  printf '%s' "${text}"
}

#######################################
# Builds a JSON object mapping the MX records of a host to the first IPv4
# address that each MX host resolves to.
# Arguments: $1 - host name to look up
# Outputs:   pretty-printed JSON on stdout, shaped as
#              { "<host>": { "mx_records": [ { "<mx>": "<ipv4>" }, ... ] } }
#            The list is empty when no MX record is found; the address is
#            "" when the MX host does not resolve.
# Returns:   0; lookup failures are tolerated and only shrink the output.
#######################################
function get_mx() {
  local domain=${1}
  local -a mx_records=()
  local line mx ip sep=''

  # Keep the last space-separated field of every answer line (this drops
  # the leading MX preference). Lines starting with ';' are dig diagnostics
  # such as ";; connection timed out; ..." and are not records.
  while IFS= read -r line || [[ -n ${line} ]]; do
    if [[ ${line} == ';'* ]]; then
      continue
    fi
    mx=${line##* }
    if [[ -n ${mx} ]]; then
      mx_records+=("${mx}")
    fi
  done < <(dig +short +nosearch +keepopen +time=2 mx "${domain}")

  printf '{\n\t"%s": {\n\t\t"mx_records": [\n' "$(_json_escape "${domain}")"
  for mx in "${mx_records[@]}"; do
    ip=''
    # In our case, v4 suffices: take the first field of the first line.
    # read fails when getent prints nothing; that is expected for hosts
    # that do not resolve, so the failure is deliberately ignored.
    read -r ip _ < <(getent ahostsv4 "${mx}") || true
    # The separator goes in front of every entry but the first, so the
    # list never ends in a trailing comma.
    printf '%s\t\t\t{ "%s": "%s" }' "${sep}" "$(_json_escape "${mx}")" "${ip}"
    sep=$',\n'
  done
  if [[ -n ${sep} ]]; then
    printf '\n'
  fi
  printf '\t\t]\n\t}\n}\n'
}
# main

# Worker mode: "<script> get_mx <host>" is how the parallel branch below
# re-invokes this script for a single host. It is handled before anything
# else so that a worker never downloads the list, never installs the cleanup
# trap (which would delete the list under the parent), and always exits with
# get_mx's own status instead of falling through into a nested full run.
if [[ ${1:-} == "get_mx" ]]; then
  get_mx "${2:-}"
  exit
fi

# Files fetched by this run. Only these are removed on exit, so a copy of
# the list that was already present is left alone.
fetched_files=()

# Removes the files fetched by this run and prints the end-of-run marker.
function cleanup() {
  if (( ${#fetched_files[@]} > 0 )); then
    rm -f -- "${fetched_files[@]}"
  fi
  printf '\n\n<< finished run. >> %s \n' "$(date --rfc-3339=ns)" >&2
}
# Installed before any work starts so that it also fires when a later step
# fails, not only after a complete run.
trap cleanup EXIT

# Fetch and unpack the host list unless a CSV is already in place.
if [[ ! -e ${top1m_csv} ]]; then
  fetched_files+=("${top1m_zip}" "${top1m_csv}")
  if ! wget --quiet -O "${top1m_zip}" "${top1m_s3l}"; then
    printf 'error: could not download %s\n' "${top1m_s3l}" >&2
    exit 1
  fi
  if ! unzip -q -o "${top1m_zip}"; then
    printf 'error: could not unpack %s\n' "${top1m_zip}" >&2
    exit 1
  fi
fi

if command -v parallel > /dev/null; then
  printf "<< parallel mode >>\n\n" >&2
  # parallel hands the command line to a shell, so quote both paths. Going
  # through ${BASH} also works when this script was started as
  # "bash script.sh" and ${0} is not executable or not on PATH.
  worker=$(printf '%q %q' "${BASH}" "${0}")
  parallel --progress --colsep ',' "${worker} get_mx {2}" :::: "${top1m_csv}"
else
  printf "<< sequential mode (slow! install \`parallel\`.) >>\n\n" >&2
  # hosts emits space-terminated names; read them from fd 3 so that
  # commands run by get_mx cannot consume the list through stdin.
  while read -r -u 3 -d ' ' host; do
    if [[ -n ${host} ]]; then
      get_mx "${host}"
    fi
  done 3< <(hosts)
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment