Skip to content

Instantly share code, notes, and snippets.

@chaimleib
Created January 6, 2020 22:09
Show Gist options
  • Save chaimleib/503c36984bc4dfe53f5336bc4c1d79c6 to your computer and use it in GitHub Desktop.
Save chaimleib/503c36984bc4dfe53f5336bc4c1d79c6 to your computer and use it in GitHub Desktop.
Convert TSV to JSON redirects using AWK
#!/bin/bash
# Converts .tsv files with cols [ from, to, intl? ] into JSONs.
#
# Usage:
# 1. Go to our list of redirects at https://docs.google.com/spreadsheets/d/1u4hQyNn86R1xzh8G30e14rSbvQs9S6CtsIgmz0NNIKk/edit#gid=549054781
# 2. Download each sheet as a .tsv.
# 3. path/to/json-redirects.sh ~/Downloads/*.tsv > path/to/marketing-dev/redirects.json
#
# In the resulting JSON, the keys are the "from" addresses, and the values the
# "to" addresses. "To" addresses on the marketing web site are rewritten to be
# relative to "/". Since the service hostname and other hostnames may change,
# those hostnames have been replaced with !help and !service so that they can
# be set by the server later per its configuration.
# awk program to reformat the src and dest urls.
#
# Input:  tab-separated rows of [ from, to, intl? ]. The first row of every
#         input file is skipped, and so is any row that starts with two or
#         more capital letters.
# Output: tab-separated rows of [ intl?, from, to ] with the URLs rewritten.
filterRedirects='
BEGIN {
  # Split on tabs only, so that cells which are empty or contain spaces keep
  # their column position.
  FS = "\t";
}

# Rewrite a destination URL. Each pattern is anchored at the start of the
# string, so a single sub() per hostname is enough. Dots are escaped so that
# only the exact hostnames match.
function destUrl(u) {
  sub(/^https?:\/\/evernote\.com\//, "/", u);              # del own hostname for internal
  sub(/^https?:\/\/www\.evernote\.com\//, "!service/", u); # let go config host
  sub(/^https?:\/\/help\.evernote\.com\//, "!help/", u);   # let go config host
  return u;
}

# Rewrite a source URL: make it relative to "/" and drop trailing slashes.
function srcUrl(u) {
  sub(/^https?:\/\/evernote\.com\//, "/", u); # del own hostname for srcUrl
  sub(/\/+$/, "", u);                         # strip trailing slashes
  return u;
}

# Skip rows that begin with two or more capital letters. Written without an
# interval expression because not every awk implementation supports {2,}.
/^[A-Z][A-Z]/ { next }

# FNR restarts at 1 for each input file, so this skips the first row of every
# file and emits all remaining rows.
FNR > 1 { printf "%s\t%s\t%s\n", $3, srcUrl($1), destUrl($2) }
'
# awk program to convert .tsv into .json.
#
# Reads tab-separated rows and prints a single JSON object that maps column 2
# to column 3 of every row. Column 1 is not used here. Double quotes and
# backslashes in either column are escaped so the output stays valid JSON.
makeJson='
# Return s with a backslash inserted before every double quote and backslash.
# Built character by character to avoid the replacement-string escaping
# differences between awk implementations.
function jsonEscape(s,    out, i, n, c) {
  out = "";
  n = length(s);
  for (i = 1; i <= n; i++) {
    c = substr(s, i, 1);
    if (c == "\"" || c == "\\") {
      out = out "\\";
    }
    out = out c;
  }
  return out;
}

BEGIN {
  FS = "\t";
  printf "{\n";
}

# Every row after the first is preceded by a separator, so the object never
# ends with a trailing comma.
NR > 1 {
  printf ",\n";
}

{
  printf " \"%s\": \"%s\"", jsonEscape($2), jsonEscape($3);
}

END {
  printf "\n}\n";
}
'
# Function usage: jsonRedirects [yes/no] file.tsv[...]
#
# Prints a JSON redirects map for the given tsv files on stdout.
# Globals:   filterRedirects, makeJson (read) - the awk programs to run
# Arguments: $1   - "yes" or "no" (isIntl?), used as a regex that must match
#                   at the start of a filtered row; when empty or missing,
#                   ".+" is used instead
#            $2.. - tsv files handed to the filtering awk program
jsonRedirects() {
  local intlPattern
  # Inside double quotes "\\b" reaches grep as \b, so "no" matches only as a
  # whole leading word.
  intlPattern="^${1:-.+}\\b"
  shift
  awk "$filterRedirects" "$@" \
    | grep -E "$intlPattern" \
    | awk "$makeJson"
}
# Merge the two redirect maps into a single JSON document with jq. The first
# map (non-intl redirects) is stored under "en_only" and the second (intl
# redirects) under "intl". -s treats the sequential input JSONs as if they
# were in an array, and -S sorts the keys. When the output stream is a
# terminal, jq colors the result.
{
  jsonRedirects no "$@"
  jsonRedirects yes "$@"
} | jq -s -S '{"en_only": .[0], "intl": .[1]}'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment