Created
August 27, 2017 14:50
-
-
Save Royaljerry/799daf1e7f4855a86273b9f31b920d85 to your computer and use it in GitHub Desktop.
Group Markdown Links by Their Domains
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Group Markdown Links by Their Domains
#
# Usage: <script> "<lines of Markdown links>" [limit]
#
# Prints a "### Links by Domain" section with one "#### <domain>" group
# for every domain that has more than <limit> links, followed by a
# "### Miscellaneous links" section with the links of all other domains.

# Limit the number of links, under which they go to the miscellaneous
# section: a domain needs MORE than this many links to get its own group.
# Used as the default when no second argument is given.
limit=3

#######################################
# Order link lines by the part of their URL that follows the scheme
# and any leading "www." [2].
# Inputs:  link lines on stdin
# Outputs: the same lines on stdout, reordered
#######################################
sort_links_by_domain() {
  local tab=$'\t'
  # Mark the start of the domain with a tab, sort on everything after
  # that tab, then remove the marker again
  sed "s|\(.*://\(www\.\)*\)|\1${tab}|" \
    | sort -t "$tab" -k2 \
    | sed "s|${tab}||"
}

#######################################
# Reduce link lines to the sorted, de-duplicated list of their domains.
# Lines without a "](http://" or "](https://" part are only truncated
# at the first delimiter, so plain text lines act as their own "domain".
# Inputs:  link lines on stdin
# Outputs: one domain per line on stdout
#######################################
extract_domains() {
  # 1st expression: remove everything up to and including the URL scheme
  # 2nd expression: cut the host at the first "/", "?", "#", ":" or ")" so
  #                 that URLs without a path do not keep the closing ")"
  # 3rd expression: remove the leading subdomain label of a host that
  #                 has three or more labels
  sed \
    -e 's|.*](http[s]*://||' \
    -e 's|[/?#:)].*||' \
    -e 's/\([^.]\{1,\}\)\([.]\{1\}\)\([^.]\{1,\}\)\(.\)\([.]\{1\}\)/\3\4\5/' \
    | sort \
    | uniq
}

#######################################
# Print the links grouped by domain.
# Globals:   limit (read; default threshold)
# Arguments: $1 - the Markdown link lines, one link per line [1]
#            $2 - optional threshold overriding $limit; domains with
#                 this many links or fewer go to the miscellaneous section
# Outputs:   the grouped Markdown on stdout
# Returns:   0 on success, 2 if the threshold is not a non-negative integer
#
# Known limitation: links are assigned to a domain by substring match,
# so a domain contained in another one (e.g. "hub.com" in "github.com")
# also collects the other domain's links.
#######################################
group_links_by_domain() {
  local nl=$'\n'
  local max_misc=${2:-${limit:-3}}
  # Storage of miscellaneous links - this is the section containing
  # too few links to be included in a separate domain group
  local misc=""
  local original sorted master domain links_txt links_num

  case "$max_misc" in
    '' | *[!0-9]*)
      printf 'limit must be a non-negative integer: %s\n' "$max_misc" >&2
      return 2
      ;;
  esac
  # Force base 10 so that a value such as "08" is not read as octal
  max_misc=$((10#$max_misc))

  # First get the lines in question [1]
  # (the command substitution drops trailing newlines)
  original=$(printf '%s\n' "${1-}")
  # Then sort all links by their names and drop duplicate lines
  sorted=$(printf '%s\n' "$original" | sort | uniq)
  # Then sort all links by their domains [2]
  master=$(printf '%s\n' "$sorted" | sort_links_by_domain)

  # Main section
  # with links grouped by their domains
  echo "### Links by Domain"
  echo ""
  while IFS= read -r domain; do
    # Blank lines of the input produce an empty domain; skip them
    [[ -n "$domain" ]] || continue
    # Get links as text to reuse it later; -F matches the domain
    # literally, so its dots are not treated as regex wildcards
    links_txt=$(grep -F -- "$domain" <<<"$master") || continue
    # Get number of links belonging to one domain
    # (BSD wc pads its output with spaces, so strip them)
    links_num=$(wc -l <<<"$links_txt")
    links_num=${links_num//[[:space:]]/}
    if ((links_num <= max_misc)); then
      # Add lines to the miscellaneous section, if there are few links
      misc="${misc}${nl}${links_txt}"
    else
      # Write lines out directly, if there are enough links
      printf '%s\n' "#### ${domain}" "" "$links_txt" ""
    fi
  done < <(extract_domains <<<"$master")

  # Miscellaneous section
  # with assorted links without grouping
  echo "### Miscellaneous links"
  printf '%s\n' "$misc"
}

group_links_by_domain "$@"

# --------------------------------------------------
# COMMENTS
# --------------------------------------------------
#
# [1] For using this script in a text editor (i.e. not as
#     a bash text filter) pass EDITOR_INPUT_VARIABLE instead of "$@"
#     in the call above.
#     (E.g. in case of TextMate it is $TM_SELECTED_TEXT.)
#
# [2] As the syntax \t for a tab character in sed is not standard:
#     OS X sed (...) doesn't support \t for tab and instead treats
#     \t as meaning backslash followed by t.
#     Therefore the tab is produced by bash ($'\t') and expanded
#     into the sed expressions, which also avoids an invisible
#     literal tab character in the source.
#     See https://goo.gl/11Zg1s
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment