Skip to content

Instantly share code, notes, and snippets.

@homogulosus
Last active July 13, 2020 17:50
Show Gist options
  • Save homogulosus/132bf15339a20a438d070f7d9f7664f5 to your computer and use it in GitHub Desktop.
Save homogulosus/132bf15339a20a438d070f7d9f7664f5 to your computer and use it in GitHub Desktop.
Downloads a URL recursively and compiles HTML to Markdown
#!/usr/bin/env zsh
# Version: 0.1
# Author: homogulosus
# Date: Mon Jul 13 13:19:32 EDT 2020
# Description: Downloads a URL recursively and recompiles html to markdown.
# Address of the site to mirror: the first command-line argument.
URL="${1}"
# Mirror a web site into the current directory with wget.
# Globals:
#   URL         - fallback address when no argument is given (read)
#   BOLD, RESET - optional colour escape codes (read; may be unset)
# Arguments:
#   $1 - address to download (optional; defaults to $URL)
# Outputs:
#   A status line on stdout, followed by whatever wget prints.
# Returns:
#   1 when no address is available, otherwise wget's exit status.
function down_with_site() {
  local target="${1:-${URL:-}}"

  # Without an address wget would only print a usage error; stop early.
  if [ -z "$target" ]; then
    printf '%s\n' "down_with_site: no URL given" >&2
    return 1
  fi

  printf '%s\n' "${BOLD:-}"
  printf '%s %s\n' "Downloading: $target" "${RESET:-}"

  # The address is quoted and placed after "--" so that blanks or a leading
  # dash in it cannot be misread as extra arguments or options.
  wget \
    --recursive \
    --level=4 \
    --no-netrc \
    --page-requisites \
    --adjust-extension \
    --no-parent \
    --cut-dirs=1 \
    --no-verbose \
    --convert-links \
    -- "$target"
  # Options that were tried and are currently disabled:
  #   --no-clobber
  #   --base=<url>
}
# Convert every HTML-like file below the current directory to Markdown.
# For each regular file whose name matches "*.ht*", pandoc writes a sibling
# file with the last extension replaced by ".md".
# Globals:
#   GREEN, RESET - optional colour escape codes (read; may be unset)
# Outputs:
#   A status banner on stdout; pandoc diagnostics pass through unchanged.
# Returns:
#   Status of the last pandoc run, or 0 when nothing matched.
function html_markdown() {
  local src

  printf '%s\n' "${GREEN:-}"
  printf '%s %s\n' "HTML 2 MARKDOWN >>>" "${RESET:-}"

  # NUL-delimited names together with "IFS= read -r" keep paths containing
  # blanks, backslashes or newlines intact. "-type f" skips directories that
  # merely look like HTML files.
  find . -type f -name '*.ht*' -print0 |
    while IFS= read -r -d '' src; do
      # stdin is detached so the converter can never eat the file list.
      pandoc -f html -t markdown "$src" -o "${src%.*}.md" </dev/null
    done
}
# Remove downloaded HTML pages that have been converted to Markdown.
# A page is deleted only when a sibling file with the ".md" extension exists,
# so pages whose conversion failed (or never ran) are kept instead of lost.
# Globals:
#   GREEN, RESET - optional colour escape codes (read; may be unset)
# Outputs:
#   A status banner on stdout; one note per kept page on stderr.
# Returns:
#   0 unless the final removal fails.
function clean_up() {
  local page

  printf '%s\n' "${GREEN:-}"
  # printf gives the same blank-line layout under every shell, whereas
  # "\n" inside echo is only expanded by some echo implementations.
  printf '%s\n%s\n' "Cleaning UP >>> " "${RESET:-}"

  find . -type f -iname '*.html' -print0 |
    while IFS= read -r -d '' page; do
      if [ -f "${page%.*}.md" ]; then
        rm -f -- "$page"
      else
        printf 'Keeping %s: no converted Markdown found\n' "$page" >&2
      fi
    done
  # Earlier idea, left disabled: rename "*.html.md" results to "*.md".
  # find . -iname "*html*" -exec rename .html.md .md '{}' \;
}
# Define the colour variables used by the status messages.
# GREEN, BOLD and RESET hold ANSI escape sequences when stdout is a
# terminal and are empty strings otherwise.
function setup_color() {
  # Start from "no colour" ...
  GREEN=""
  BOLD=""
  RESET=""

  # ... and keep it that way unless stdout is attached to a terminal.
  [ -t 1 ] || return 0

  GREEN=$'\033[32m'
  BOLD=$'\033[1m'
  RESET=$'\033[m'
}
# Entry point: mirror the site, convert its pages to Markdown, tidy up.
# Globals:
#   URL - fallback address when no argument is given (read)
# Arguments:
#   $1 - address of the site to download
# Returns:
#   1 when the address or a required tool is missing, otherwise the status
#   of the final clean-up step.
function main() {
  local target="${1:-${URL:-}}"
  local tool

  # The later steps convert and delete HTML below the current directory, so
  # refuse to run at all when there is nothing to download.
  if [ -z "$target" ]; then
    printf '%s\n' "main: missing URL argument (usage: <script> URL)" >&2
    return 1
  fi

  # Fail before touching any file if a required program is unavailable.
  for tool in wget pandoc; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      printf 'main: required command not found: %s\n' "$tool" >&2
      return 1
    fi
  done

  setup_color
  down_with_site "$@"
  html_markdown
  clean_up
}
# Run the pipeline, forwarding every command-line argument untouched.
main "${@}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment