Last active
February 22, 2023 05:59
-
-
Save jonlabelle/c7d641973673698da29e3132130d6355 to your computer and use it in GitHub Desktop.
Bash script to convert a web page URL to Markdown using Pandoc.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#######################################
# Convert an HTML document (URL or local path) to Markdown using Pandoc.
#
# Arguments:
#   $1 - URL or path of the HTML source (required).
#   $2 - Output file (optional). When omitted or empty, the name is derived
#        from $1: for URLs the query string and fragment are dropped, the
#        basename is taken, runs of blanks/underscores and of control
#        characters/underscores collapse to one underscore, letters are
#        lowercased, and the last file extension is replaced with ".md".
# Outputs:
#   Usage text on stderr when $1 is missing or empty.
# Returns:
#   1 when no source is given; otherwise the exit status of pandoc.
#######################################
url2markdown() {
  if [[ -z "${1:-}" ]]; then
    printf '%s\n' \
      'Usage: url2markdown <url|path> [outfile.md]' \
      '' \
      ' Converts the specified URL (or path) to Markdown using Pandoc.' \
      '' \
      ' If [outfile.md] (arg 2) is not specified...' \
      '' \
      ' - basename of url|path is used (arg 1)' \
      ' - whitespace replaced with underscores' \
      ' - control characters replaced with underscores' \
      ' - convert to lowercase' \
      ' - existing file extension replaced with .md' >&2
    return 1
  fi

  local src=$1
  local outfile=${2:-}

  if [[ -z "${outfile}" ]]; then
    local name=${src}

    # For URLs, "?query" and "#fragment" are not part of the document name;
    # leaving them in would leak into (or corrupt) the derived file name.
    if [[ "${name}" == *://* ]]; then
      name=${name%%[?#]*}
    fi

    # "--" keeps a source that starts with "-" from being read as an option.
    name=$(basename -- "${name}" \
      | sed -E -e 's/[_[:blank:]]+/_/g' -e 's/[_[:cntrl:]]+/_/g' \
      | tr '[:upper:]' '[:lower:]')

    # Strip only the LAST extension ("%.*", not "%%.*") so that dotted names
    # such as "notes.v2.html" become "notes.v2.md" rather than "notes.md".
    local stem=${name%.*}

    # A name like ".hidden" has nothing before its only dot; keep it whole
    # instead of producing a bare ".md".
    if [[ -z "${stem}" ]]; then
      stem=${name}
    fi

    outfile="${stem}.md"
  fi

  pandoc --standalone --from html "${src}" --to markdown --output "${outfile}"
}
# Script entry point: forward the command-line arguments to url2markdown
# (it reads the first as the source and the second as the optional outfile).
url2markdown "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@jonlabelle on the first try I get
Unsupported charset "UTF-8"