Skip to content

Instantly share code, notes, and snippets.

@jgreely
Last active April 7, 2017 23:32
Show Gist options
  • Save jgreely/2338c72c825d2a93713e4f0fc0025985 to your computer and use it in GitHub Desktop.
Save jgreely/2338c72c825d2a93713e4f0fc0025985 to your computer and use it in GitHub Desktop.
create random Hugo content files for theme testing
#!/usr/bin/env bash
# create Hugo content files from random Wikipedia articles;
# requires curl to fetch the articles and Pandoc to convert HTML to Markdown
#
# Help text printed when an unknown option is given.
# Stored without a trailing newline; `echo` supplies one when printing.
USAGE='Usage: wikiblog.sh [options]
-a list of possible authors for an article (selects 1)
-c list of possible categories (select up to N-1)
-n number of articles to generate (default 10)
-s series to assign all articles to
-t list of possible tags (select up to N-1)'
# http://mywiki.wooledge.org/BashFAQ/026
# Randomly reorder the global array `shuffled` in place.
# Globals:   shuffled (read and written)
# Arguments: none
# Works from the last index down to 1, swapping each slot with a randomly
# picked slot at or below it. Draws from $RANDOM (0..32767) that land at or
# above the largest multiple of the range size are thrown away and redrawn,
# so the following modulo does not favour the low slots.
shuffle() {
  local idx pick span ceiling held
  idx=$(( ${#shuffled[*]} - 1 ))
  while (( idx > 0 )); do
    span=$(( idx + 1 ))
    ceiling=$(( 32768 / span * span ))
    pick=$RANDOM
    while (( pick >= ceiling )); do
      pick=$RANDOM
    done
    pick=$(( pick % span ))
    held=${shuffled[idx]}
    shuffled[idx]=${shuffled[pick]}
    shuffled[pick]=$held
    idx=$(( idx - 1 ))
  done
}
# https://gist.github.com/cdown/1163649
# Decode a URL-encoded string and write it to stdout (no trailing newline).
# Arguments: $1 - encoded text; every '+' becomes a space and every %XX
#                 becomes the byte with that hex value.
# Literal backslashes in the input are doubled before decoding, because
# printf's %b would otherwise treat them as escape sequences of its own
# (e.g. "\t" would turn into a tab and "\c" would cut the output short).
urldecode() {
  local url_encoded="${1//+/ }"
  local backslash='\'
  url_encoded=${url_encoded//"$backslash"/"$backslash$backslash"}
  printf '%b' "${url_encoded//%/\\x}"
}
# Default pools and settings; each one can be overridden by an option.
authors=(alice bob carol ted)
categories=(blog home food work pets games movies books music art)
tags=(red orange yellow green blue indigo violet white black grey)
number=10
series=

# Parse command-line options into the globals above.
# Globals:   authors, categories, tags, number, series (written);
#            USAGE (read)
# Arguments: the script's command-line arguments ("$@")
# Exits with status 1 after printing the usage text when an option is
# unknown, an option is missing its argument, or -n is not a whole number.
parse_options() {
  # OPTIND is made local so the function can be called more than once.
  local opt OPTIND=1
  while getopts ":a:c:n:s:t:" opt "$@"; do
    case "$opt" in
      a)
        IFS=, read -ra authors <<< "$OPTARG"
        ;;
      c)
        IFS=, read -ra categories <<< "$OPTARG"
        ;;
      n)
        number=$OPTARG
        ;;
      s)
        series=$OPTARG
        ;;
      t)
        IFS=, read -ra tags <<< "$OPTARG"
        ;;
      :)
        # Because the optstring starts with ':', getopts reports a missing
        # option argument as ':' (with the option letter in OPTARG).
        echo "wikiblog.sh: option -$OPTARG requires an argument" >&2
        echo "$USAGE"
        exit 1
        ;;
      \?)
        echo "$USAGE"
        exit 1
        ;;
    esac
  done
  # The article loop compares against $number numerically, so reject
  # anything that is not a plain non-negative integer up front.
  if ! [[ $number =~ ^[0-9]+$ ]]; then
    echo "wikiblog.sh: -n expects a non-negative integer, got '$number'" >&2
    echo "$USAGE"
    exit 1
  fi
  # Force base 10 so values such as 08 are not read as invalid octal.
  number=$(( 10#$number ))
}
parse_options "$@"

# if $series is set, make all articles part of it.
if [ "$series" ]; then
  series="series= \"$series\""
fi
WIKI="https://en.wikipedia.org/wiki/Special:Random"

# Escape backslashes and double quotes in $1 so the text can be placed
# inside a double-quoted TOML string. Writes the result to stdout.
toml_escape() {
  printf '%s' "$1" | sed -e 's/[\\"]/\\&/g'
}

# Print a random selection of the arguments joined with '", "' (the caller
# supplies the outer quotes). With N >= 2 arguments, between 1 and N-1 are
# chosen; a single argument is returned as-is; no arguments prints nothing.
# Uses the global `shuffled` array and shuffle(); call it in a command
# substitution so that array is only touched in the subshell.
random_subset() {
  local limit picked item joined='' sep='", "'
  shuffled=("$@")
  (( ${#shuffled[@]} > 0 )) || return 0
  shuffle
  limit=$(( ${#shuffled[@]} - 1 ))
  if (( limit < 1 )); then
    # A one-element list would otherwise compute RANDOM % 0.
    picked=1
  else
    picked=$(( RANDOM % limit + 1 ))
  fi
  for item in "${shuffled[@]:0:picked}"; do
    joined+=${joined:+$sep}$item
  done
  printf '%s' "$joined"
}

# The platform does not change between iterations, so look it up once.
# On Darwin, date takes the epoch via -r; elsewhere -d @SECONDS is used.
os=$(uname)

count=1
while [ "$count" -le "$number" ]; do
  # random author (left empty when the author list is empty)
  myauthor=
  if (( ${#authors[@]} > 0 )); then
    myauthor=${authors[RANDOM % ${#authors[@]}]}
  fi

  # random date in the past ~10 years
  stamp=$(( $(date +%s) - RANDOM * 10000 ))
  if [ "$os" = "Darwin" ]; then
    DATE=$(date -r "$stamp" "+%Y-%m-%dT%H:%M:%S")
  else
    DATE=$(date -d "@$stamp" "+%Y-%m-%dT%H:%M:%S")
  fi

  # random wikipedia article; the header name is matched case-insensitively
  # because HTTP/2 responses spell it "location:" in lower case
  URL=$(curl -s --max-redirs 0 -I "$WIKI" | tr -d '\015' |
    awk 'tolower($1) == "location:" { print $2; exit }')
  if [ -z "$URL" ]; then
    echo "wikiblog.sh: could not get a random article URL from $WIKI" >&2
    exit 1
  fi

  # Take everything after /wiki/ so titles containing '/' stay whole;
  # fall back to the last path component for any other URL shape.
  case "$URL" in
    */wiki/*) slug=${URL#*/wiki/} ;;
    *) slug=${URL##*/} ;;
  esac
  # underscores become spaces, and every comma is followed by one space
  TITLE=$(urldecode "$slug" | tr _ ' ' | sed -e 's/, */, /g')
  safe_title=$(toml_escape "$TITLE")

  # random subset of categories
  mycats=$(random_subset "${categories[@]}")
  # random subset of tags
  mytags=$(random_subset "${tags[@]}")

  OUT=$(printf "wiki-%03d-%05d.md" "$count" "$RANDOM")
  printf '%s %s\n' "$count" "$TITLE"
  cat > "$OUT" <<EOF
+++
title = "$safe_title"
author = "$myauthor"
date = "$DATE"
source = "$URL"
categories = [ "$mycats" ]
tags = [ "$mytags" ]
$series
+++
EOF
  # retrieve the article and use Pandoc to convert it to Markdown
  curl -s "$URL?action=render" | pandoc -f html -t markdown_github >> "$OUT"
  count=$(( count + 1 ))
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment