Last active June 8, 2020 11:19
Blog writing analysis script, referenced in ✍️
` Count sentence length, word size distribution over past posts `
` module imports and local aliases for the library functions used below `
std := load('std')
str := load('str')
quicksort := load('quicksort')

log := std.log
f := std.format
append := std.append
cat := std.cat
slice := std.slice
flatten := std.flatten
reduce := std.reduce
map := std.map
each := std.each
filter := std.filter
readFile := std.readFile
writeFile := std.writeFile

hasPrefix? := str.hasPrefix?
split := str.split
trim := str.trim
` trim spaces from a string `
trimS := s => trim(s, ' ')

sortBy := quicksort.sortBy
sort := quicksort.sort
` Constants `
PostsDir := './content/posts'
Newline := char(10)
` true when nothing is left of s after trimming spaces from it `
blank? := s => trim(s, ' ') = ''
` find all blog posts on the site and callback with all file names
	cb: called with the list of entry names read from PostsDir,
		minus names that start with a dot or an underscore
	when the directory cannot be read, an error is logged and
	cb is not called `
withAllPosts := cb => (
	dir(PostsDir, evt => evt.type :: {
		'error' -> log('error: could not read posts directory!')
		'data' -> cb(filter(
			map(evt.data, entry => entry.name)
			` filter out hidden files and underscore-prefixed files `
			fName => ~(hasPrefix?(fName, '.') | hasPrefix?(fName, '_'))
		))
	})
)
` given a potentially double-quoted string, strip the quotes
	s: string to unwrap
	returns s without its first and last characters when both are
	double quotes, and s unchanged otherwise. The trailing quote is
	checked so that a string which merely starts with a quote does
	not lose its last character `
stripQuotes := s => s.0 :: {
	'"' -> s.(len(s) - 1) :: {
		'"' -> slice(s, 1, len(s) - 1)
		_ -> s
	}
	_ -> s
}
` given a file name to a blog post, parse it completely
	and call cb with a PostRecord structure holding parsed
	metadata and body: {parseState, title, date, body}
	- blank lines and lines starting with < are dropped before parsing
	- title and date are read from the front matter, which is the
		section between the first two --- lines
	- body collects every remaining line after the front matter `
withPostRecord := (fileName, cb) => (
	readFile(PostsDir + '/' + fileName, bytes => (
		lines := filter(
			split(bytes, Newline)
			` remove raw HTML lines `
			line => ~(blank?(line) | hasPrefix?(line, '<'))
		)

		` sanitize lines: overwrite markdown punctuation with spaces.
			The recursion walks from the last index down and only stops
			once it has gone past index 0, so the first character of
			each line is sanitized as well `
		lines := map(lines, line => (sub := i => i :: {
			~1 -> line
			_ -> (
				line.(i) :: {
					'_' -> line.(i) := ' '
					'*' -> line.(i) := ' '
					'[' -> line.(i) := ' '
					']' -> line.(i) := ' '
					'(' -> line.(i) := ' '
					')' -> line.(i) := ' '
				}
				sub(i - 1)
			)
		})(len(line) - 1))

		record := {
			` parse state:
				0 -> start
				1 -> inside front matter
				2 -> after front matter
				3 -> error, stop parsing `
			parseState: 0
			title: ()
			date: ()
			body: []
		}
		each(lines, line => record.parseState :: {
			0 -> line :: {
				'---' -> record.parseState := 1
				_ -> (
					log(f('error: unexpected line in post file, {{0}}', [line]))
					record.parseState := 3
				)
			}
			1 -> line :: {
				'---' -> record.parseState := 2
				` only the title and date keys are kept, others are ignored `
				_ -> split(line, ':').0 :: {
					'title' -> record.title := stripQuotes(trimS(split(line, 'title:').1))
					'date' -> record.date := trimS(split(line, 'date:').1)
				}
			}
			` append the line to the end of the body list `
			2 -> record.body.len(record.body) := line
			3 -> ()
		})

		cb(record)
	))
)
` mean of an array
	xs: list of numbers
	returns the sum of xs divided by its length, or ~1 (negative one)
	as a sentinel for an empty list, which also avoids dividing by zero `
mean := xs => len(xs) :: {
	0 -> ~1
	_ -> reduce(xs, (a, b) => a + b, 0) / len(xs)
}
` median of an array
	xs: list of numbers, sorted here through sort before indexing
	returns ~1 (negative one) for an empty list, the middle element
	for an odd length, and the average of the two middle elements
	for an even length `
median := xs => xs :: {
	[] -> ~1
	_ -> (
		sorted := sort(xs)
		mid := floor(len(sorted) / 2)
		(len(sorted) % 2) :: {
			0 -> (sorted.(mid) + sorted.(mid - 1)) / 2
			1 -> sorted.(mid)
		}
	)
}
` split up a blog post body into a flat list of words
	includes doing some sanitization
	record: PostRecord whose body is a list of line strings
	returns every space-separated piece of every body line, except
	blank pieces and pieces starting with http or / `
getWords := record => filter(
	flatten(map(record.body, line => split(line, ' ')))
	` try to remove links and empty words `
	word => blank?(word) :: {
		true -> false
		_ -> ~(hasPrefix?(word, 'http') | hasPrefix?(word, '/'))
	}
)
` break a blog post body into one flat list of sentences
	every body line is cut at each period followed by a space, then
	the per-line pieces are merged into a single list `
getSentences := record => (
	perLine := map(record.body, line => split(line, '. '))
	flatten(perLine)
)
` main analysis function that works per-PostRecord, computing
	statistics over the post body and publishing a CSV
	records: list of PostRecords, sorted here by their date field
	writes one CSV row per statistic to ./analysis.csv and logs
	whether the write succeeded `
analyze := records => (
	sorted := sortBy(records, r => r.date)

	log('Serializing word list...')
	` one list of word lengths per post `
	wordLengths := map(sorted, r => map(getWords(r), len))

	log('Serializing sentence list...')
	` one list of per-sentence word counts per post `
	sentenceLengths := map(sorted, r => map(
		getSentences(r)
		sent => len(filter(split(sent, ' '), w => ~blank?(w)))
	))

	log('Computing mean word lengths')
	meanWordLengths := map(wordLengths, mean)
	log('Computing median word lengths')
	medianWordLengths := map(wordLengths, median)
	log('Computing median sentence lengths')
	medianSentenceLengths := map(sentenceLengths, median)

	log('Computing median paragraph lengths')
	` each body line is treated as one paragraph, measured in words `
	paragraphLengths := map(sorted, record => map(
		record.body
		para => len(filter(split(para, ' '), w => ~blank?(w)))
	))
	medianParagraphLengths := map(paragraphLengths, median)

	results := {
		dates: map(sorted, r => r.date)
		meanWordLengths: meanWordLengths
		medianWordLengths: medianWordLengths
		medianSentenceLengths: medianSentenceLengths
		medianParagraphLengths: medianParagraphLengths
	}

	csv := renderCSV(results)
	writeFile('./analysis.csv', csv, done => done :: {
		true -> log('File saved to ./analysis.csv successfully!')
		() -> log('error: failed to save analysis results csv!')
	})
)
` render results into a CSV for importing into Google Sheets
	results: map from a row label to a list of values
	returns a string with one line per key: the key followed by its
	values converted with string, all joined by commas
	NOTE(review): rows appear in the order keys(results) yields them;
	confirm that order is stable if row order matters downstream `
renderCSV := results => (
	csvLines := []
	each(keys(results), key => (
		rowData := append([key], map(results.(key), string))
		csvLines.len(csvLines) := cat(rowData, ',')
	))
	cat(csvLines, Newline)
)
` main analysis routine
	parses every post, collecting records as their callbacks fire,
	and runs the analysis once a record exists for every file name `
postRecords := []
withAllPosts(fileNames => each(
	fileNames
	fName => withPostRecord(fName, record => (
		log(f('read: [{{ date }}] {{ title }}', record))
		postRecords.len(postRecords) := record
		` the callback that delivers the last record triggers the analysis `
		len(postRecords) :: {
			len(fileNames) -> analyze(postRecords)
		}
	))
))
` minimal quicksort implementation
	using hoare partition `

std := load('std')

map := std.map
clone := std.clone
` sort the list v in place, ordered by the value pred returns for
	each element, and return v
	v: list to sort, mutated by this call
	pred: function from an element to its comparable sort key
	keys are computed once up front into vPred and swapped in step
	with v, so pred is not called again during partitioning `
sortBy := (v, pred) => (
	vPred := map(v, pred)
	` partition v between lo and hi around the key at lo
		and return the index that splits the two halves `
	partition := (v, lo, hi) => (
		pivot := vPred.(lo)
		` advance i rightwards past keys smaller than the pivot `
		lsub := i => (vPred.(i) < pivot) :: {
			true -> lsub(i + 1)
			false -> i
		}
		` move j leftwards past keys larger than the pivot `
		rsub := j => (vPred.(j) > pivot) :: {
			true -> rsub(j - 1)
			false -> j
		}
		(sub := (i, j) => (
			i := lsub(i)
			j := rsub(j)
			(i < j) :: {
				false -> j
				true -> (
					` inlined swap! `
					tmp := v.(i)
					tmpPred := vPred.(i)
					v.(i) := v.(j)
					v.(j) := tmp
					vPred.(i) := vPred.(j)
					vPred.(j) := tmpPred

					sub(i + 1, j - 1)
				)
			}
		))(lo, hi)
	)
	(quicksort := (v, lo, hi) => len(v) :: {
		0 -> v
		_ -> (lo < hi) :: {
			false -> v
			true -> (
				p := partition(v, lo, hi)
				quicksort(v, lo, p)
				quicksort(v, p + 1, hi)
			)
		}
	})(v, 0, len(v) - 1)
)
` sort! orders v itself, using each element as its own sort key `
sort! := v => sortBy(v, item => item)

` sort hands a clone of v to sort!, so the sorting is done on the copy `
sort := v => (
	copied := clone(v)
	sort!(copied)
)
