Skip to content

Instantly share code, notes, and snippets.

@vyzo
Created October 24, 2019 23:02
Show Gist options
  • Save vyzo/c650a23226f377400286813b1c49bb1b to your computer and use it in GitHub Desktop.
Save vyzo/c650a23226f377400286813b1c49bb1b to your computer and use it in GitHub Desktop.
wc with tries and raw devices; atrocities!
(import :gerbil/gambit/ports
:std/net/bio
:std/sort)
(export main)
(declare (not safe))
(def +nl+
(char->integer #\newline))
(def +space+
(char->integer #\space))
;; words are represented as lists of u8s in reverse
(def (read-next-word buf)
(let lp ((bytes []))
(let (next (bio-read-u8 buf))
(cond
((eof-object? next)
(if (null? bytes)
next
bytes))
((or (eq? next +space+) (eq? next +nl+))
(if (null? bytes)
(lp bytes)
bytes))
(else
(lp (cons next bytes)))))))
(def (show word count)
(let (u8v (list->u8vector word))
(write-u8vector u8v)
(write-u8 +space+)
(write count)
(write-u8 +nl+)))
(def (main path)
(let ((words (make-trie))
(buf (open-file-input-buffer path)))
(let lp ()
(let (word (read-next-word buf))
(unless (eof-object? word)
(trie-update! words word fx1+ 0)
(lp))))
(for-each (lambda (x) (show (car x) (cdr x)))
(sort! (trie->list words)
(lambda (a b) (> (cdr a) (cdr b)))))))
;;;; quick and dirty trie implementation
(def trie-length 128)
(def (make-trie)
(make-vector trie-length #f))
(defstruct leaf (value)
final: #t unchecked: #t)
(def (trie-update! trie word update default)
(match word
([u8 . rest]
(cond
((vector-ref trie u8)
=> (lambda (sth)
(cond
((vector? sth)
(if (null? rest)
(let (new-sth (cons (make-leaf (update default)) sth))
(vector-set! trie u8 new-sth))
(trie-update! sth rest update default)))
((pair? sth)
(if (null? rest)
(let (leaf (car sth))
(&leaf-value-set! leaf (update (&leaf-value leaf))))
(trie-update! (cdr sth) rest update default)))
((leaf? sth)
(if (null? rest)
(&leaf-value-set! sth (update (&leaf-value sth)))
(let* ((new-trie (make-trie))
(new-sth (cons sth new-trie)))
(vector-set! trie u8 new-sth)
(trie-update! new-trie rest update default))))
(else
(if (null? rest)
(let (new-sth (make-leaf (update default)))
(vector-set! trie u8 new-sth))
(let (new-trie (make-trie))
(vector-set! trie u8 new-trie)
(trie-update! new-trie rest update default)))))))
(else
(if (null? rest)
(vector-set! trie u8 (make-leaf (update default)))
(let (new-trie (make-trie))
(vector-set! trie u8 new-trie)
(trie-update! new-trie rest update default))))))))
(def (trie->list trie)
(let recur ((trie trie) (word []) (r []))
(if trie
(let lp ((i 0) (r r))
(if (fx< i trie-length)
(let ((word* (cons i word))
(sth (vector-ref trie i)))
(cond
((vector? sth)
(lp (fx1+ i)
(recur sth word* r)))
((pair? sth)
(lp (fx1+ i)
(recur (cdr sth) word* (cons (cons word* (&leaf-value (car sth))) r))))
((leaf? sth)
(lp (fx1+ i)
(cons (cons word* (&leaf-value sth)) r)))
(else
(lp (fx1+ i) r))))
r))
r)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment