Skip to content

Instantly share code, notes, and snippets.

@vyzo
Created October 27, 2019 16:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vyzo/ec8bf14c071375ecae3c6a885364f838 to your computer and use it in GitHub Desktop.
Save vyzo/ec8bf14c071375ecae3c6a885364f838 to your computer and use it in GitHub Desktop.
wc with open addressing table/quadratic probing
(import :std/net/bio
:std/os/fdio
:std/os/fcntl
:std/sort)
(export main)
(declare (not safe))
(def (open-stdout-buffer (bufsz 4096))
(open-fd-output-buffer 1 bufsz))
(def +space+ (char->integer #\space))
(def +nl+ (char->integer #\newline))
(def input-buffer (make-u8vector 8192))
(def input-buffer-lo 0)
(def input-buffer-hi 0)
(def (read-next-word fd)
(declare (not interrupts-enabled))
(let lp ((i input-buffer-lo))
(cond
((fx< i input-buffer-hi)
(let (next (u8vector-ref input-buffer i))
(if (or (eq? next +space+) (eq? next +nl+))
(let (word (##subu8vector input-buffer input-buffer-lo i))
(set! input-buffer-lo (fx1+ i))
word)
(lp (fx1+ i)))))
((fx< input-buffer-lo input-buffer-hi)
(when (fx> input-buffer-lo 0)
(##subu8vector-move! input-buffer input-buffer-lo input-buffer-hi input-buffer 0)
(set! input-buffer-hi (fx- input-buffer-hi input-buffer-lo))
(set! input-buffer-lo 0))
(let again ()
(let (rd (fdread fd input-buffer input-buffer-hi))
(cond
((not rd) (again))
((fxzero? rd)
(let (res (##subu8vector input-buffer 0 input-buffer-hi))
(set! input-buffer-hi 0)
res))
(else
(let (i* input-buffer-hi)
(set! input-buffer-hi (fx+ input-buffer-hi rd))
(lp i*)))))))
(else
(let again ()
(let (rd (fdread fd input-buffer))
(cond
((not rd) (again))
((fxzero? rd)
#!eof)
(else
(set! input-buffer-lo 0)
(set! input-buffer-hi rd)
(lp 0)))))))))
#;(def string-table-size 999331)
(def string-table-size 4194303)
(def (main path)
(let ((words (make-string-table string-table-size))
(fd (open path O_RDONLY)))
(let lp ()
(let (word (read-next-word fd))
(unless (eof-object? word)
(string-table-push! words word)
(lp))))
(let (obuf (open-stdout-buffer 8192))
(for-each (lambda (x)
(bio-write-bytes (car x) obuf)
(bio-write-char #\space obuf)
(bio-write-string (number->string (cdr x)) obuf)
(bio-write-char #\newline obuf))
(sort! (string-table->list words)
(lambda (a b) (> (cdr a) (cdr b)))))
(bio-force-output obuf))))
;;; faster hash tables; quick and dirty vector backed implementation that doesn't resize
(def (make-string-table size)
(make-vector size #f))
(def (u8vector-hash str)
(declare (not interrupts-enabled))
(def end (u8vector-length str))
(let lp ((i 0) (h #xCBF29CE484222325))
(cond
((fx< i end)
(let (next (u8vector-ref str i))
(lp (fx1+ i)
(##fxwrap* #x0100000001B3 (##fxxor next h)))))
((fx>= h 0) h)
(else (fx- h)))))
(def (string-table-push! tab str)
(declare (not interrupts-enabled))
(let* ((M (vector-length tab))
(h (u8vector-hash str))
(i (##fxremainder h M)))
(cond
((vector-ref tab i)
=> (lambda (sth)
(if (equal? (car sth) str)
(set! (cdr sth) (fx1+ (cdr sth)))
(let (end (##fxquotient M 2))
(let lp ((j 1))
(if (fx< j end)
(let* ((h% (fx+ h j (##fxsquare j)))
(i% (##fxremainder h% M)))
(cond
((vector-ref tab i%)
=> (lambda (sth)
(if (equal? (car sth) str)
(set! (cdr sth) (fx1+ (cdr sth)))
(lp (fx1+ j)))))
(else
(vector-set! tab i% (cons str 1)))))
(error "Ayyye ayyye ayyye! Table is full!")))))))
(else
(vector-set! tab i (cons str 1))))))
(def (string-table->list tab)
(def end (vector-length tab))
(let lp ((i 0) (r []))
(if (fx< i end)
(cond
((vector-ref tab i)
=> (lambda (sth)
(lp (fx1+ i) (cons sth r))))
(else
(lp (fx1+ i) r)))
r)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment