Skip to content

Instantly share code, notes, and snippets.

@vyzo
Created October 27, 2019 13:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vyzo/cf8e07c8ca4dde7eee6125b48b377bdd to your computer and use it in GitHub Desktop.
Save vyzo/cf8e07c8ca4dde7eee6125b48b377bdd to your computer and use it in GitHub Desktop.
Like gxwc16, but with some strategic disabling of interrupts
(import :std/net/bio
:std/os/fdio
:std/os/fcntl
:std/sort)
(export main)
(declare (not safe))
(def (open-stdout-buffer (bufsz 4096))
(open-fd-output-buffer 1 bufsz))
(def +space+ (char->integer #\space))
(def +nl+ (char->integer #\newline))
(def input-buffer (make-u8vector 8192))
(def input-buffer-lo 0)
(def input-buffer-hi 0)
(def (read-next-word fd)
(declare (not interrupts-enabled))
(let lp ((i input-buffer-lo))
(cond
((fx< i input-buffer-hi)
(let (next (u8vector-ref input-buffer i))
(if (or (eq? next +space+) (eq? next +nl+))
(let (word (##subu8vector input-buffer input-buffer-lo i))
(set! input-buffer-lo (fx1+ i))
word)
(lp (fx1+ i)))))
((fx< input-buffer-lo input-buffer-hi)
(when (fx> input-buffer-lo 0)
(##subu8vector-move! input-buffer input-buffer-lo input-buffer-hi input-buffer 0)
(set! input-buffer-hi (fx- input-buffer-hi input-buffer-lo))
(set! input-buffer-lo 0))
(let again ()
(let (rd (fdread fd input-buffer input-buffer-hi))
(cond
((not rd) (again))
((fxzero? rd)
(let (res (##subu8vector input-buffer 0 input-buffer-hi))
(set! input-buffer-hi 0)
res))
(else
(let (i* input-buffer-hi)
(set! input-buffer-hi (fx+ input-buffer-hi rd))
(lp i*)))))))
(else
(let again ()
(let (rd (fdread fd input-buffer))
(cond
((not rd) (again))
((fxzero? rd)
#!eof)
(else
(set! input-buffer-lo 0)
(set! input-buffer-hi rd)
(lp 0)))))))))
(def (main path)
(let ((words (make-string-table 1000000))
(fd (open path O_RDONLY)))
(let lp ()
(let (word (read-next-word fd))
(unless (eof-object? word)
(string-table-push! words word)
(lp))))
(let (obuf (open-stdout-buffer 8192))
(for-each (lambda (x)
(bio-write-bytes (car x) obuf)
(bio-write-char #\space obuf)
(bio-write-string (number->string (cdr x)) obuf)
(bio-write-char #\newline obuf))
(sort! (string-table->list words)
(lambda (a b) (> (cdr a) (cdr b)))))
(bio-force-output obuf))))
;;; faster hash tables; quick and dirty vector backed implementation that doesn't resize
(def (make-string-table size)
(make-vector size #f))
(def (u8vector-hash str)
(declare (not interrupts-enabled))
(def end (u8vector-length str))
(let lp ((i 0) (h #x011C9DC5))
(if (fx< i end)
(let (next (u8vector-ref str i))
(lp (fx1+ i)
(##fxwrap* #x01000193 (##fxxor next h))))
h)))
(def (string-table-push! tab str)
(declare (not interrupts-enabled))
(let* ((h (u8vector-hash str))
(idx (##fxremainder (if (fx>= h 0) h (fx- h)) (vector-length tab))))
(cond
((vector-ref tab idx)
=> (lambda (sth)
(cond
((u8vector? (car sth))
(if (equal? (car sth) str)
(set! (cdr sth) (fx1+ (cdr sth)))
(vector-set! tab idx (list (cons str 1) sth))))
((assoc str sth)
=> (lambda (p)
(set! (cdr p) (fx1+ (cdr p)))))
(else
(vector-set! tab idx (cons (cons str 1) sth))))))
(else
(vector-set! tab idx (cons str 1))))))
(def (string-table->list tab)
(let (end (vector-length tab))
(let lp ((i 0) (r []))
(if (fx< i end)
(cond
((vector-ref tab i)
=> (lambda (sth)
(if (u8vector? (car sth))
(lp (fx1+ i) (cons sth r))
(lp (fx1+ i) (foldl cons r sth)))))
(else
(lp (fx1+ i) r)))
r))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment