Skip to content

Instantly share code, notes, and snippets.

@vyzo
Last active October 27, 2019 19:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vyzo/8fdec4aae545ef0f32f29062f5fa22a1 to your computer and use it in GitHub Desktop.
Save vyzo/8fdec4aae545ef0f32f29062f5fa22a1 to your computer and use it in GitHub Desktop.
wc with mmaped files
(import :std/net/bio
:std/os/fdio
:std/os/fcntl
:std/os/error
:std/sort
:std/foreign
:gerbil/gambit/os)
(export main)
(declare (not safe))
(def (open-stdout-buffer (bufsz 4096))
(open-fd-output-buffer 1 bufsz))
(def +space+ (char->integer #\space))
(def +nl+ (char->integer #\newline))
(def input-cursor 0)
(def input-end 0)
(def (read-next-word ptr)
(declare (not interrupts-enabled))
(let lp ((i input-cursor))
(cond
((fx< i input-end)
(let (next (mem-u8-ref ptr i))
(if (or (eq? next +space+) (eq? next +nl+))
(let (word (mem-u8vector-ref ptr input-cursor i))
(set! input-cursor (fx1+ i))
word)
(lp (fx1+ i)))))
((fx> i input-cursor)
(let (word (mem-u8vector-ref ptr input-cursor i))
(set! input-cursor i)
word))
(else #!eof))))
#;(def string-table-size 999331)
(def string-table-size 4194303)
(def (main path)
(let ((words (make-string-table string-table-size))
((values ptr fd size) (mmap-file path)))
(set! input-end size)
(let lp ()
(let (word (read-next-word ptr))
(unless (eof-object? word)
(string-table-push! words word)
(lp))))
(let (obuf (open-stdout-buffer 8192))
(for-each (lambda (x)
(bio-write-bytes (car x) obuf)
(bio-write-char #\space obuf)
(bio-write-string (number->string (cdr x)) obuf)
(bio-write-char #\newline obuf))
(sort! (string-table->list words)
(lambda (a b) (> (cdr a) (cdr b)))))
(bio-force-output obuf))))
;;; faster hash tables; quick and dirty vector backed implementation that doesn't resize
(def (make-string-table size)
(make-vector size #f))
(def (u8vector-hash str)
(declare (not interrupts-enabled))
(def end (u8vector-length str))
(let lp ((i 0) (h #xCBF29CE484222325))
(if (fx< i end)
(let (next (u8vector-ref str i))
(lp (fx1+ i)
(##fxwrap* #x0100000001B3 (##fxxor next h))))
(##fxabs h))))
(def (string-table-push! tab str)
(declare (not interrupts-enabled))
(let* ((M (vector-length tab))
(h (u8vector-hash str))
(i (##fxremainder h M)))
(cond
((vector-ref tab i)
=> (lambda (sth)
(if (##u8vector-equal? (car sth) str)
(set! (cdr sth) (fx1+ (cdr sth)))
(let (end (##fxquotient M 2))
(let lp ((j 1))
(if (fx< j end)
(let* ((h% (fx+ h j (##fxsquare j)))
(i% (##fxremainder h% M)))
(cond
((vector-ref tab i%)
=> (lambda (sth)
(if (##u8vector-equal? (car sth) str)
(set! (cdr sth) (fx1+ (cdr sth)))
(lp (fx1+ j)))))
(else
(vector-set! tab i% (cons str 1)))))
(error "Ayyye ayyye ayyye! Table is full!")))))))
(else
(vector-set! tab i (cons str 1))))))
(def (string-table->list tab)
(def end (vector-length tab))
(let lp ((i 0) (r []))
(if (fx< i end)
(cond
((vector-ref tab i)
=> (lambda (sth)
(lp (fx1+ i) (cons sth r))))
(else
(lp (fx1+ i) r)))
r)))
;;; mmaped files
(begin-ffi (_mmap)
(c-declare "#include <string.h>")
(c-declare "#include <errno.h>")
(c-declare "#include <sys/mman.h>")
(define-c-lambda _mmap (int size_t) (pointer void)
#<<END-C
void *r = mmap (NULL, ___arg2, PROT_READ, MAP_SHARED, ___arg1, 0);
if (r == MAP_FAILED) {
___return (NULL);
} else {
___return (r);
}
END-C
))
(def (mmap-file path)
(declare (not interrupts-enabled))
(let* ((size (file-info-size (file-info path)))
(fd (check-os-error (_open path O_RDONLY 0)
(mmap-file path)))
(ptr (_mmap fd size)))
(if ptr
(values ptr fd size)
(raise-os-error (##c-code "___RESULT = ___FIX (errno);") mmap-file path))))
(def (mem-u8-ref ptr i)
(##c-code "u_int8_t *ptr = (u_int8_t*)___FIELD (___ARG1, ___FOREIGN_PTR);
___RESULT = ___FIX (*(ptr + ___INT(___ARG2)));"
ptr i))
(def (mem-u8vector-ref ptr start end)
(let* ((len (fx- end start))
(bytes (make-u8vector len)))
(##c-code "memcpy (U8_DATA (___ARG1), (u_int8_t*)___FIELD (___ARG2, ___FOREIGN_PTR) + ___INT (___ARG3), U8_LEN (___ARG1)); ___RESULT = ___VOID;"
bytes ptr start)
bytes))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment