Skip to content

Instantly share code, notes, and snippets.

@samth

samth/countwords.rkt

Created Mar 19, 2021
Embed
What would you like to do?
#lang racket/base
(require racket/fixnum racket/unsafe/ops racket/vector)
(#%declare #:unsafe)
(define basis 2166136261)
(define prime 16777619)
(define (fnv1a bs scale)
(for/fold ([h basis])
([b (in-bytes bs)])
(((fxxor h b) . fx* . prime) . fxmodulo . scale)))
(struct counter (ks vs cap lim size) #:mutable #:authentic)
(define (make-counter [cap (* 4 1024)])
(counter (make-vector cap #f)
(make-fxvector cap 0)
cap
(inexact->exact (round (* cap 0.7)))
0))
(define (counter-incr! h k)
(define ks (counter-ks h))
(define vs (counter-vs h))
(define size (counter-size h))
(define cap (counter-cap h))
(define lim (counter-lim h))
(let-values ([(cap ks vs)
(cond [(fx>= size lim)
(define new-cap (fx* cap 2))
(define new-lim (fx* lim 2))
(define new-ks (make-vector new-cap #f))
(define new-vs (make-fxvector new-cap 0))
(for ([k (in-vector ks)]
[v (in-fxvector vs)]
#:when k)
(let loop ([idx (fnv1a k new-cap)])
(cond
[(unsafe-vector*-ref new-ks idx)
(loop (fxmodulo (fx+ 1 idx) new-cap))]
[else
(unsafe-vector*-set! new-ks idx k)
(fxvector-set! new-vs idx v)])))
(set-counter-cap! h new-cap)
(set-counter-lim! h new-lim)
(set-counter-ks! h new-ks)
(set-counter-vs! h new-vs)
(values new-cap new-ks new-vs)]
[else (values cap ks vs)])])
(let loop ([idx (fnv1a k cap)])
(define it-k (unsafe-vector*-ref ks idx))
(cond
[it-k
(cond
[(bytes=? it-k k)
(fxvector-set! vs idx (fx+ 1 (fxvector-ref vs idx)))]
[else
(loop (fxmodulo (fx+ 1 idx) cap))])]
[else
(unsafe-vector*-set! ks idx k)
(fxvector-set! vs idx 1)
(set-counter-size! h (fx+ 1 size))]))))
(define (counter->vector h)
(for/vector #:length (counter-size h)
([k (in-vector (counter-ks h))]
[v (in-fxvector (counter-vs h))]
#:when k)
(cons k v)))
(define (downcase v)
(if (and (fx>= v 65)
(fx<= v 90))
(fx+ v 32)
v))
(define (countwords)
(define in (current-input-port))
(define buf (make-bytes (fx* 1024 1024)))
(define wordbuf (make-bytes 1024))
(define words (make-counter))
(let loop ([bufoff 0]
[bufrem 0]
[wordoff 0])
(define-values (new-bufrem new-bufoff)
(if (fx= 0 bufrem)
(values (read-bytes! buf in) 0)
(values bufrem bufoff)))
(unless (eof-object? new-bufrem)
(define b
(bytes-ref buf new-bufoff))
(cond
[(or (fx= b 9)
(fx= b 10)
(fx= b 13)
(fx= b 32))
(unless (fx= 0 wordoff)
(counter-incr! words (subbytes wordbuf 0 wordoff)))
(loop (fx+ 1 new-bufoff) (fx- new-bufrem 1) 0)]
[else
(bytes-set! wordbuf wordoff (downcase b))
(loop (fx+ 1 new-bufoff) (fx- new-bufrem 1) (fx+ 1 wordoff))])))
(define items
(time (vector-sort #:key cdr (counter->vector words) >)))
(time (for ([p (in-vector items)]
#:break (not (pair? p)))
(write-bytes (car p))
(write-byte 32)
(write (cdr p))
(write-byte 10))))
(time (countwords))
;; (require profile)
;; (profile-thunk
;; #:delay 0
;; #:use-errortrace? #t
;; countwords)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment