Created
March 19, 2021 16:08
-
-
Save samth/7fc52e7bdc327fb59c8858a42258c26a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang racket/base | |
(require racket/fixnum racket/unsafe/ops racket/vector) | |
(#%declare #:unsafe) | |
(define basis 2166136261) | |
(define prime 16777619) | |
(define (fnv1a bs scale) | |
(for/fold ([h basis]) | |
([b (in-bytes bs)]) | |
(((fxxor h b) . fx* . prime) . fxmodulo . scale))) | |
(struct counter (ks vs cap lim size) #:mutable #:authentic) | |
(define (make-counter [cap (* 4 1024)]) | |
(counter (make-vector cap #f) | |
(make-fxvector cap 0) | |
cap | |
(inexact->exact (round (* cap 0.7))) | |
0)) | |
(define (counter-incr! h k) | |
(define ks (counter-ks h)) | |
(define vs (counter-vs h)) | |
(define size (counter-size h)) | |
(define cap (counter-cap h)) | |
(define lim (counter-lim h)) | |
(let-values ([(cap ks vs) | |
(cond [(fx>= size lim) | |
(define new-cap (fx* cap 2)) | |
(define new-lim (fx* lim 2)) | |
(define new-ks (make-vector new-cap #f)) | |
(define new-vs (make-fxvector new-cap 0)) | |
(for ([k (in-vector ks)] | |
[v (in-fxvector vs)] | |
#:when k) | |
(let loop ([idx (fnv1a k new-cap)]) | |
(cond | |
[(unsafe-vector*-ref new-ks idx) | |
(loop (fxmodulo (fx+ 1 idx) new-cap))] | |
[else | |
(unsafe-vector*-set! new-ks idx k) | |
(fxvector-set! new-vs idx v)]))) | |
(set-counter-cap! h new-cap) | |
(set-counter-lim! h new-lim) | |
(set-counter-ks! h new-ks) | |
(set-counter-vs! h new-vs) | |
(values new-cap new-ks new-vs)] | |
[else (values cap ks vs)])]) | |
(let loop ([idx (fnv1a k cap)]) | |
(define it-k (unsafe-vector*-ref ks idx)) | |
(cond | |
[it-k | |
(cond | |
[(bytes=? it-k k) | |
(fxvector-set! vs idx (fx+ 1 (fxvector-ref vs idx)))] | |
[else | |
(loop (fxmodulo (fx+ 1 idx) cap))])] | |
[else | |
(unsafe-vector*-set! ks idx k) | |
(fxvector-set! vs idx 1) | |
(set-counter-size! h (fx+ 1 size))])))) | |
(define (counter->vector h) | |
(for/vector #:length (counter-size h) | |
([k (in-vector (counter-ks h))] | |
[v (in-fxvector (counter-vs h))] | |
#:when k) | |
(cons k v))) | |
(define (downcase v) | |
(if (and (fx>= v 65) | |
(fx<= v 90)) | |
(fx+ v 32) | |
v)) | |
(define (countwords) | |
(define in (current-input-port)) | |
(define buf (make-bytes (fx* 1024 1024))) | |
(define wordbuf (make-bytes 1024)) | |
(define words (make-counter)) | |
(let loop ([bufoff 0] | |
[bufrem 0] | |
[wordoff 0]) | |
(define-values (new-bufrem new-bufoff) | |
(if (fx= 0 bufrem) | |
(values (read-bytes! buf in) 0) | |
(values bufrem bufoff))) | |
(unless (eof-object? new-bufrem) | |
(define b | |
(bytes-ref buf new-bufoff)) | |
(cond | |
[(or (fx= b 9) | |
(fx= b 10) | |
(fx= b 13) | |
(fx= b 32)) | |
(unless (fx= 0 wordoff) | |
(counter-incr! words (subbytes wordbuf 0 wordoff))) | |
(loop (fx+ 1 new-bufoff) (fx- new-bufrem 1) 0)] | |
[else | |
(bytes-set! wordbuf wordoff (downcase b)) | |
(loop (fx+ 1 new-bufoff) (fx- new-bufrem 1) (fx+ 1 wordoff))]))) | |
(define items | |
(time (vector-sort #:key cdr (counter->vector words) >))) | |
(time (for ([p (in-vector items)] | |
#:break (not (pair? p))) | |
(write-bytes (car p)) | |
(write-byte 32) | |
(write (cdr p)) | |
(write-byte 10)))) | |
(time (countwords)) | |
;; (require profile) | |
;; (profile-thunk | |
;; #:delay 0 | |
;; #:use-errortrace? #t | |
;; countwords) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment