Skip to content

Instantly share code, notes, and snippets.

@samth
Created March 18, 2021 17:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samth/d6fbe3757c462d5b4d1d9393b72f9ab9 to your computer and use it in GitHub Desktop.
Save samth/d6fbe3757c462d5b4d1d9393b72f9ab9 to your computer and use it in GitHub Desktop.
#lang racket/base
(require racket/string racket/bytes racket/vector racket/port)
(define h (make-hash))
;(#%declare #:unsafe)
(define (split-lines/bytes)
(for* ([l (in-bytes-lines)])
(let loop ([i 0] [last 0])
(cond [(= i (bytes-length l))
(unless (eq? i last)
(hash-update! h (bytes-downcase! (subbytes l last i)) add1 0))]
[(eq? (bytes-ref l i) (char->integer #\space))
(unless (eq? i last)
(hash-update! h (bytes-downcase! (subbytes l last i)) add1 0))
(loop (add1 i) (add1 i))]
[else
(loop (add1 i) last)]))))
(define (bytes-downcase! b)
(for ([(n i) (in-indexed (in-bytes b))])
(when (<= (char->integer #\A) n (char->integer #\Z))
(bytes-set! b i (- n (- (char->integer #\A) (char->integer #\a))))))
b)
(define (string-downcase! b)
(for ([(n i) (in-indexed (in-string b))])
(when (char<=? (values #\A) n (values #\Z))
(string-set! b i (integer->char (- (char->integer n) (- (char->integer #\A) (char->integer #\a)))))))
b)
(define (split-lines/string)
(for* ([l (in-lines)])
(let loop ([i 0] [last 0])
(cond [(= i (string-length l))
(unless (eq? i last)
(hash-update! h (string-downcase (substring l last i)) add1 0))]
[(or (eq? (string-ref l i) #\space)
(eq? (string-ref l i) #\tab))
(unless (eq? i last)
(hash-update! h (string-downcase (substring l last i)) add1 0))
(loop (add1 i) (add1 i))]
[else
(loop (add1 i) last)]))))
(define (split-chunks/string)
(for* ([l (in-producer (lambda () (read-string 64000)) eof)])
(let loop ([i 0] [last 0])
(cond [(= i (string-length l))
(unless (eq? i last)
(hash-update! h (string-downcase (substring l last i)) add1 0))]
[(or (eq? (string-ref l i) #\space)
(eq? (string-ref l i) #\newline)
(eq? (string-ref l i) #\tab))
(unless (eq? i last)
(hash-update! h (string-downcase (substring l last i)) add1 0))
(loop (add1 i) (add1 i))]
[else
(loop (add1 i) last)]))))
(define (split-chunks/bytes)
(for* ([l (in-producer (lambda () (read-bytes 64000)) eof)])
(let loop ([i 0] [last 0])
(cond [(= i (bytes-length l))
(unless (eq? i last)
(hash-update! h (bytes-downcase! (subbytes l last i)) add1 0))]
[(or (eq? (bytes-ref l i) (char->integer #\space))
(eq? (bytes-ref l i) (char->integer #\newline))
(eq? (bytes-ref l i) (char->integer #\tab)))
(unless (eq? i last)
(hash-update! h (bytes-downcase! (subbytes l last i)) add1 0))
(loop (add1 i) (add1 i))]
[else
(loop (add1 i) last)]))))
(define (split-port-regexp)
(let loop ()
(define word (regexp-match #px"[^\\s\n]+" (current-input-port)))
(when word
(define w* (string-downcase (bytes->string/utf-8 (car word))))
(hash-update! h w* add1 0)
(loop))))
(time (split-lines/string))
(define v
(time
(for/vector #:length (hash-count h)
([(k v) (in-hash h)])
(cons k v))))
(time (vector-sort! v > #:key cdr))
(define p (current-output-port) #; (open-output-nowhere))
(time
(for ([pair (in-vector v)])
(display (car pair) p) (write-bytes #" " p)
(write (cdr pair) p)
(newline p)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment