Skip to content

Instantly share code, notes, and snippets.

@lojic
Last active December 6, 2023 00:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lojic/63dff68a3b84d6ab8e28d5d7cea807fc to your computer and use it in GitHub Desktop.
Save lojic/63dff68a3b84d6ab8e28d5d7cea807fc to your computer and use it in GitHub Desktop.
Fast I/O & Bytes
;; Copyright (c) 2023 Brian Adkins
#lang racket
(require "./bytes-view.rkt")
(provide create-block-in
fill-buffer!
next-line!)
(define buffer-size (* 256 1024))
(struct block-in (in ;; Input port
buffer ;; Buffer of bytes
line-beg ;; Index of beginning of current line inclusive
line-end ;; Index of end of current line exclusive
buffer-end) ;; Index of end of buffer exclusive
#:mutable
#:transparent)
(define (create-block-in [ in (current-input-port) ])
(block-in in
(make-bytes buffer-size)
0
0
0))
(define (fill-buffer! obj)
(let ([ line-beg (block-in-line-beg obj) ])
(when (> line-beg 0)
(let ([ buffer (block-in-buffer obj) ]
[ buffer-end (block-in-buffer-end obj) ])
;; Move existing bytes to beginning of buffer
(bytes-copy! buffer
0
buffer
line-beg
buffer-end)
(set-block-in-line-beg! obj 0)
(set-block-in-line-end! obj (- (block-in-line-end obj) line-beg))
(set-block-in-buffer-end! obj (- (block-in-buffer-end obj) line-beg)))))
;; Read more bytes into buffer
(let ([ num-read (read-bytes! (block-in-buffer obj) (block-in-in obj) (block-in-buffer-end obj) buffer-size) ])
(when (not (eof-object? num-read))
(set-block-in-buffer-end! obj (+ (block-in-buffer-end obj)
num-read)))))
(define (next-line! obj)
(let* ([ buffer (block-in-buffer obj) ]
[ line-beg (block-in-line-beg obj) ]
[ buffer-end (block-in-buffer-end obj) ]
[ idx (byte-index-of buffer
line-beg
(char->integer #\newline)
buffer-end) ])
(cond [ idx
;; Newline found, return the line
(let* ([ line-end (add1 idx) ]
[ line (bytes-view buffer line-beg line-end) ])
(set-block-in-line-beg! obj line-end)
(set-block-in-line-end! obj line-end)
line) ]
[ else
(fill-buffer! obj)
(let loop ([ buffer (block-in-buffer obj) ]
[ line-beg (block-in-line-beg obj) ]
[ line-end (block-in-line-end obj) ]
[ buffer-end (block-in-buffer-end obj) ])
(if (< line-end buffer-end)
(let ([ c (integer->char (bytes-ref buffer line-end)) ])
(set-block-in-line-end! obj (add1 line-end))
(if (char=? c #\newline)
(let* ([ line-end (block-in-line-end obj) ]
[ line (bytes-view buffer line-beg line-end) ])
(set-block-in-line-beg! obj line-end)
line)
(loop buffer
line-beg
(block-in-line-end obj)
buffer-end)))
(bytes-view buffer line-beg line-end))) ])))
;; Copyright (c) 2023 Brian Adkins
#lang racket
(require "./bytes-view.rkt")
(provide create-block-out
block-write-bytes!
flush-buffer!)
(define buffer-size (* 256 1024))
(struct block-out (out
buffer
end)
#:mutable
#:transparent)
(define (create-block-out [ out (current-output-port) ])
(block-out out
(make-bytes buffer-size)
0))
(define (flush-buffer! obj)
(let ([ end (block-out-end obj) ])
(when (> end 0)
(write-bytes (block-out-buffer obj) (block-out-out obj) 0 end)
(set-block-out-end! obj 0))))
(define (block-write-bytes! obj view)
(let* ([ buffer (block-out-buffer obj) ]
[ view-beg (bytes-view-beg view) ]
[ view-end (bytes-view-end view) ]
[ view-len (- view-end view-beg) ])
(when (> (+ (block-out-end obj) view-len) buffer-size)
(flush-buffer! obj))
(let ([ end (block-out-end obj) ])
(bytes-copy! buffer end (bytes-view-buffer view) view-beg view-end)
(set-block-out-end! obj (+ end view-len)))))
;; Copyright (c) 2023 Brian Adkins
#lang racket
(require "./bytes-view.rkt")
(provide bytes-view-soundex)
(define empty-view (create-bytes-view #""))
;; "-123-12_-22455-12623-1_2_2" ; original from wikipedia
;; ABCDEFGHIJKLMNOPQRSTUVWXYZ
(define codes #"-123-12_-22455-12623-1_2-2") ; treat y as a vowel to match Racket code for now
(define bA (char->integer #\A))
(define bZ (char->integer #\Z))
(define ba (char->integer #\a))
(define bz (char->integer #\z))
(define (get-code b)
(cond [ (and (>= b bA) (<= b bZ))
(bytes-ref codes (- b bA)) ]
[ (and (>= b ba) (<= b bz))
(bytes-ref codes (- b ba)) ]
[ else
0 ]))
(define (is-alpha b)
(or (and (>= b bA) (<= b bZ))
(and (>= b ba) (<= b bz))))
(define (bytes-view-soundex view)
(define len (bytes-view-length view))
(define out (make-bytes 4))
(bytes-copy! out 0 #"0000")
;; Set si to index of first alpha, if any
(define si (let loop ([ i 0 ])
(if (or (>= i len) (is-alpha (bytes-view-ref view i)))
i
(loop (add1 i)))))
(cond [ (>= si len)
empty-view ]
[ else
(bytes-set! out 0 (bytes-view-ref view si))
(let loop ([ si (add1 si) ][ i 1 ][ prev (get-code (bytes-ref out 0)) ])
(if (or (>= si len) (>= i 4))
(create-bytes-view out)
(let ([ code (get-code (bytes-view-ref view si)) ])
(cond [ (or (= code 0) (= code prev))
(loop (add1 si) i prev) ]
[ (= code (char->integer #\-))
; vowel separator
(loop (add1 si) i 0) ]
[ (= code (char->integer #\_))
; h, w, y separator do nothing
(loop (add1 si) i prev) ]
[ else
(bytes-set! out i code)
(loop (add1 si) (add1 i) code) ])))) ]))
(module+ test
(require rackunit)
;; get-code
(check-equal? (get-code (char->integer #\L)) (char->integer #\4))
;; bytes-view-soundex
(for ([ pair (in-list '((#"GLENWYN" . #"G455")
(#"-CHARLES" . #"C642")
(#"Tymczak" . #"T522")
(#"Rupert" . #"R163")
(#"Robert" . #"R163")
(#"Ashcraft" . #"A261")
(#"Burroughs" . #"B620")
(#"Burrows" . #"B620")
(#"Ekzampul" . #"E251")
(#"Ellery" . #"E460")
(#"Euler" . #"E460")
(#"Example" . #"E251")
(#"Gauss" . #"G200")
(#"Ghosh" . #"G200")
(#"Gutierrez" . #"G362")
(#"Heilbronn" . #"H416")
(#"Hilbert" . #"H416")
(#"Jackson" . #"J250")
(#"Kant" . #"K530")
(#"Knuth" . #"K530")
(#"Ladd" . #"L300")
(#"Lee" . #"L000")
(#"Lissajous" . #"L222")
(#"Lloyd" . #"L300")
(#"Lukasiewicz" . #"L222")
(#"O'Hara" . #"O600")
(#"Pfister" . #"P236")
(#"Soundex" . #"S532")
(#"Sownteks" . #"S532")
(#"Tymczak" . #"T522")
(#"VanDeusen" . #"V532")
(#"Washington" . #"W252")
(#"Wheaton" . #"W350"))) ])
(let* ([ bstr (car pair) ]
[ view (bytes-view-soundex (create-bytes-view bstr)) ])
(check-equal? (bytes-view->bytes view) (cdr pair))))
)
#;(define (compute-soundex view)
(~> (bytes-view->bytes view)
(bytes->string/latin-1 _)
(soundex-encode _)
(string->bytes/latin-1 _)
(create-bytes-view _)))
;; Copyright (c) 2023 Brian Adkins
#lang racket
;; Byte String View
;; Enhances performance by reducing allocation and copying of bytes.
(provide byte-index-of
bytes-view->bytes
bytes-view-index-of
bytes-view-length
bytes-view-ref
bytes-view-starts-with
bytes-view-subview
create-bytes-view
empty-bytes-view?
split-bytes-view
trim-bytes-view!
(struct-out bytes-view))
(struct bytes-view (buffer
beg
end)
#:mutable
#:transparent)
(define (byte-index-of bstr idx b end)
(let loop ([ idx idx ])
(if (< idx end)
(if (= b (bytes-ref bstr idx))
idx
(loop (add1 idx)))
#f)))
;; Return index of b (relative to beginning of view, or #f if not found
(define (bytes-view-index-of obj b)
(let ([ pos (byte-index-of (bytes-view-buffer obj)
(bytes-view-beg obj)
b
(bytes-view-end obj)) ])
(if pos
(- pos (bytes-view-beg obj))
#f)))
(define (bytes-view->bytes obj)
(subbytes (bytes-view-buffer obj)
(bytes-view-beg obj)
(bytes-view-end obj)))
(define (bytes-view-length obj)
(- (bytes-view-end obj) (bytes-view-beg obj)))
(define (bytes-view-ref obj pos)
(bytes-ref (bytes-view-buffer obj) (+ pos (bytes-view-beg obj))))
(define (bytes-view-starts-with obj bstr)
(let ([ buf-len (bytes-view-length obj) ]
[ bstr-len (bytes-length bstr) ])
(let loop ([ idx 0 ])
(cond [ (>= idx bstr-len) #t ] ; Ran out of bstr
[ (>= idx buf-len) #f ] ; Ran out of view
[ (= (bytes-view-ref obj idx) (bytes-ref bstr idx))
(loop (add1 idx)) ]
[ else #f ]))))
(define (bytes-view-subview obj beg end)
(let ([ view-beg (bytes-view-beg obj) ])
(create-bytes-view (bytes-view-buffer obj)
(+ beg view-beg)
(+ end view-beg))))
(define (create-bytes-view bstr [ beg #f ] [ end #f ])
(let ([ beg (if beg beg 0) ]
[ end (if end end (bytes-length bstr)) ])
(bytes-view bstr beg end)))
(define (empty-bytes-view? obj)
(= 0 (bytes-view-length obj)))
(define (split-bytes-view obj delim-char)
(let ([ delim (char->integer delim-char) ]
[ len (bytes-view-length obj) ])
(if (< len 1)
'()
(let loop ([ first 0 ]
[ tok-end 0 ]
[ si 0 ]
[ result '() ])
(if (< si len)
(if (= delim (bytes-view-ref obj si))
(let ([ si (add1 si) ])
(if (> tok-end 0)
(let ([ token (bytes-view-subview obj first tok-end) ])
(trim-bytes-view! token)
(loop si si si (cons token result)))
(loop si si si (cons (create-bytes-view (bytes-view-buffer obj) 0 0) result))))
(loop first (add1 tok-end) (add1 si) result))
(if (> tok-end 0)
;; Last token
(let ([ token (bytes-view-subview obj first tok-end) ])
(trim-bytes-view! token)
(reverse (cons token result)))
(if (= delim (bytes-view-ref obj (sub1 len)))
;; Handle trailing delim
(reverse (cons (create-bytes-view (bytes-view-buffer obj) 0 0) result))
(reverse result))))))))
(define (trim-bytes-view! obj)
(define buffer (bytes-view-buffer obj))
(let loop ([ beg (bytes-view-beg obj) ]
[ end (bytes-view-end obj) ])
(cond [ (<= end beg)
;; In case end < beg, use beg for both to indicate empty
(set-bytes-view-beg! obj beg)
(set-bytes-view-end! obj beg) ]
[ else
(let* ([ b1 (bytes-ref buffer beg) ]
[ b2 (bytes-ref buffer (sub1 end)) ]
[ beg2 (if (char-whitespace? (integer->char b1))
(add1 beg)
beg) ]
[ end2 (if (char-whitespace? (integer->char b2))
(sub1 end)
end) ])
(cond [ (and (= beg beg2) (= end end2))
;; No change, we're done
(set-bytes-view-beg! obj beg2)
(set-bytes-view-end! obj end2) ]
[ else
(loop beg2 end2) ])) ])))
(module+ test
(require rackunit)
;; bytes-view
(let ([ obj (bytes-view #"Hello" 1 4) ])
(check-false (empty-bytes-view? obj))
(let ([ bstr (subbytes (bytes-view-buffer obj) (bytes-view-beg obj) (bytes-view-end obj)) ])
(check-equal? bstr #"ell")))
;; byte-index-of
(let* ([ bstr #"Hello, world" ]
[ idx (byte-index-of bstr 2 (char->integer #\,) (bytes-length bstr)) ])
(check-equal? idx 5))
;; bytes-view-index-of
;; 012345678901
(let* ([ bstr #"Hello, world" ]
;; view is #"llo, wo"
[ view (create-bytes-view bstr 2 9) ])
(check-equal? (bytes-view-index-of view (char->integer #\,)) 3)
(check-equal? (bytes-view-index-of view (char->integer #\w)) 5)
(check-false (bytes-view-index-of view (char->integer #\d))))
;; bytes-view-starts-with
(let* ([ bstr #"abcdefg" ]
[ view (create-bytes-view bstr 1) ])
(check-not-false (bytes-view-starts-with view #"b"))
(check-not-false (bytes-view-starts-with view #"bcd"))
(check-false (bytes-view-starts-with view #"bcdefgh"))
)
;; split-bytes-view
(let* ([ bstr #" Smith, John,G.\n" ]
[ view (create-bytes-view bstr) ]
[ lst (split-bytes-view view #\, ) ])
(check-equal? (length lst) 3)
(check-equal? (bytes-view->bytes (first lst)) #"Smith")
(check-equal? (bytes-view->bytes (second lst)) #"John")
(check-equal? (bytes-view->bytes (third lst)) #"G."))
(let* ([ bstr #" ,Smith, John,G.\n," ]
[ view (create-bytes-view bstr) ]
[ lst (split-bytes-view view #\, ) ])
(check-equal? (length lst) 5)
(check-equal? (bytes-view->bytes (first lst)) #"")
(check-equal? (bytes-view->bytes (second lst)) #"Smith")
(check-equal? (bytes-view->bytes (third lst)) #"John")
(check-equal? (bytes-view->bytes (fourth lst)) #"G.")
(check-equal? (bytes-view->bytes (fifth lst)) #""))
;; trim-bytes-view!
(for ([ pair (in-list '((#"" . #"")
(#" " . #"")
(#"A" . #"A")
(#"\tA" . #"A")
(#"\t A B\n" . #"A B")
)) ])
(let* ([ view (create-bytes-view (car pair)) ]
[ bstr (cdr pair) ])
(trim-bytes-view! view)
(check-equal? (bytes-view->bytes view) bstr)))
)
;; Copyright (c) 2023 Brian Adkins
#lang racket
(require "./block-input.rkt"
"./block-output.rkt"
"./bytes-view.rkt")
(module+ main
(define in (create-block-in))
(define out (create-block-out))
(let loop ([ line (next-line! in) ])
(when (not (empty-bytes-view? line))
(block-write-bytes! out line)
(loop (next-line! in))))
(flush-buffer! out))
#;(module+ main
(define in (current-input-port))
(define out (current-output-port))
(for ([ line (in-bytes-lines in) ])
(write-bytes line out)
(newline out)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment