Skip to content

Instantly share code, notes, and snippets.

@tonyg
Last active July 14, 2022 09:53
Show Gist options
  • Save tonyg/1dfacc46e4488d5cc2139f48b533240a to your computer and use it in GitHub Desktop.
Save tonyg/1dfacc46e4488d5cc2139f48b533240a to your computer and use it in GitHub Desktop.
#lang racket
;; https://github.com/NuxiNL/argdata
(provide (all-defined-out))
;;---------------------------------------------------------------------------
;; Representing values
(struct timestamp (ns) #:transparent)
(struct fd (number) #:transparent)
;;---------------------------------------------------------------------------
;; Reader
(define (read-argdata input-length [in-port (current-input-port)])
(let/ec return
(define (next payload-length)
(match (next-byte)
[#x01 (next-bytes payload-length)]
[#x02 (if (zero? payload-length) #f (begin (next-byte) #t))] ;; should check (next-byte)!
[#x03 (fd (integer-bytes->integer (next-bytes 4) #f #t))]
[#x04 (floating-point-bytes->real (next-bytes 8) #t 0 8)]
[#x05 (next-integer payload-length)]
[#x06 (apply hash (next-items payload-length))]
[#x07 (next-items payload-length)]
[#x08 (begin0 (bytes->string/utf-8 (next-bytes (- payload-length 1))) (next-byte))]
[#x09 (timestamp (next-integer payload-length))]))
(define (next-items n) (if (zero? n) '() (next-items1 n)))
(define (next-items1 n)
(let loop ((acc 0) (n (- n 1)))
(define b (next-byte))
(cond [(< b 128) (loop (+ (* acc 128) b) (- n 1))]
[else (define f (+ (* acc 128) (- b 128)))
(cons (next (- f 1)) (next-items (- n f)))])))
(define (next-byte) (match (read-byte in-port) [(? eof-object?) (return eof)] [b b]))
(define (next-bytes n)
(define bs (match (read-bytes n in-port) [(? eof-object?) (return eof)] [bs bs]))
(if (< (bytes-length bs) n) (return eof) bs))
(define (next-integer n)
(if (zero? n)
0
(let* ((acc0 (next-byte)) (acc (if (< acc0 128) acc0 (- acc0 256))))
(for/fold [(acc acc)] [(n (in-range (- n 1)))] (+ (* acc 256) (next-byte))))))
(next (- input-length 1))))
;;---------------------------------------------------------------------------
;; Writer
(define (write-argdata v [out-port (current-output-port)])
(define (output-byte b) (write-byte b out-port))
(define (output-bytes bs) (write-bytes bs out-port))
(define (output-integer v)
(when (not (zero? v))
(define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit
(define byte-count (quotient (+ raw-bit-count 7) 8))
(for [(shift (in-range (* byte-count 8) 0 -8))]
(output-byte (bitwise-bit-field v (- shift 8) shift)))))
(define (output-sub v)
(define bs (call-with-output-bytes (lambda (p) (write-argdata v p))))
(output-varint (bytes-length bs)) (output-bytes bs))
(define (output-varint v)
(let w ((v v) (d 128))
(if (< v 128)
(output-byte (+ d v))
(begin (w (quotient v 128) 0)
(output-byte (+ d (modulo v 128)))))))
(match v
[(? bytes?) (output-byte #x01) (output-bytes v)]
[#f (output-byte #x02)]
[#t (output-byte #x02) (output-byte #x01)]
[(fd n) (output-byte #x03) (integer->integer-bytes n 4 #f #t)]
[(? flonum?) (output-byte #x04) (output-bytes (real->floating-point-bytes v 8 #t))]
[(? integer?) (output-byte #x05) (output-integer v)]
[(? hash? d) (output-byte #x06) (for [((k v) (in-hash d))] (output-sub k) (output-sub v))]
[(? list? l) (output-byte #x07) (for [(v (in-list l))] (output-sub v))]
[(? string?) (output-byte #x08) (output-bytes (string->bytes/utf-8 v)) (output-byte 0)]
[(timestamp ns) (output-byte #x09) (output-integer ns)]))
#lang racket
;; Jelly, a very shaky implementation of Preserves - intended to
;; demonstrate a minimal implementation of Preserves binary I/O,
;; without error-checking or configurability etc.
;; This is blue-flavoured jelly. New, exciting, experimental, not sure if food?
;; (It's a response to https://github.com/NuxiNL/argdata, experimenting with an alternative
;; binary encoding for full preserves.)
(provide (all-defined-out))
(require "terms.rkt")
(require (only-in file/sha1 bytes->hex-string))
(define (blue->bytes v) (call-with-output-bytes (lambda (p) (write-blue v p))))
(define (bytes->blue bs) (read-blue (bytes-length bs) (open-input-bytes bs)))
;;---------------------------------------------------------------------------
;; Reader
;; Note to self: #x80 -- #xBF are free
;; Varints: like argdata. Big-endian, high bit set for final septet in sequence.
;; Streams of packets: Send byte #xA8 (sequence), then the packets, prefixed by varint lengths
(define (read-blue count [in-port (current-input-port)])
(define (next-byte) (if (zero? count) eof (begin (set! count (- count 1)) (read-byte in-port))))
(define (remaining-bytes) (begin0 (read-bytes count in-port) (set! count 0)))
(define (chop-last bs) (subbytes bs 0 (- (bytes-length bs) 1)))
(define (next)
(match (next-byte)
[#xA0 #f]
[#xA1 #t]
[#xA2 #:when (= count 4) (float (floating-point-bytes->real (remaining-bytes) #t 0 4))]
[#xA2 #:when (= count 8) (floating-point-bytes->real (remaining-bytes) #t 0 8)]
[#xA3 #:when (zero? count) 0]
[#xA3 (let* ((acc0 (next-byte)) (acc (if (< acc0 128) acc0 (- acc0 256))))
(for/fold [(acc acc)] [(b (remaining-bytes))] (+ (* acc 256) b)))]
[#xA4 (bytes->string/utf-8 (chop-last (remaining-bytes)))]
[#xA5 (remaining-bytes)]
[#xA6 (string->symbol (bytes->string/utf-8 (remaining-bytes)))]
[#xA7 (apply (lambda (label . fields) (record label fields)) (next-items))]
[#xA8 (next-items)]
[#xA9 (list->set (next-items))]
[#xAA (apply hash (next-items))]
[#xAB (embedded (next))]
[#xBF (apply (lambda (v . anns) (annotated v anns)) (next-items))]))
(define (next-items) (if (zero? count) '() (cons (next-item) (next-items))))
(define (next-item) (let* ((block-len (next-varint)) (next-count (- count block-len)))
(set! count block-len)
(begin0 (next) (set! count next-count))))
(define (next-varint [acc 0] [b (next-byte)])
(if (< b 128) (next-varint (+ (* acc 128) b) (next-byte)) (+ (* acc 128) (- b 128))))
(next))
;;---------------------------------------------------------------------------
;; Writer
(define (write-blue v [out-port (current-output-port)] [anns? #t])
(define (! b) (write-byte b out-port))
(define (enc v [anns? #f]) (call-with-output-bytes (lambda (p) (write-blue v p anns?))))
(define (enc-car kv) (enc (car kv)))
(define (output* v)
(define bs (enc v anns?))
(let w ((n (bytes-length bs)) (d 128))
(if (< n 128) (! (+ d n)) (begin (w (quotient n 128) 0) (! (+ d (modulo n 128))))))
(write-bytes bs out-port))
(define (flatpairs es) (match es ['() '()] [(list* (cons k v) r) (list* k v (flatpairs r))]))
(match v
[(? boolean?) (! (if v #xA1 #xA0))]
[(float v) (! #xA2) (write-bytes (real->floating-point-bytes v 4 #t) out-port)]
[(? flonum?) (! #xA2) (write-bytes (real->floating-point-bytes v 8 #t) out-port)]
[0 (! #xA3)]
[(? integer?) (! #xA3) (for [(shift (in-range (bitwise-and (+ (integer-length v) 8) -8) 0 -8))]
(! (bitwise-bit-field v (- shift 8) shift)))]
[(? string?) (! #xA4) (write-bytes (string->bytes/utf-8 v) out-port) (! #x00)]
[(? bytes?) (! #xA5) (write-bytes v out-port)]
[(? symbol?) (! #xA6) (write-bytes (string->bytes/utf-8 (symbol->string v)) out-port)]
[(record label fields) (! #xA7) (for-each output* (cons label fields))]
[(? list?) (! #xA8) (for-each output* v)]
[(? set?) (! #xA9) (for-each output* (sort (set->list v) bytes<? #:key enc))]
[(? hash?) (! #xAA) (for-each output* (flatpairs (sort (hash->list v) bytes<? #:key enc-car)))]
[(embedded w) (! #xAB) (write-blue w out-port anns?)]
[(annotated v (? pair? anns)) #:when anns? (! #xBF) (for-each output* (cons v anns))]
[(annotated v _) (write-blue v out-port anns?)]))
#lang racket
;; Jelly, a very shaky implementation of Preserves - intended to
;; demonstrate a minimal implementation of Preserves binary I/O,
;; without error-checking or configurability etc.
;; Cut down even further just to see how low it could go:
;; - no 32-bit floats
;; - no short- or medium-length integers
;; - no annotations (!)
;; - no symbols (!)
;; - no sets
;; - no records (!!)
(provide (all-defined-out))
(require "terms.rkt")
;;---------------------------------------------------------------------------
;; Reader
(define (read-preserve/binary [in-port (current-input-port)])
(let/ec return
(define (next)
(match (next-byte)
[#x80 #f]
[#x81 #t]
[#x83 (floating-point-bytes->real (next-bytes 8) #t 0 8)]
[#x84 '#:end]
[#x86 (embedded (next))]
[#xB0 (next-integer (next-varint))]
[#xB1 (bytes->string/utf-8 (next-bytes (next-varint)))]
[#xB2 (next-bytes (next-varint))]
[#xB5 (next-items)]
[#xB7 (apply hash (next-items))]))
(define (next-items) (match (next) ['#:end '()] [v (cons v (next-items))]))
(define (eof-guard v) (if (eof-object? v) (return eof) v))
(define (next-byte) (eof-guard (read-byte in-port)))
(define (next-bytes n)
(define bs (eof-guard (read-bytes n in-port)))
(if (< (bytes-length bs) n) (return eof) bs))
(define (next-varint)
(let loop ()
(define b (next-byte))
(cond [(< b 128) b]
[else (+ (* (loop) 128) (- b 128))])))
(define (next-integer n)
(define acc0 (next-byte))
(define acc (if (< acc0 128) acc0 (- acc0 256)))
(for/fold [(acc acc)] [(n (in-range (- n 1)))] (+ (* acc 256) (next-byte))))
(next)))
;;---------------------------------------------------------------------------
;; Writer
(define (write-preserve/binary v [out-port (current-output-port)])
(define (output v)
(match v
[#f (write-byte #x80 out-port)]
[#t (write-byte #x81 out-port)]
[(? flonum?) (write-byte #x83 out-port) (output-bytes (real->floating-point-bytes v 8 #t))]
[(embedded v) (write-byte #x86 out-port) (output v)]
[(? integer?)
(define raw-bit-count (+ (integer-length v) 1)) ;; at least one sign bit
(define byte-count (quotient (+ raw-bit-count 7) 8))
(write-byte #xB0 out-port)
(write-varint byte-count out-port)
(for [(shift (in-range (* byte-count 8) 0 -8))]
(write-byte (bitwise-bit-field v (- shift 8) shift) out-port))]
[(? string?) (count-bytes 1 (string->bytes/utf-8 v))]
[(? bytes?) (count-bytes 2 v)]
[(? list?) (with-seq 5 (for-each output v))]
[(? hash?) (with-seq 7 (output-hash v))]))
(define (output-bytes bs) (write-bytes bs out-port))
(define-syntax-rule (with-seq tag body ...)
(begin (write-byte (+ tag #xB0) out-port)
body ...
(write-byte #x84 out-port)))
(define (count-bytes tag bs)
(write-byte (+ tag #xB0) out-port)
(write-varint (bytes-length bs) out-port)
(output-bytes bs))
(define (encode v) (call-with-output-bytes (lambda (p) (write-preserve/binary v p))))
(define (output-hash d)
(define encoded-entries (for/list [((k v) (in-hash d))] (cons (encode k) (encode v))))
(for-each output-bytes (flatten (sort encoded-entries bytes<? #:key car))))
(output v))
(define (write-varint v out-port)
(if (< v 128)
(write-byte v out-port)
(begin (write-byte (+ 128 (modulo v 128)) out-port)
(write-varint (quotient v 128) out-port))))
#lang racket
;; Jelly, a very shaky implementation of Preserves - intended to
;; demonstrate a minimal implementation of Preserves binary I/O,
;; without error-checking or configurability etc.
;; Cut down from main, but not as radical a change as blue.
(provide (all-defined-out))
(require "terms.rkt")
;; Same as main, except:
;; - 82/83 are retired, replaced by 87;
;; - 85 is retired, replaced by BF...84; and
;; - 90...AF are retired, replaced by consistent usage of B0.
;;
;; 80 False
;; 81 True
;; (82-83)
;; 84 End
;; (85)
;; 86 Embedded
;; 87 Float/Double
;; (88-AF)
;; B0 SignedInteger
;; B1 String
;; B2 ByteString
;; B3 Symbol
;; B4 Record
;; B5 Sequence
;; B6 Set
;; B7 Dictionary
;; (B8-BE)
;; BF Annotation
;;---------------------------------------------------------------------------
;; Reader
(define (read-preserve/binary [in-port (current-input-port)])
(let/ec return
(define (next-byte) (match (read-byte in-port) [(? eof-object?) (return eof)] [b b]))
(define (next-bytes n)
(define bs (match (read-bytes n in-port) [(? eof-object?) (return eof)] [bs bs]))
(if (< (bytes-length bs) n) (return eof) bs))
(define (next-varint [acc 0] [b (next-byte)])
(if (< b 128) (next-varint (+ (* acc 128) b)) (+ (* acc 128) (- b 128))))
(let next ()
(define (next-items) (match (next) ['#:end '()] [v (cons v (next-items))]))
(match (next-byte)
[#x80 #f]
[#x81 #t]
[#x84 '#:end]
[#x86 (embedded (next))]
[#x87 (match (next-varint)
[4 (float (floating-point-bytes->real (next-bytes 4) #t 0 4))]
[8 (floating-point-bytes->real (next-bytes 8) #t 0 8)])]
[#xB0 (match (next-varint)
[0 0]
[m (define acc (match (next-byte) [p #:when (< p 128) p] [n (- n 256)]))
(for/fold [(acc acc)] [(q (in-range (- m 1)))] (+ (* acc 256) (next-byte)))])]
[#xB1 (bytes->string/utf-8 (next-bytes (next-varint)))]
[#xB2 (next-bytes (next-varint))]
[#xB3 (string->symbol (bytes->string/utf-8 (next-bytes (next-varint))))]
[#xB4 (apply (lambda (l . fs) (record l fs)) (next-items))]
[#xB5 (next-items)]
[#xB6 (list->set (next-items))]
[#xB7 (apply hash (next-items))]
[#xBF (apply (lambda (i . as) (annotated i as)) (next-items))]))))
;;---------------------------------------------------------------------------
;; Writer
(define (write-preserve/binary v [out-port (current-output-port)])
(define (! b) (write-byte b out-port))
(define (!! bs)
(let wlen ((n (bytes-length bs)) (d 128))
(cond [(< n 128) (! (+ d n))] [else (wlen (quotient n 128) 0) (! (+ d (modulo n 128)))]))
(!!! bs))
(define (!!! bs) (write-bytes bs out-port))
(define (enc v) (call-with-output-bytes (lambda (p) (write-preserve/binary v p))))
(define (H v) (flatten (sort (for/list [((k w) v)] (cons (enc k) (enc w))) bytes<? #:key car)))
(let output ((v v))
(match v
[#f (! #x80)]
[#t (! #x81)]
[(embedded v) (! #x86) (output v)]
[(float v) (! #x87) (!! (real->floating-point-bytes v 4 #t))]
[(? flonum?) (! #x87) (!! (real->floating-point-bytes v 8 #t))]
[(? integer?) (define n (if (zero? v) 0 (bitwise-and (+ (integer-length v) 8) -8)))
(! #xB0) (!! (list->bytes (for/list [(x (in-range n 0 -8))]
(bitwise-bit-field v (- x 8) x))))]
[(? string?) (! #xB1) (!! (string->bytes/utf-8 v))]
[(? bytes?) (! #xB2) (!! v)]
[(? symbol?) (! #xB3) (!! (string->bytes/utf-8 (symbol->string v)))]
[(record l fs) (! #xB4) (output l) (for-each output fs) (! #x84)]
[(? list?) (! #xB5) (for-each output v) (! #x84)]
[(? set?) (! #xB6) (for-each !!! (sort (map enc (set->list v)) bytes<?)) (! #x84)]
[(? hash?) (! #xB7) (for-each !!! (H v)) (! #x84)]
[(annotated i as) (! #xBF) (output i) (for-each output as) (! #x84)])))
#lang racket
;; Stroop, a different culture's take on Syrup - intended to compare with jelly, argdata,
;; jelly2, blue-jelly, etc.
;; Missing from Syrup are:
;; - annotations
;; - embedded values
;;
;; I've invented syntax for these (#\A and #\E, respectively).
(provide (all-defined-out))
(require "terms.rkt")
;;---------------------------------------------------------------------------
;; Reader
(define (read-stroop [in-port (current-input-port)])
(let/ec return
(define (next-byte) (match (read-byte in-port) [(? eof-object?) (return eof)] [b b]))
(define (next-bytes n)
(define bs (match (read-bytes n in-port) [(? eof-object?) (return eof)] [bs bs]))
(if (< (bytes-length bs) n) (return eof) bs))
(define (next delim)
(match (next-byte)
[#x66 #f]
[#x74 #t]
[#x46 (float (floating-point-bytes->real (next-bytes 4) #t 0 4))]
[#x44 (floating-point-bytes->real (next-bytes 8) #t 0 8)]
[(== delim) '#:end]
[#x41 (let ((a (next #f)))
(match (next #f)
[(annotated i as) (annotated i (cons a as))]
[i (annotated i (list a))]))]
[#x45 (embedded (next #f))]
[v #:when (<= #x30 v #x39)
(define (extend n v) (+ (* n 10) (- v #x30)))
(let collect ((n (extend 0 v)))
(match (next-byte)
[v #:when (<= #x30 v #x39) (collect (extend n v))]
[#x3A (next-bytes n)]
[#x22 (bytes->string/utf-8 (next-bytes n))]
[#x27 (string->symbol (bytes->string/utf-8 (next-bytes n)))]
[#x2B n]
[#x2D (- n)]))]
[#x3C (apply (lambda (label . fields) (record label fields)) (next-items #x3E))]
[#x5B (next-items #x5D)]
[#x23 (list->set (next-items #x24))]
[#x7B (apply hash (next-items #x7D))]))
(define (next-items delim) (match (next delim) ['#:end '()] [v (cons v (next-items delim))]))
(next #f)))
;;---------------------------------------------------------------------------
;; Writer
(define (write-stroop v [o (current-output-port)])
(define (output v)
(match v
[#f (! #x66)]
[#t (! #x74)]
[(float v) (! #x46) (write-bytes (real->floating-point-bytes v 4 #t) o)]
[(? flonum?) (! #x44) (write-bytes (real->floating-point-bytes v 8 #t) o)]
[(annotated v as) (for [(a as)] (! #x41) (output a)) (output v)]
[(embedded v) (! #x45) (output v)]
[(? integer?) #:when (negative? v) (write (- v) o) (! #x2D)]
[(? integer?) (write v o) (! #x2B)]
[(? string?) (output-bytes #x22 (string->bytes/utf-8 v))]
[(? bytes?) (output-bytes #x3A v)]
[(? symbol?) (output-bytes #x27 (string->bytes/utf-8 (symbol->string v)))]
[(record k vs) (output-items #x3C #x3E (cons k vs))]
[(? list?) (output-items #x5B #x5D v)]
[(? set?) (output-encoded #x23 #x24 (sort (map encode (set->list v)) bytes<?))]
[(? hash?) (output-encoded #x7B #x7D
(flatten (sort (for/list [((k w) v)] (cons (encode k) (encode w)))
bytes<? #:key car)))]))
(define (! b) (write-byte b o))
(define (output-bytes sep bs) (write (bytes-length bs) o) (! sep) (write-bytes bs o))
(define (output-items a z vs) (! a) (for-each output vs) (! z))
(define (output-encoded a z bss) (! a) (for [(bs bss)] (write-bytes bs o)) (! z))
(define (encode v) (call-with-output-bytes (lambda (p) (write-stroop v p))))
(output v))
#lang racket
(provide (all-defined-out))
;;---------------------------------------------------------------------------
;; Representing values
(struct record (label fields) #:transparent)
(struct float (value) #:transparent) ;; a marker for single-precision I/O
(struct annotated (item annotations) #:transparent)
(struct embedded (value) #:transparent)
;;---------------------------------------------------------------------------
;; Example terms
(define ex0 (record '(titled person 2 thing 1)
(list 101
"Blackwell"
(record 'date '(1821 2 3))
"Dr")))
(define ex1 '#hash(("Image" . #hash(("Animated" . false)
("Height" . 600)
("IDs" . (116 943 234 38793))
("Thumbnail" . #hash(("Height" . 125)
("Url" . "http://www.example.com/image/481989943")
("Width" . 100)))
("Title" . "View from 15th Floor")
("Width" . 800)))))
(define ex2 '(#hasheq(("Address" . "")
("City" . "SAN FRANCISCO")
("Country" . "US")
("Latitude" . 37.7668)
("Longitude" . -122.3959)
("State" . "CA")
("Zip" . "94107")
("precision" . "zip"))
#hasheq(("Address" . "")
("City" . "SUNNYVALE")
("Country" . "US")
("Latitude" . 37.371991)
("Longitude" . -122.02602)
("State" . "CA")
("Zip" . "94085")
("precision" . "zip"))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment