Skip to content

Instantly share code, notes, and snippets.

@ktakashi
Last active June 16, 2016 14:33
Show Gist options
  • Save ktakashi/99fa3a7b729572fffb8c57d34fef0e24 to your computer and use it in GitHub Desktop.
Save ktakashi/99fa3a7b729572fffb8c57d34fef0e24 to your computer and use it in GitHub Desktop.
O(1) memory k length grep
(import (rnrs)
(prefix (binary io) binary:)
(util bytevector))
;; assume all UTF-8 and EOL is \n
;; It's as close as O(1) memory space.
;; The amount of memory usage is O(m), m is the length of one line.
;; (and I think reading one line isn't that bad comparing storing n lines)
;; It is possible to make this actual O(1) to search port each time set
;; port position. But I don't think that's a good trade off (it doesn't pay off).
(define (grep file pattern k)
(define (compute-pop in off)
(let ((save (port-position in)))
(set-port-position! in off)
(let ((c (bytevector-length (binary:get-line in))))
(set-port-position! in save)
(+ c 1))))
(define (dump in off total)
(define out (standard-output-port))
(set-port-position! in off)
(do ((k (- total off)) (i 0 (+ i 1)))
((= i k))
(put-u8 out (get-u8 in))))
(define pattern-bv (string->utf8 pattern))
(let ((in (open-file-input-port file)))
(let loop ((i 1) (off 0) (pop #f) (total 0))
(let ((line (binary:get-line in)))
;; (print i ", "pop ":" off ":" total)
(cond ((bytevector-contains line pattern-bv)
(dump in off (+ total (bytevector-length line) 1)))
((>= i k)
(loop (+ i 1)
(+ off pop)
(compute-pop in (+ off pop))
(+ total (bytevector-length line) 1)))
(else
(loop (+ i 1)
0 ;; starting point if the line is less than k
(or pop (+ (bytevector-length line) 1))
(+ total (bytevector-length line) 1))))))))
(grep "grep.scm" "pattern-bv" 4)
#| OUTPUT
(do ((k (- total off)) (i 0 (+ i 1)))
((= i k))
(put-u8 out (get-u8 in))))
(define pattern-bv (string->utf8 pattern))
|#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment