Last active
June 16, 2016 14:33
-
-
Save ktakashi/99fa3a7b729572fffb8c57d34fef0e24 to your computer and use it in GitHub Desktop.
O(1) memory k length grep
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(import (rnrs) | |
(prefix (binary io) binary:) | |
(util bytevector)) | |
;; assume all UTF-8 and EOL is \n | |
;; It's as close as O(1) memory space. | |
;; The amount of memory usage is O(m), m is the length of one line. | |
;; (and I think reading one line isn't that bad comparing storing n lines) | |
;; It is possible to make this actual O(1) to search port each time set | |
;; port position. But I don't think that's a good trade off (it doesn't pay off). | |
(define (grep file pattern k) | |
(define (compute-pop in off) | |
(let ((save (port-position in))) | |
(set-port-position! in off) | |
(let ((c (bytevector-length (binary:get-line in)))) | |
(set-port-position! in save) | |
(+ c 1)))) | |
(define (dump in off total) | |
(define out (standard-output-port)) | |
(set-port-position! in off) | |
(do ((k (- total off)) (i 0 (+ i 1))) | |
((= i k)) | |
(put-u8 out (get-u8 in)))) | |
(define pattern-bv (string->utf8 pattern)) | |
(let ((in (open-file-input-port file))) | |
(let loop ((i 1) (off 0) (pop #f) (total 0)) | |
(let ((line (binary:get-line in))) | |
;; (print i ", "pop ":" off ":" total) | |
(cond ((bytevector-contains line pattern-bv) | |
(dump in off (+ total (bytevector-length line) 1))) | |
((>= i k) | |
(loop (+ i 1) | |
(+ off pop) | |
(compute-pop in (+ off pop)) | |
(+ total (bytevector-length line) 1))) | |
(else | |
(loop (+ i 1) | |
0 ;; starting point if the line is less than k | |
(or pop (+ (bytevector-length line) 1)) | |
(+ total (bytevector-length line) 1)))))))) | |
(grep "grep.scm" "pattern-bv" 4) | |
#| OUTPUT | |
(do ((k (- total off)) (i 0 (+ i 1))) | |
((= i k)) | |
(put-u8 out (get-u8 in)))) | |
(define pattern-bv (string->utf8 pattern)) | |
|# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment