Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Map lines over an internal buffer - the callback must copy the string if needed, otherwise the underlying data changes.
(defpackage :map-lines (:use :cl))
(in-package :map-lines)
(defparameter *max-line-size* 65536)
(defparameter *default-buffer-size* 16384)
(declaim (inline map-lines-on-shared-buffer))
(define-condition internal-buffer-overflow (error)
((buffer :reader internal-buffer-overflow/buffer :initarg :buffer)
(variable :reader internal-buffer-overflow/variable :initarg :variable)
(size :reader internal-buffer-overflow/size :initarg :size)))
(defun map-lines-on-shared-buffer
(stream function &key (buffer-size *default-buffer-size*))
"Call FUNCTION for each line read from STREAM, using an internal buffer.
FUNCTION should accept a single argument, a string which content is
only available while the callback is processing it. The same
underlying buffer is used for successive invocations of FUNCTION."
(check-type stream stream)
(check-type function (or function symbol))
(check-type buffer-size fixnum)
(when (symbolp function)
(setf function (symbol-function function)))
(let ((view (make-array 0
:element-type 'character
:adjustable t))
(buffer (make-array buffer-size :element-type 'character))
(buffer-start 0) ;; where to start filling the buffer from stream
(buffer-end 0) ;; where buffer filling from stream ended
(start 0) ;; start of current line within buffer (w.r.t. #\newline)
(end 0)) ;; end of current line within buffer
(declare (type function function)
(type string buffer)
(type fixnum buffer-end buffer-start start end)
(dynamic-extent buffer-end buffer-start start end))
(flet ((callback (start end)
(funcall function
(adjust-array view
(- end start)
:displaced-to buffer
:displaced-index-offset start))))
(declare (inline callback))
(block nil
(tagbody
:buffer
;; Fill buffer with characters from the stream. When
;; nothing is read, we can exit the state machine. It is
;; however still possible that there are are characters
;; left between position zero and buffer-start: if so,
;; process that region with the callback function.
(setf buffer-end
(read-sequence buffer stream :start buffer-start))
(when (= buffer-end buffer-start)
(when (plusp buffer-start)
(callback 0 buffer-start))
(return))
:next-line
;; Try to find the end of line. If we reach the end of the
;; buffer, go instead to :COPY, which shifts the content on
;; the "left" to make room for more characters to buffer.
(setf end
(or (position #\newline
buffer
:test #'char=
:start start
:end buffer-end)
(go :copy)))
;; The whole line fits in the buffer, from start to end.
(callback start end)
(setf start (1+ end))
(go :next-line)
:copy
;; not enough room, copy the latest line fragment (the one
;; being currently read) at the beginning of the buffer. If
;; we already are at the beginning (ZEROP START), then we
;; try to extend to buffer instead.
(when (zerop start)
(go :extend))
(replace buffer buffer :start2 start :end2 buffer-end)
(setf buffer-start (- buffer-end start))
(setf start 0)
;; We shifted the current line on the left, because we did
;; not yet found a newline. Buffer more characters.
(go :buffer)
:extend
(let ((size (array-total-size buffer)))
(setf buffer
(adjust-array
buffer
(if (>= size *max-line-size*)
(restart-case
(error 'internal-buffer-overflow
:buffer buffer
:variable '*max-line-size*
:size size)
(ignore ()
:report "Ignore the limit and extend the buffer."
(* size 2)))
(min *max-line-size* (* size 2)))))
(setf buffer-start size)
(go :buffer))))))))
@christophejunke

This comment has been minimized.

Copy link
Owner Author

christophejunke commented May 16, 2020

This is not the most up-to-date version, there is a case that is not handled as well as it could (read-sequence not filling the buffer, and no newline in that)

@christophejunke

This comment has been minimized.

Copy link
Owner Author

christophejunke commented May 16, 2020

@svetlyak40wt

This comment has been minimized.

Copy link

svetlyak40wt commented May 16, 2020

I tried this code on my log file.

It takes 180ms to loop through lines when counting them:

(defun read-lines-4 (filename)
  (with-open-file (in filename)
    (let ((count 0))
      (declare (type fixnum count))
      (map-lines-on-shared-buffer
       in
       (lambda (s)
         (declare (ignorable s))
         (incf count)))
      count)))

And 300-400ms when collecting strings:

(defun read-lines-4 (filename)
  (with-open-file (in filename)
    (uiop:while-collecting (collect-string)
      (map-lines-on-shared-buffer
       in
       (lambda (s)
         (collect-string (copy-seq s)))))))
@christophejunke

This comment has been minimized.

Copy link
Owner Author

christophejunke commented May 16, 2020

Thanks for your (time)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.