Skip to content

Instantly share code, notes, and snippets.

@gdsfh
Created August 14, 2013 15:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gdsfh/80f17a917cbd00b3bc66 to your computer and use it in GitHub Desktop.
Save gdsfh/80f17a917cbd00b3bc66 to your computer and use it in GitHub Desktop.
type t =
{ buf_ofs : mutable int
; buf_len : mutable int
; buf_arr : string
; buf_len_to_eof : mutable int
; buf_bs : int
; buf_fd : mutable file_descr
}
;
value read_into_buffer sz b =
let ba = b.buf_arr in
let () (* ba' *) =
exact_read b.buf_fd ba (b.buf_ofs + b.buf_len) sz
in
let () (* buf' *) =
( b.buf_len := b.buf_len + sz
; b.buf_len_to_eof := b.buf_len_to_eof - sz
)
in
()
;
value blit_from_buffer
(buf : t)
(con : string)
(con_ofs_dest : int)
(len : int)
=
let ba = buf.buf_arr in
let () (* ba' *) =
String.blit ba buf.buf_ofs con con_ofs_dest len
in
let () (* buf' *) =
( buf.buf_ofs := buf.buf_ofs + len
; buf.buf_len := buf.buf_len - len
)
in
()
;
value blit_all
(buf : t)
(con : string)
(con_ofs_dest : int)
=
let old_len = buf.buf_len in
if old_len = 0
then
()
else
(* here is proved that old_len > 0 *)
blit_from_buffer
buf
con
con_ofs_dest
old_len
;
value direct_read0
(con : string)
(ofs : int)
(len : int)
(buf : t)
=
(* in OCaml here is just Unix.read + modifying buf.buf_ofs *)
let () = assert (buf.buf_len = 0) in
let () (* con' *) =
exact_read buf.buf_fd con ofs len
in
let () (* buf' *) =
( buf.buf_ofs := (buf.buf_ofs + len) mod buf.buf_bs
; buf.buf_len_to_eof := buf.buf_len_to_eof - len
)
in
()
;
value direct_read
(con : string)
(ofs : int)
(len : int)
(buf : t)
=
let () = assert (len > 0)
in
direct_read0
con
ofs
len
buf
;
value move_buffer_on_bs
(buf : t)
=
(* in OCaml it's just "buf.buf_ofs -= bs" *)
let () = assert (buf.buf_len = 0) in
let () (* buf' *) =
buf.buf_ofs := buf.buf_ofs - buf.buf_bs
in
()
;
value consume_into_substring
(con : string)
(ofs : int)
(len : int)
(buf : t)
=
if len <= buf.buf_len
then
blit_from_buffer
buf
con
ofs
len
else
(* len > buf_len *)
(* offsets are relative to buffer's beginning *)
let enb = buf.buf_ofs + buf.buf_len
(* end of "what we have in buffer" *) in
let enr = buf.buf_ofs + len
(* end of "what we want to read" *) in
let enf = enb + buf.buf_len_to_eof
(* end of "file region we can read" *) in
(* (enr > enb <- len > buf_len) *)
let init_buf_len = buf.buf_len (* to use it as w.(w_buf).(buf_len) *)
in
(* here enf >= enr *)
if enf = enr
then
(* here: enf = enr => caller reads everything up to file eof. So:
blit all from buffer,
direct_read buf.len_to_eof
*)
let () (* w1 *) =
blit_all
buf
con
ofs
in
let l = buf.buf_len_to_eof
in
let () (* w2 *) =
direct_read
con
(ofs + init_buf_len)
l
buf
in
()
else
(* here: enr < enf => caller reads less than "to eof".
a lot of cases.
*)
let bs = buf.buf_bs in
let bs2 = bs * 2 in
if enf <= bs2
then
(* case I: enf <= bs2 => file till eof fits the buffer:
0 bs bs2
|...............|...............|
^buf_ofs ^enb13 ^enb2 ^enf
^enr3 ^enr12
enf <= bs2 ( -> enf <= bs*3)
enb13 <= bs \/ enb2 > bs
here:
read "enf - enb"
(it's greater than 0, because of
"enr < enf /\ enr > enb"),
blit "len".
possible combinations: (enb13, enr12),
(enb13, enr3), (enb2, enr12).
impossible one: (enb2, enr3).
(naming: common digit => possible,
enb2 >= enr3, so impossible.)
*)
let () (* w1 *) =
read_into_buffer
(enf - enb)
buf
in
let () (* wc2 *) =
blit_from_buffer
buf
con
ofs
len
in
()
else
(* enf > bs2 *)
if enf < bs * 3
then
(* here: bs*2 < enf <= bs*3.
cases:
II: enb >= bs: after blitting from buffer
the rest of file fits the buffer.
III: enb < bs: more expensive case: blit_all,
direct_read "enr - enf".
*)
if enb >= bs
then
(* case II:
0 bs bs2 bs*3
|.........|.........|.........|
^buf_ofs ^enb ^enf
^enr1 ^enr2
enf > bs2
enf <= bs*3
enb >= bs
enr1 <= bs2 \/ enr2 > bs2
[bs .. bs2) can be either empty or full, but after
blitting we surely can get rid of [0 .. bs).
here:
blit_all
(buf_ofs' := enb, buf_len' := 0)
move empty buffer to "-bs"
(buf_ofs'' := buf_ofs' - bs, enb'' := buf_ofs'',
enf'' := enf - bs, enr'' := enr - bs)
0 bs bs2
|..........|..........|
^buf_ofs'' ^enf''
^enb'' ^enr2''
^enr1''
read_into_buffer to ofs=buf_ofs'' len=enf''-enb''
(till eof)
blit "enr'' - enb''" to consumer, dest_ofs=ofs+buf_len
*)
let () (* w1 *) =
blit_all
buf
con
ofs
in
let buf_ofs' = enb in
let () (* w2 *) =
move_buffer_on_bs
buf
in
let buf_ofs'' = buf_ofs' - bs in
let enb'' = buf_ofs'' in
let enf'' = enf - bs in
let enr'' = enr - bs in
let () (* w3 *) =
read_into_buffer
(enf'' - enb'')
buf
in
let () (* w4 *) =
blit_from_buffer
buf
con
(ofs + init_buf_len)
(enr'' - enb'')
in
()
else
(* enb < bs *)
(* case III:
0 bs bs2 bs*3
|............|............|............|
^buf_ofs ^enb ^enr1 ^enr2 ^enf
enf > bs2
enf <= bs*3
enb < bs ( -> [0 .. bs) has data)
enr1 <= bs2 \/ enr2 > bs2
here:
blit_all
(buf_ofs' := enb, buf_len' := 0)
direct_read "enr - enb" to ofs "ofs + buf_len"
it will move buffer:
0 bs bs2
|.................|................|
^buf_ofs1'' ^enf1''
^buf_ofs2'' ^enf2''
but we don't care.
*)
let () (* w1 *) =
blit_all
buf
con
ofs
in
let () (* w2 *) =
direct_read
con
(ofs + init_buf_len)
(enr - enb)
buf
in
()
else
(* here: enf > bs3 *)
if enr <= bs2
then
(* case IV: enr <= bs2 (enb < enr -> enb < bs2),
enf is far.
0 bs bs2
|........|........|....~~|~~~~|~~....
^enb1 ^enb2 ^enf
^enr1 ^enr12
here:
read_into_buffer "bs2 - enb",
blit "enr - enb".
*)
let () (* w1 *) =
read_into_buffer
(bs2 - enb)
buf
in
let () (* w2 *) =
blit_from_buffer
buf
con
ofs
len
in
()
else
(* enr > bs2 *)
let enr_block_begin = enr - (enr mod bs) in
(* cases:
_: enr < enr_block_begin: impossible
V: enr = enr_block_begin: no need to read after enr
VI: enr > enr_block_begin,
enf - enr_block_begin > bs2:
we can't fill the buffer with file contents
till eof during last read, so last read will fill
the whole buffer, "bs2".
VII: enr > enr_block_begin,
enf - enr_block_begin <= bs2 -> we can do it,
last read will be "enf - enr_block_begin",
then blit "enr - enr_block_begin"
In any case we will do first:
blit_all,
direct_read0 "enr_block_begin - enb" to "ofs + buf_len"
*)
let () (* w1 *) =
blit_all
buf
con
ofs
in
let () (* w2 *) =
direct_read0
con
(ofs + init_buf_len)
(enr_block_begin - enb)
buf
in
(* here: enr >= enr_block_begin *)
if enr = enr_block_begin
then
(* case V *)
()
else
(* here: enr > enr_block_begin *)
if enf - enr_block_begin > bs2
then
let () (* w3 *) =
read_into_buffer
bs2
buf
in
let () (* w4 *) =
blit_from_buffer
buf
con
(ofs + init_buf_len + enr_block_begin - enb)
(enr - enr_block_begin)
in
()
else
(* here: enf - enr_block_begin <= bs2 *)
let () (* w3 *) =
read_into_buffer
(enf - enr_block_begin)
buf
in
let () (* w4 *) =
blit_from_buffer
buf
con
(ofs + init_buf_len + enr_block_begin - enb)
(enr - enr_block_begin)
in
()
;
value consume_string
(len : int)
(buf : t)
=
if len <= buf.buf_len
then
(* there is no point in creating string then blitting into it;
String.sub from buffer will be faster.
So we'll inline "blit_from_buffer" and change String.blit
to String.sub:
*)
let r = String.sub buf.buf_arr buf.buf_ofs len
in
( buf.buf_ofs := buf.buf_ofs + len
; buf.buf_len := buf.buf_len - len
; r
)
else
(* len > buf.buf_len *)
let r = String.make len '\x00' in
let () (* w' *) =
consume_into_substring
r
0
len
buf
in
r
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment