Skip to content

Instantly share code, notes, and snippets.

@karlseguin
Created November 13, 2023 01:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save karlseguin/17cc66cc26f167c1242608bfae73fb06 to your computer and use it in GitHub Desktop.
Save karlseguin/17cc66cc26f167c1242608bfae73fb06 to your computer and use it in GitHub Desktop.
streamUntilDelimiter Buffered vs Not
const std = @import("std");
const LOOPS = 1000;
/// Benchmarks two ways of splitting the same 10,000 random bytes on '\n':
/// the local `streamUntilDelimiter`, which works directly on the
/// BufferedReader's internal buffer, and `streamUntilDelimiter` called through
/// the reader returned by `buffered.reader()`.
///
/// Each pass runs LOOPS times and prints the elapsed microseconds together
/// with the number of delimited entries and bytes seen, so the two
/// implementations can be checked against each other. Trailing bytes after
/// the last delimiter end the inner loop via error.EndOfStream and are not
/// counted in either pass.
pub fn main() !void {
    var in: [10000]u8 = undefined;
    try std.os.getrandom(&in);
    var fbs = std.io.fixedBufferStream(&in);

    var out: [10000]u8 = undefined;
    var out_stream = std.io.fixedBufferStream(&out);
    // The writer is never reassigned after creation, so it is a constant.
    const out_writer = out_stream.writer();

    {
        // Pass 1: operate on the BufferedReader's fields directly.
        // `found` and `bytes` let us confirm both passes agree.
        var found: usize = 0;
        var bytes: usize = 0;
        out_stream.reset();
        var timer = try std.time.Timer.start();
        for (0..LOOPS) |_| {
            fbs.reset();
            out_stream.reset();
            var buffered = std.io.bufferedReader(fbs.reader());
            while (true) {
                streamUntilDelimiter(&buffered, out_writer, '\n', out.len) catch |err| switch (err) {
                    error.EndOfStream => break,
                    else => return err,
                };
                found += 1;
                bytes += out_stream.getWritten().len;
                // Start the next entry at the beginning of `out`.
                out_stream.reset();
            }
        }
        // Timer.lap() is divided by 1000 to report microseconds.
        const elapsed = timer.lap() / 1000;
        std.debug.print("Took: {d}us ({d}us / iteration) {d} entries, {d} bytes\n", .{ elapsed, elapsed / LOOPS, found, bytes });
    }

    {
        // Pass 2: go through the reader interface of the BufferedReader.
        // `found` and `bytes` let us confirm both passes agree.
        var found: usize = 0;
        var bytes: usize = 0;
        out_stream.reset();
        var timer = try std.time.Timer.start();
        for (0..LOOPS) |_| {
            fbs.reset();
            out_stream.reset();
            var buffered = std.io.bufferedReader(fbs.reader());
            // The reader handle itself is never reassigned.
            const reader = buffered.reader();
            while (true) {
                reader.streamUntilDelimiter(out_writer, '\n', out.len) catch |err| switch (err) {
                    error.EndOfStream => break,
                    else => return err,
                };
                found += 1;
                bytes += out_stream.getWritten().len;
                // Start the next entry at the beginning of `out`.
                out_stream.reset();
            }
        }
        // Timer.lap() is divided by 1000 to report microseconds.
        const elapsed = timer.lap() / 1000;
        std.debug.print("Took: {d}us ({d}us / iteration) {d} entries, {d} bytes\n", .{ elapsed, elapsed / LOOPS, found, bytes });
    }
}
/// Copies bytes from `buffered` into `writer` up to, but not including, the
/// next `delimiter`, scanning the buffered bytes in bulk instead of one byte
/// at a time.
///
/// `buffered` must be a pointer to a value exposing `buf`, `start`, `end` and
/// `unbuffered_reader` (with a `read([]u8)` method); `writer` must provide
/// `writeAll`.
///
/// On success the delimiter is consumed and `buffered.start` points just past
/// it.
///
/// Errors:
/// - error.StreamTooLong: `optional_max_size` is set and more than that many
///   bytes precede the delimiter. The chunk that crossed the limit is neither
///   written nor consumed.
/// - error.EndOfStream: the underlying reader returned 0 bytes before a
///   delimiter was found. Bytes seen up to that point have already been
///   written to `writer`.
/// - any error returned by the underlying reader or by `writer`.
fn streamUntilDelimiter(buffered: anytype, writer: anytype, delimiter: u8, optional_max_size: ?usize) !void {
    var written: usize = 0;
    while (true) {
        const start = buffered.start;
        // When the delimiter is not in the buffered bytes, act as if it sat
        // at `buffered.end`. That gives a single code path below for the
        // size check and the write.
        const pos = std.mem.indexOfScalar(u8, buffered.buf[start..buffered.end], delimiter) orelse (buffered.end - start);
        const delimiter_pos = start + pos;

        if (optional_max_size) |max| {
            written += pos;
            if (written > max) {
                return error.StreamTooLong;
            }
        }

        try writer.writeAll(buffered.buf[start..delimiter_pos]);

        // If indexOfScalar found the delimiter, we are done.
        if (delimiter_pos != buffered.end) {
            // +1 to skip over the delimiter
            buffered.start = delimiter_pos + 1;
            return;
        }

        // Everything buffered has now been handed to `writer`. Mark it as
        // consumed before refilling, so that a failed or empty read does not
        // leave the same bytes to be written again on a later call.
        buffered.start = buffered.end;

        // Refill the buffer and keep looking.
        const n = try buffered.unbuffered_reader.read(buffered.buf[0..]);
        if (n == 0) {
            return error.EndOfStream;
        }
        buffered.start = 0;
        buffered.end = n;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment