Skip to content

Instantly share code, notes, and snippets.

@slimsag
Created March 12, 2021 07:29
Show Gist options
  • Save slimsag/5a7ec9eb53d63b027e36a10c9dc86598 to your computer and use it in GitHub Desktop.
Save slimsag/5a7ec9eb53d63b027e36a10c9dc86598 to your computer and use it in GitHub Desktop.
const std = @import("std");
const Allocator = std.mem.Allocator;
pub const Error = error{
EndOfStream,
Utf8InvalidStartByte,
} || std.fs.File.ReadError || std.fs.File.SeekError || std.mem.Allocator.Error;
pub fn Parser(comptime Value: type, comptime Reader: type) type {
return struct {
const Self = @This();
pub const ReaderT = Reader;
pub const ValueT = Value;
_parse: fn (self: *Self, allocator: *Allocator, src: *Reader) Error!?Value,
pub fn parse(self: *Self, allocator: *Allocator, src: *Reader) Error!?Value {
return self._parse(self, allocator, src);
}
};
}
pub fn String(comptime Reader: type) type {
return struct {
parser: P = .{ ._parse = parse },
want: []const u8,
const Self = @This();
pub const P = Parser([]const u8, Reader);
pub fn init(want: []const u8) Self {
return Self{ .want = want };
}
fn parse(parser: *P, allocator: *Allocator, src: *Reader) Error!?[]const u8 {
const self = @fieldParentPtr(Self, "parser", parser);
const buf = try allocator.alloc(u8, self.want.len);
!! buf^ here can leak if the try below fails. I fixed this in the article.
const read = try src.reader().readAll(buf);
std.log.debug("Literal read {} buf {s} want.len {}", .{ read, buf[0..read], self.want.len });
if (read < self.want.len or !std.mem.eql(u8, buf, self.want)) {
try src.seekableStream().seekBy(-@intCast(i64, read));
allocator.free(buf);
return null;
}
return buf;
}
};
}
pub fn OneOf(comptime Value: type, comptime Reader: type) type {
return struct {
parser: P = .{ ._parse = parse },
parsers: []*P,
const Self = @This();
const P = Parser(Value, Reader);
pub fn init(parsers: []*P) Self {
return Self{ .parsers = parsers };
}
fn parse(parser: *P, allocator: *Allocator, src: *Reader) Error!?Value {
const self = @fieldParentPtr(Self, "parser", parser);
for (self.parsers) |one_of_parser| {
const result = try one_of_parser.parse(allocator, src);
if (result != null) {
return result;
}
}
return null;
}
};
}
pub fn Seq(comptime Value: type, comptime Reader: type) type {
return struct {
parser: P = .{ ._parse = parse },
parsers: []*P,
const Self = @This();
const P = Parser(Value, Reader);
pub fn init(parsers: []*P) Self {
return Self{ .parsers = parsers };
}
fn parse(parser: *P, allocator: *Allocator, src: *Reader) Error!?Value {
const self = @fieldParentPtr(Self, "parser", parser);
var results = std.ArrayList(std.meta.Child(Value)).init(allocator);
!! results^ here can leak if the try below fails.
const pos = try src.seekableStream().getPos();
for (self.parsers) |seq_parser| {
const result = try seq_parser.parse(allocator, src);
std.log.debug("result {s}\n", .{result});
if (result == null) {
try src.seekableStream().seekTo(pos);
results.deinit();
return null;
}
try results.appendSlice(result.?);
allocator.free(result.?);
!! ^ this will probably be costly, and it assumes the value from your other parsers are heap-allocated pointers
!! which likely won't always be true (or you won't want it to be for perf.)
}
return results.toOwnedSlice();
}
};
}
pub fn Opt(comptime P: type) type {
return struct {
parser: P = .{ ._parse = parse },
base_parser: *P,
const Self = @This();
pub fn init(base_parser: *P) Self {
return Self{ .base_parser = base_parser };
}
fn parse(parser: *P, allocator: *Allocator, src: *P.ReaderT) Error!?P.ValueT {
const self = @fieldParentPtr(Self, "parser", parser);
const result = if (try self.base_parser.parse(allocator, src)) |res| res else std.mem.zeroes(P.ValueT);
!! I'm not positive but I feel like there'd be a better way to express optionality,
!! this means the parser always succeeds but can return a zero value for an optional parse
!! and you'd have to map it to a more meaningful value by checking if it is zero. Maybe
!! init() could take the zero value to return in the case of no value being parsed?
!! e.g. because not every type's zero value will be useful or valid. Consider an AST node
!! that requires a tag.
std.log.debug("opt result {s}", .{result});
return result;
}
};
}
!! This is a cool idea, I like it!
pub fn ParserFamily(comptime Value: type, comptime Reader: type) type {
return struct {
const P = Parser(Value, Reader);
fn wrap(p: anytype) fn (anytype) callconv(.Inline) *P {
return struct {
fn func(args: anytype) callconv(.Inline) *P {
return &p(args).parser;
}
}.func;
}
fn wrap2(p: anytype) fn ([]*P) callconv(.Inline) *P {
return struct {
fn func(args: []*P) callconv(.Inline) *P {
return &p(args).parser;
}
}.func;
}
pub const str = wrap(String(Reader).init);
pub const oneof = wrap2(OneOf(Value, Reader).init);
pub const seq = wrap2(Seq(Value, Reader).init);
pub const opt = wrap(Opt(P).init);
};
}
// ------------------------------
// Tests
// ------------------------------
const t = std.testing;
fn testCase(input: []const u8, parser: anytype, expected: ?[]const u8, remainder: ?[]const u8) !void {
const allocator = t.allocator;
var reader = std.io.fixedBufferStream(input);
var result = try parser.parse(allocator, &reader);
std.log.debug("result {s}", .{result});
if (expected == null) {
if (result != null) std.log.err("expected null but got {s}", .{result});
t.expect(result == null);
} else {
t.expect(result != null);
t.expectEqualStrings(expected.?, result.?);
allocator.free(result.?);
}
if (remainder) |rem| {
var buf: [0x100]u8 = undefined;
const bytes_read = try reader.read(&buf);
t.expectEqualStrings(rem, buf[0..bytes_read]);
}
}
const Fbs = std.io.FixedBufferStream([]const u8);
const Str = String(Fbs);
const Pf = ParserFamily([]const u8, Fbs);
test "literal" {
try testCase("abcdef", Pf.str("abc"), "abc", "def");
try testCase("abcdef", Pf.str("abce"), null, "abcdef");
try testCase("def", Pf.str("abc"), null, "def");
}
test "oneof" {
var p = Pf.oneof(&.{
Pf.str("cat"),
Pf.str("dog"),
Pf.str("sheep"),
});
try testCase("catdogsheep", p, "cat", "dogsheep");
try testCase("dogsheep", p, "dog", "sheep");
try testCase("sheep", p, "sheep", "");
try testCase("beep", p, null, "beep");
}
test "seq" {
var seq = Pf.seq(&.{
Pf.str("cat"),
Pf.str("dog"),
Pf.str("sheep"),
});
try testCase("catdogsheep", seq, "catdogsheep", "");
try testCase("catdogshee", seq, null, "catdogshee");
try testCase("catdog", seq, null, "catdog");
try testCase("cat", seq, null, "cat");
}
test "seq oneof" {
var p = Pf.seq(&.{
Pf.oneof(&.{
Pf.str("cat"),
Pf.str("dog"),
}),
Pf.str("sheep"),
});
try testCase("catdogsheep", p, null, "catdogsheep");
try testCase("catsheep", p, "catsheep", "");
try testCase("dogsheep", p, "dogsheep", "");
try testCase("cat", p, null, "cat");
try testCase("dog", p, null, "dog");
try testCase("dogshee", p, null, "dogshee");
}
test "oneof seq" {
var p = Pf.oneof(&.{
Pf.seq(&.{
Pf.str("cat"),
Pf.str("dog"),
}),
Pf.str("sheep"),
});
try testCase("catdog", p, "catdog", "");
try testCase("sheep", p, "sheep", "");
try testCase("catsheep", p, null, "catsheep");
try testCase("dogsheep", p, null, "dogsheep");
try testCase("cat", p, null, "cat");
try testCase("dog", p, null, "dog");
try testCase("catdogshee", p, "catdog", "shee");
}
test "opt" {
{
var p = Pf.opt(Pf.str("cat"));
try testCase("catdog", p, "cat", "dog");
try testCase("dog", p, "", "dog");
}
{
var p = Pf.opt(Pf.str("cat"));
try testCase("catdog", p, "cat", "dog");
try testCase("dog", p, "", "dog");
}
{
var p = Pf.opt(Pf.seq(&.{ Pf.str("cat"), Pf.str("dog") }));
try testCase("catdog", p, "catdog", "");
try testCase("cat", p, "", "cat");
try testCase("dog", p, "", "dog");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment