Skip to content

Instantly share code, notes, and snippets.

@aDotInTheVoid
Created April 25, 2024 21:20
Show Gist options
  • Save aDotInTheVoid/d09a5557778a91fb4b06277a23553d43 to your computer and use it in GitHub Desktop.
Save aDotInTheVoid/d09a5557778a91fb4b06277a23553d43 to your computer and use it in GitHub Desktop.
[package]
name="mcve"
edition="2021"
[lib]
path="mcve.rs"
[dependencies]
expect-test = "1.5.0"
winnow = "0.6.6"
#![cfg(test)]
use expect_test::expect;
use std::{
fmt,
iter::{Cloned, Enumerate},
ops,
slice::Iter,
};
use winnow::{
combinator::{delimited, repeat},
error::{ContextError, ParserError},
stream::{ContainsToken, Location, Offset, Stream, StreamIsPartial},
token::one_of,
Located, PResult, Parser,
};
/// The kind of a lexed token, independent of where it appeared.
#[derive(Clone, Copy, Debug, PartialEq)]
enum TokenKind {
    /// An opening parenthesis, `(`.
    LParen,
    /// A closing parenthesis, `)`.
    RParen,
    // (the real token set is larger; these two are enough here)
}
use TokenKind::*;
/// A single lexed token: what it is and where it was found.
///
/// `Debug` is implemented by hand elsewhere in this file rather than derived.
#[derive(Clone, PartialEq)]
pub struct Token {
    /// Which kind of token this is.
    kind: TokenKind,
    /// Range of the token within the lexed source string.
    span: ops::Range<usize>,
}
impl ContainsToken<Token> for TokenKind {
fn contains_token(&self, token: Token) -> bool {
token.kind == *self
}
}
/// Lets a bare `TokenKind` be used directly as a parser that consumes one
/// token of that kind and returns it.
///
/// The impl is generic over any non-partial-aware stream of `Token`s and any
/// parser error type for that stream.
impl<I, E> winnow::Parser<I, Token, E> for TokenKind
where
    I: Stream<Token = Token> + StreamIsPartial,
    E: ParserError<I>,
{
    fn parse_next(&mut self, input: &mut I) -> PResult<Token, E> {
        // `one_of` uses the `ContainsToken<Token>` impl of `TokenKind` to
        // decide whether the next token is accepted.
        one_of(*self).parse_next(input)
    }
}
/// Compact debug rendering of a token as `Kind(start..end)`, for example
/// `LParen(0..1)`.
impl fmt::Debug for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { kind, span } = self;
        write!(f, "{:?}({:?})", kind, span)
    }
}
/// Splits `src` into parenthesis tokens, skipping spaces.
///
/// Each token's span is the byte range of its character within `src`.
///
/// # Panics
///
/// Panics with `"Invalid token"` on any character other than `' '`, `'('`
/// or `')'`.
fn lex(src: &str) -> Vec<Token> {
    let mut tokens = Vec::new();
    // this is terrible, but it works for our purposes
    //
    // `char_indices` yields byte offsets (not character counts), so the spans
    // stay comparable with `str::len`-based offsets even if multi-byte
    // characters are ever accepted.
    for (start, c) in src.char_indices() {
        let kind = match c {
            ' ' => continue,
            '(' => LParen,
            ')' => RParen,
            _ => panic!("Invalid token"),
        };
        tokens.push(Token {
            kind,
            span: start..start + c.len_utf8(),
        });
    }
    tokens
}
/// Checks that `lex` drops spaces and emits one token per parenthesis, each
/// carrying the one-character span at which it was found.
#[test]
fn test_lexer() {
let toks = lex("( () () )");
// NOTE(review): `assert_debug_eq` compares against pretty-printed debug
// output, which normally indents nested items; the literal below shows no
// indentation -- confirm it was not stripped when this file was copied.
let exp = expect![[r#"
[
LParen(0..1),
LParen(2..3),
RParen(3..4),
LParen(5..6),
RParen(6..7),
RParen(8..9),
]
"#]];
exp.assert_debug_eq(&toks);
}
/// First input flavour: the token slice wrapped in winnow's `Located`.
///
/// As `test_input_1` shows, the spans this produces count tokens (`0..6` for
/// six tokens) rather than reusing the spans stored in the tokens themselves.
type Input1<'a> = Located<&'a [Token]>;
/// A parsed S-expression: a parenthesised list of nested S-expressions.
#[derive(Debug)]
#[allow(dead_code)] // fields only read by `Debug`
struct SExpr {
    /// The expressions nested directly inside this one.
    inner: Vec<SExpr>,
    /// The span the parser reported for this expression, delimiters included.
    span: ops::Range<usize>,
}
/// Parses one S-expression: an `LParen`, zero or more nested S-expressions,
/// then an `RParen`.
///
/// The returned `SExpr` carries the nested expressions and the span reported
/// by `with_span`, so the meaning of that span depends on how the input type
/// `I` implements `Location`.
///
/// The `TokenKind: Parser<..>` bound is what allows `LParen` / `RParen` to be
/// passed to `delimited` as parsers for this particular `I`.
fn sexpr<I>(i: &mut I) -> PResult<SExpr>
where
    I: Stream<Token = Token> + Location,
    TokenKind: Parser<I, Token, ContextError>,
{
    delimited(LParen, repeat(0.., sexpr), RParen)
        .with_span()
        .map(|(inner, span)| SExpr { inner, span })
        .parse_next(i)
}
/// Parses the lexed tokens through `Located<&[Token]>` (`Input1`).
///
/// The expected spans are token counts, not source offsets: the whole
/// expression is `0..6` because the input holds six tokens, even though the
/// lexer recorded spans up to `8..9`.
#[test]
fn test_input_1() {
let toks = lex("( () () )");
let input: Input1 = Located::new(&toks);
let tree = sexpr::<Input1>.parse(input).unwrap();
// NOTE(review): the literal below shows no indentation although
// `assert_debug_eq` compares against pretty-printed output -- confirm the
// indentation was not stripped when this file was copied.
let exp = expect![[r#"
SExpr {
inner: [
SExpr {
inner: [],
span: 1..3,
},
SExpr {
inner: [],
span: 3..5,
},
],
span: 0..6,
}
"#]];
exp.assert_debug_eq(&tree);
}
/// Second input flavour: a hand-rolled stream over the token slice whose
/// `Location` is taken from the tokens' own spans.
#[derive(Clone, Debug)]
struct Input2<'a> {
    /// The tokens the stream currently holds; every `Stream` method forwards
    /// to this slice.
    inner: &'a [Token],
}
/// Reports the start of the first held token's span as the current location.
impl Location for Input2<'_> {
    /// Returns `span.start` of the first token in `inner`.
    ///
    /// # Panics
    ///
    /// Panics when `inner` is empty, because there is no token left to read a
    /// position from.
    fn location(&self) -> usize {
        match self.inner {
            [next, ..] => next.span.start,
            [] => panic!("Can't find location of empty input"),
        }
    }
}
/// Iterator type used as `Stream::IterOffsets` by the custom inputs in this
/// file; it is what `iter_offsets()` on the wrapped `&[Token]` returns.
type IterOffsets<'a> = Enumerate<Cloned<Iter<'a, Token>>>;
/// Checkpoint type used as `Stream::Checkpoint` by the custom inputs in this
/// file; it is what `checkpoint()` on the wrapped `&[Token]` returns.
type Checkpoint<'a> = winnow::stream::Checkpoint<&'a [Token], &'a [Token]>;
impl<'a> Offset<Checkpoint<'a>> for Input2<'a> {
fn offset_from(&self, start: &Checkpoint<'a>) -> usize {
self.inner.offset_from(start)
}
}
impl<'a> Stream for Input2<'a> {
type Token = Token;
type Slice = &'a [Token];
type IterOffsets = IterOffsets<'a>;
type Checkpoint = Checkpoint<'a>;
fn iter_offsets(&self) -> Self::IterOffsets {
self.inner.iter_offsets()
}
fn eof_offset(&self) -> usize {
self.inner.eof_offset()
}
fn next_token(&mut self) -> Option<Self::Token> {
self.inner.next_token()
}
fn offset_for<P>(&self, predicate: P) -> Option<usize>
where
P: Fn(Self::Token) -> bool,
{
self.inner.offset_for(predicate)
}
fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
self.inner.offset_at(tokens)
}
fn next_slice(&mut self, offset: usize) -> Self::Slice {
self.inner.next_slice(offset)
}
fn checkpoint(&self) -> Self::Checkpoint {
self.inner.checkpoint()
}
fn reset(&mut self, checkpoint: &Self::Checkpoint) {
self.inner.reset(checkpoint)
}
fn raw(&self) -> &dyn fmt::Debug {
&self.inner
}
}
/// `Input2` does not support partial parsing, so there is no partial state
/// to save or restore.
impl StreamIsPartial for Input2<'_> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {
        // Nothing to record: the state type is `()`.
    }

    fn restore_partial(&mut self, _state: Self::PartialState) {
        // Nothing to restore.
    }

    fn is_partial_supported() -> bool {
        false
    }
}
/// Demonstrates the problem with `Input2`: parsing the whole input panics in
/// `Input2::location`, which cannot produce a location once no token is left.
#[test]
#[should_panic(expected = "Can't find location of empty input")]
fn test_input_2() {
    let tokens = lex("( () () )");
    let outcome = sexpr.parse(Input2 { inner: &tokens });
    outcome.unwrap();
}
/// Third input flavour: like `Input2`, but it also remembers a location to
/// report once all tokens are gone.
#[derive(Clone, Debug)]
struct Input3<'a> {
    /// The tokens the stream currently holds; every `Stream` method forwards
    /// to this slice.
    inner: &'a [Token],
    /// Location returned when `inner` is empty; `test_input_3` sets it to the
    /// byte length of the source string.
    end_byte_offset: usize,
}
/// Reports the start of the first held token's span, falling back to the
/// stored end offset when no token is left.
impl Location for Input3<'_> {
    fn location(&self) -> usize {
        self.inner
            .first()
            .map_or(self.end_byte_offset, |next| next.span.start)
    }
}
impl<'a> Offset<Checkpoint<'a>> for Input3<'a> {
fn offset_from(&self, start: &Checkpoint<'a>) -> usize {
self.inner.offset_from(start)
}
}
impl<'a> Stream for Input3<'a> {
type Token = Token;
type Slice = &'a [Token];
type IterOffsets = IterOffsets<'a>;
type Checkpoint = Checkpoint<'a>;
fn iter_offsets(&self) -> Self::IterOffsets {
self.inner.iter_offsets()
}
fn eof_offset(&self) -> usize {
self.inner.eof_offset()
}
fn next_token(&mut self) -> Option<Self::Token> {
self.inner.next_token()
}
fn offset_for<P>(&self, predicate: P) -> Option<usize>
where
P: Fn(Self::Token) -> bool,
{
self.inner.offset_for(predicate)
}
fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
self.inner.offset_at(tokens)
}
fn next_slice(&mut self, offset: usize) -> Self::Slice {
self.inner.next_slice(offset)
}
fn checkpoint(&self) -> Self::Checkpoint {
self.inner.checkpoint()
}
fn reset(&mut self, checkpoint: &Self::Checkpoint) {
self.inner.reset(checkpoint)
}
fn raw(&self) -> &dyn fmt::Debug {
&self.inner
}
}
/// `Input3` does not support partial parsing, so there is no partial state
/// to save or restore.
impl StreamIsPartial for Input3<'_> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {
        // Nothing to record: the state type is `()`.
    }

    fn restore_partial(&mut self, _state: Self::PartialState) {
        // Nothing to restore.
    }

    fn is_partial_supported() -> bool {
        false
    }
}
/// Parses the lexed tokens through `Input3`, which falls back to the source
/// length once the tokens run out.
///
/// The expected spans are source offsets, but each one ends where the *next*
/// remaining token starts rather than where its own last token ends: the
/// first inner `()` occupies `2..4` in the source yet is reported as `2..5`.
/// The outermost span ends at `end_byte_offset` (9).
#[test]
fn test_input_3() {
let src = "( () () )";
let toks = lex(src);
let input = Input3 {
inner: &toks,
end_byte_offset: src.len(),
};
let tree = sexpr.parse(input).unwrap();
// NOTE(review): the literal below shows no indentation although
// `assert_debug_eq` compares against pretty-printed output -- confirm the
// indentation was not stripped when this file was copied.
let exp = expect![[r#"
SExpr {
inner: [
SExpr {
inner: [],
span: 2..5,
},
SExpr {
inner: [],
span: 5..8,
},
],
span: 0..9,
}
"#]];
exp.assert_debug_eq(&tree);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment