# Cargo.toml
[package]
name = "mcve"
edition = "2021"

[lib]
path = "mcve.rs"

[dependencies]
expect-test = "1.5.0"
winnow = "0.6.6"

// mcve.rs
#![cfg(test)]
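
// MCVE: parsing a slice of `Token`s with winnow while trying to get spans in
// source *byte* offsets rather than token indices. Three input types are
// tried below: `Located<&[Token]>` (`Input1`), a custom stream whose
// `Location` is the start byte of the next unconsumed token (`Input2`), and
// the same again with an explicit end-of-input offset (`Input3`).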
use expect_test::expect;
use std::{
    fmt,
    iter::{Cloned, Enumerate},
    ops,
    slice::Iter,
};
use winnow::{
    combinator::{delimited, repeat},
    error::{ContextError, ParserError},
    stream::{ContainsToken, Location, Offset, Stream, StreamIsPartial},
    token::one_of,
    Located, PResult, Parser,
};

#[derive(Debug, PartialEq, Clone, Copy)]
enum TokenKind {
    LParen,
    RParen,
    // actually more
}
use TokenKind::*;

#[derive(PartialEq, Clone)]
pub struct Token {
    kind: TokenKind,
    span: ops::Range<usize>,
}
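
// Lets a `TokenKind` be used as a token predicate (e.g. with `one_of`):
// a token matches the kind if its `kind` field is equal to it.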
impl ContainsToken<Token> for TokenKind {
    fn contains_token(&self, token: Token) -> bool {
        token.kind == *self
    }
}
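
// Lets a bare `TokenKind` act as a parser that consumes a single token of
// that kind, so `LParen`/`RParen` can be passed straight to `delimited` below.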
impl<I, E> winnow::Parser<I, Token, E> for TokenKind
where
    I: Stream<Token = Token> + StreamIsPartial,
    E: ParserError<I>,
{
    fn parse_next(&mut self, input: &mut I) -> PResult<Token, E> {
        one_of(*self).parse_next(input)
    }
}

impl fmt::Debug for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?}({:?})", self.kind, self.span)
    }
}
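
/// Splits `src` into parenthesis tokens, recording each token's position in
/// the source (char index, which equals the byte offset here since only ASCII
/// is accepted) as its `span`.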
fn lex(src: &str) -> Vec<Token> {
    let mut r = vec![];
    // this is terrible, but it works for our purposes
    for (idx, c) in src.chars().enumerate() {
        match c {
            ' ' => {}
            '(' => r.push(Token {
                kind: LParen,
                span: idx..idx + 1,
            }),
            ')' => r.push(Token {
                kind: RParen,
                span: idx..idx + 1,
            }),
            _ => panic!("Invalid token"),
        }
    }
    r
}

#[test]
fn test_lexer() {
    let toks = lex("( () () )");
    let exp = expect![[r#"
        [
            LParen(0..1),
            LParen(2..3),
            RParen(3..4),
            LParen(5..6),
            RParen(6..7),
            RParen(8..9),
        ]
    "#]];
    exp.assert_debug_eq(&toks);
}
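
// Attempt 1: wrap the token slice in `Located`. `Located` measures offsets
// into the stream it wraps, so `.with_span()` reports *token indices*
// (see the 1..3 / 0..6 spans in `test_input_1`), not source byte offsets.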
type Input1<'a> = Located<&'a [Token]>;

#[derive(Debug)]
#[allow(dead_code)] // fields only read by `Debug`
struct SExpr {
    inner: Vec<SExpr>,
    span: ops::Range<usize>,
}
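
// A parenthesized list of sub-expressions. `.with_span()` derives the reported
// range from the input's `Location` before and after the match, which is where
// the three input types differ.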
fn sexpr<I>(i: &mut I) -> PResult<SExpr>
where
    I: Stream<Token = Token> + Location,
    TokenKind: Parser<I, Token, ContextError>,
{
    delimited(LParen, repeat(0.., sexpr), RParen)
        .with_span()
        .map(|(inner, span)| SExpr { inner, span })
        .parse_next(i)
}

#[test]
fn test_input_1() {
    let toks = lex("( () () )");
    let input: Input1 = Located::new(&toks);
    let tree = sexpr::<Input1>.parse(input).unwrap();
    let exp = expect![[r#"
        SExpr {
            inner: [
                SExpr {
                    inner: [],
                    span: 1..3,
                },
                SExpr {
                    inner: [],
                    span: 3..5,
                },
            ],
            span: 0..6,
        }
    "#]];
    exp.assert_debug_eq(&tree);
}
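
// Attempt 2: a custom stream over `&[Token]` that delegates everything to the
// slice but reports its `Location` as the start byte of the next unconsumed
// token. Spans now come out in source byte offsets, but once the final `)` of
// the outermost s-expression has been consumed the input is empty and
// `location()` has nothing to return, hence the panic in `test_input_2`.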
#[derive(Debug, Clone)]
struct Input2<'a> {
    inner: &'a [Token],
}

impl Location for Input2<'_> {
    fn location(&self) -> usize {
        match self.inner.first() {
            Some(t) => t.span.start,
            None => panic!("Can't find location of empty input"),
        }
    }
}

type IterOffsets<'a> = Enumerate<Cloned<Iter<'a, Token>>>;
type Checkpoint<'a> = winnow::stream::Checkpoint<&'a [Token], &'a [Token]>;

impl<'a> Offset<Checkpoint<'a>> for Input2<'a> {
    fn offset_from(&self, start: &Checkpoint<'a>) -> usize {
        self.inner.offset_from(start)
    }
}

impl<'a> Stream for Input2<'a> {
    type Token = Token;
    type Slice = &'a [Token];
    type IterOffsets = IterOffsets<'a>;
    type Checkpoint = Checkpoint<'a>;

    fn iter_offsets(&self) -> Self::IterOffsets {
        self.inner.iter_offsets()
    }
    fn eof_offset(&self) -> usize {
        self.inner.eof_offset()
    }
    fn next_token(&mut self) -> Option<Self::Token> {
        self.inner.next_token()
    }
    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.inner.offset_for(predicate)
    }
    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        self.inner.offset_at(tokens)
    }
    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        self.inner.next_slice(offset)
    }
    fn checkpoint(&self) -> Self::Checkpoint {
        self.inner.checkpoint()
    }
    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.inner.reset(checkpoint)
    }
    fn raw(&self) -> &dyn fmt::Debug {
        &self.inner
    }
}

impl StreamIsPartial for Input2<'_> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}
    fn restore_partial(&mut self, _state: Self::PartialState) {}
    fn is_partial_supported() -> bool {
        false
    }
}

#[test]
#[should_panic = "Can't find location of empty input"]
fn test_input_2() {
    let toks = lex("( () () )");
    let input = Input2 { inner: &toks };
    sexpr.parse(input).unwrap();
}
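
// Attempt 3: like `Input2`, but also carries the byte length of the source so
// `location()` can answer at end of input. `test_input_3` now gets byte-offset
// spans; note that a span's end is the start of the next unconsumed token
// (or `end_byte_offset` at EOF), not the end of the last token consumed, which
// is why the first inner `()` at bytes 2..4 shows up as 2..5.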
#[derive(Debug, Clone)]
struct Input3<'a> {
    inner: &'a [Token],
    end_byte_offset: usize,
}

impl Location for Input3<'_> {
    fn location(&self) -> usize {
        match self.inner.first() {
            Some(t) => t.span.start,
            None => self.end_byte_offset,
        }
    }
}

impl<'a> Offset<Checkpoint<'a>> for Input3<'a> {
    fn offset_from(&self, start: &Checkpoint<'a>) -> usize {
        self.inner.offset_from(start)
    }
}

impl<'a> Stream for Input3<'a> {
    type Token = Token;
    type Slice = &'a [Token];
    type IterOffsets = IterOffsets<'a>;
    type Checkpoint = Checkpoint<'a>;

    fn iter_offsets(&self) -> Self::IterOffsets {
        self.inner.iter_offsets()
    }
    fn eof_offset(&self) -> usize {
        self.inner.eof_offset()
    }
    fn next_token(&mut self) -> Option<Self::Token> {
        self.inner.next_token()
    }
    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.inner.offset_for(predicate)
    }
    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        self.inner.offset_at(tokens)
    }
    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        self.inner.next_slice(offset)
    }
    fn checkpoint(&self) -> Self::Checkpoint {
        self.inner.checkpoint()
    }
    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.inner.reset(checkpoint)
    }
    fn raw(&self) -> &dyn fmt::Debug {
        &self.inner
    }
}

impl StreamIsPartial for Input3<'_> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}
    fn restore_partial(&mut self, _state: Self::PartialState) {}
    fn is_partial_supported() -> bool {
        false
    }
}

#[test]
fn test_input_3() {
    let src = "( () () )";
    let toks = lex(src);
    let input = Input3 {
        inner: &toks,
        end_byte_offset: src.len(),
    };
    let tree = sexpr.parse(input).unwrap();
    let exp = expect![[r#"
        SExpr {
            inner: [
                SExpr {
                    inner: [],
                    span: 2..5,
                },
                SExpr {
                    inner: [],
                    span: 5..8,
                },
            ],
            span: 0..9,
        }
    "#]];
    exp.assert_debug_eq(&tree);
}