quat1024/ParserExploration2.java

## ParserExploration2.java
package agency.highlysuspect.parsefunny;

import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.stream.Collectors;

//This is sort of a synthesis of the parsers described in these two articles:
//
//matklad - "Simple but Powerful Pratt Parsing"
//  https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
//  Very featureful Pratt parser that can recognize prefix, postfix, infix, and "around"fix operators.
//  Uses a somewhat delicate numeric precedence scheme. Implemented in Rust.
//  *He also has this nice article https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
//
//Jamie Brandon (scattered-thoughts) - "Better operator precedence"
//  https://www.scattered-thoughts.net/writing/better-operator-precedence/
//  Introduces a "comparePrecedence" function instead of using numeric precedence.
//  But the given implementation (in Zig) doesn't cover anything other than infix operators.
//
//Wanted the best of both. I think I got it.
//
//There's also this nice article about pratt parsing by the Crafting Interpreters developer:
//Bob Nystrom - Pratt Parsers: Expression Parsing Made Easy
//  https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
//
//But it gets a little bogged down in all the Java, so much so that someone wrote a "pratt parsing
//without virtual dispatch or Java" article about it (http://www.oilshell.org/blog/2016/11/03.html),
//and I wanted to redeem Java a bit :) So here's a complete lexer and parser in ~200 lines of commented java 17.

public class ParserExploration2 {
	static void check(boolean arg, String msg) {
		if(!arg) throw new IllegalStateException(msg);
	}

	/// Lexing ///

	enum TokenType {
		EOF,  //token after the end of the file (very common technique to avoid Optional<Token> everywhere)
		ATOM, //stand-in for a "number" or "name literal" token you might have in a real parser
		STAR, SLASH, SQUIGGLE, PLUS, MINUS, DOT, BANG, PERCENT, //grab bag of operators
		LPAREN, RPAREN, //parenthesis
	}

	record Token(TokenType type, String display) {
		@Override public String toString() { return display; }
	}

	static class Lexer {
		//Toy-quality lexer ported from matklad's post, lexes 1-character long operations and literals only.
		//Don't get bogged down with this, it's not the important part.
		Lexer(String in) {
			this.tokens = in.chars()
				.filter(i -> !Character.isWhitespace(i))
				.mapToObj(i -> switch(i) {
					case '*' -> new Token(TokenType.STAR, "*");
					case '/' -> new Token(TokenType.SLASH, "/");
					case '~' -> new Token(TokenType.SQUIGGLE, "~");
					case '+' -> new Token(TokenType.PLUS, "+");
					case '-' -> new Token(TokenType.MINUS, "-");
					case '.' -> new Token(TokenType.DOT, ".");
					case '!' -> new Token(TokenType.BANG, "!");
					case '%' -> new Token(TokenType.PERCENT, "%");
					case '(' -> new Token(TokenType.LPAREN, "(");
					case ')' -> new Token(TokenType.RPAREN, ")");
					default -> new Token(TokenType.ATOM, Character.toString(i));
				}).collect(Collectors.toCollection(ArrayDeque::new));
		}

		//A "peeking iterator" API over the lexemes. Note that there's no way to see two tokens ahead.
		final Deque<Token> tokens;

		Token peek() {
			return tokens.isEmpty() ? new Token(TokenType.EOF, "") : tokens.peekFirst();
		}

		Token pop() {
			return tokens.isEmpty() ? new Token(TokenType.EOF, "") : tokens.pollFirst();
		}
	}

	/// AST sketch. Renders toString as an S-expression. ///

	interface Expr {}
	record Atom(String s) implements Expr {
		@Override public String toString() { return s; }
	}
	record UnaryOp(OperationType op, Expr operand) implements Expr {
		@Override public String toString() { return "(%s %s)".formatted(op, operand); }
	}
	record BinaryOp(OperationType op, Expr left, Expr right) implements Expr {
		@Override public String toString() { return "(%s %s %s)".formatted(op, left, right); }
	}

	/// Parsing ///

	//Entrypoint for the recursive expression parser. In a real parsing application, this would be one of many little functions.
	Expr expr(Lexer lex) {
		return pratt(lex, null); //`null` means "there is no operation to my left to consider binding to".
	}

	//Pratt parser bullshit magic:
	Expr pratt(Lexer lex, @Nullable OperationType leftOp) {
		Token lhsToken = lex.pop();
		Expr lhs = switch(lhsToken.type) {
			case ATOM -> new Atom(lhsToken.toString());
			case LPAREN -> {
				//When crossing into parenthesis, revert to the null operation type.
				//Nothing should be able to bind leftwards *through* an opening paren.
				Expr e = pratt(lex, null);
				check(lex.pop().type == TokenType.RPAREN, "expected closing parenthesis");
				yield e;
			}
			default -> {
				//is it prefix?
				OperationType asPrefix = OperationType.prefixOpFrom(lhsToken);
				check(asPrefix != null, lhsToken + " is not a prefix operation!");
				//we already consumed the prefix operator's token
				Expr rhs = pratt(lex, asPrefix); //recurse to find the right-hand side of the expression
				yield new UnaryOp(asPrefix, rhs); //apply the prefix operator to the *next* operand
				//look for another infix or postfix operation
			}
		};

		while(true) {
			Token opToken = lex.peek();
			//(Special casing for EOF, RParen, etc is not actually needed; xxxOpFrom returning `null` takes care of it.)

			//is it infix?
			OperationType infixOp = OperationType.infixOpFrom(opToken);
			if(infixOp != null && bindsToRight(leftOp, infixOp)) {
				lex.pop(); //consume the infix operator's token
				Expr rhs = pratt(lex, infixOp); //recurse to find the right-hand side of the expression (!HARD to understand!)
				lhs = new BinaryOp(infixOp, lhs, rhs); //apply the infix operator to *both* operands
				continue; //look for another infix or postfix operation
			}

			//is it postfix?
			OperationType postfixOp = OperationType.postfixOpFrom(opToken);
			if(postfixOp != null && bindsToRight(leftOp, postfixOp)) {
				lex.pop(); //consume the postfix operator's token
				lhs = new UnaryOp(postfixOp, lhs); //apply the postfix operator to the *previous* operand
				continue; //look for another infix or postfix operation
			}

			return lhs;
		}
	}

	//Whether the operation on the right is "stronger" than the one on the left.
	static boolean bindsToRight(@Nullable OperationType left, @NotNull OperationType right) {
		if(left == null) return true; //There isn't an operator on the left to even consider binding to!

		//Key observation: this is the only place precedence and rightAssociative are read.
		//The precedence implementation can be swapped out for anything (such as a handwritten table),
		//if manual precedence-number fiddling isn't up your alley. Exceptions for specific pairs of
		//operators can also be added to this method.
		if(left.precedence == right.precedence) return left.rightAssociative; //break ties like this
		else return right.precedence > left.precedence;
	}

	//A token corresponds to zero or more "logical operations" depending on the token's fixity.
	//For example, the - token can be unary minus or subtraction, and unary minus binds tighter than
	//multiplication, but multiplication binds tighter than subtraction. In other words, tokens don't
	//really "have precedence", but (token, fixity) tuples do. The typical way of modelling this is
	//maintaining separate precedence tables: one for prefix operators, one for infix operators, and
	//one for postfix operators. This is a little fiddly, so in the spirit of reducing the amount of
	//"magic numbers" in pratt parsing, I tried mapping each operator and fixity into a *single*
	//OperationType table, giving a name to the operation at the same time (so I can say "factorial"
	//instead of "postfix bang"). Maybe it makes more sense this way? It's probably a little easier to
	//reason about when it comes time to add more operations? I think it's just as flexible? Idk.
	enum OperationType {
		ADD           (20, false, "+"),
		SUBTRACT      (20, false, "-"),
		SQUIGGLE      (30, false, "~"), //Arbitrary binary operator with precedence between + and *
		PERCENT       (30,  true, "%"), //Same, but right-associative
		MULTIPLY      (40, false, "*"),
		DIVIDE        (40, false, "/"),
		UNARY_PLUS    (50, false, "+"),
		UNARY_MINUS   (50, false, "-"),
		FACTORIAL     (60, false, "!"), //Postfix
		UNARY_SQUIGGLE(70, false, "~"),
		COMPOSITION   (100, true, "."); //Right-associative

		OperationType(int precedence, boolean rightAssociative, String display) {
			this.precedence = precedence;
			this.rightAssociative = rightAssociative;
			this.display = display;
		}

		final int precedence;
		final boolean rightAssociative;
		final String display;
		@Override public String toString() { return this.display; }

		//Tables for converting tokens into logical operations, depending on the token's fixity.
		//Note that if a token has infix *and* postfix forms, the main parser loop will just pick one.
		//I think correctly disambiguating cases like that requires backtracking or infinite lookahead.
		//Prefix and infix is okay though.
		static @Nullable OperationType prefixOpFrom(Token tok) {
			return switch(tok.type) {
				case PLUS -> OperationType.UNARY_PLUS;
				case MINUS -> OperationType.UNARY_MINUS;
				case SQUIGGLE -> OperationType.UNARY_SQUIGGLE;
				default -> null;
			};
		}

		static @Nullable OperationType infixOpFrom(Token tok) {
			return switch(tok.type) {
				case PLUS -> OperationType.ADD;
				case MINUS -> OperationType.SUBTRACT;
				case SQUIGGLE -> OperationType.SQUIGGLE;
				case PERCENT -> OperationType.PERCENT;
				case STAR -> OperationType.MULTIPLY;
				case SLASH -> OperationType.DIVIDE;
				case DOT -> OperationType.COMPOSITION;
				default -> null;
			};
		}

		static @Nullable OperationType postfixOpFrom(Token tok) {
			return switch(tok.type) {
				case BANG -> OperationType.FACTORIAL;
				default -> null;
			};
		}
	}

	/// demonstration ///

	void checkParse(String in, String expect) {
		String out = expr(new Lexer(in)).toString();
		System.out.println("in:     " + in);
		System.out.println("out:    " + out);
		System.out.println("expect: " + expect);
		check(out.equals(expect), "Expected " + expect + " but found " + out);
	}

	void go() {
		//left-associative operators
		checkParse("1 + 2 + 3 + 4", "(+ (+ (+ 1 2) 3) 4)");
		checkParse("1 ~ 2 ~ 3 ~ 4", "(~ (~ (~ 1 2) 3) 4)");
		checkParse("1 * 2 * 3 * 4", "(* (* (* 1 2) 3) 4)");
		checkParse("1 + 2 - 3 + 4", "(+ (- (+ 1 2) 3) 4)"); //operators with the same priority
		checkParse("1 * 2 / 3 * 4", "(* (/ (* 1 2) 3) 4)");

		//right-associative operators
		checkParse("1 . 2 . 3 . 4", "(. 1 (. 2 (. 3 4)))");
		checkParse("1 % 2 % 3 % 4", "(% 1 (% 2 (% 3 4)))");

		//mixed associativity among operators with the same precedence
		checkParse("1 ~ 2 ~ 3 % 4", "(% (~ (~ 1 2) 3) 4)");
		checkParse("1 ~ 2 % 3 % 4", "(% (~ 1 2) (% 3 4))");
		checkParse("1 % 2 ~ 3 ~ 4", "(% 1 (~ (~ 2 3) 4))");
		checkParse("1 % 2 % 3 ~ 4", "(% 1 (% 2 (~ 3 4)))");

		//operator precedence
		checkParse("1 + 2 ~ 3", "(+ 1 (~ 2 3))"); //plus is weaker than squiggle
		checkParse("1 * 2 ~ 3", "(~ (* 1 2) 3)"); //star is stronger than squiggle
		checkParse("1 + 2 * 3 + 4", "(+ (+ 1 (* 2 3)) 4)");
		checkParse("1 * 2 + 3 * 4", "(+ (* 1 2) (* 3 4))");

		//prefix operators
		checkParse("-1", "(- 1)");
		checkParse("- - 1", "(- (- 1))");
		checkParse("- + 1", "(- (+ 1))");
		checkParse("- - 1 - 2", "(- (- (- 1)) 2)");
		checkParse("- - 1 - - 2", "(- (- (- 1)) (- 2))");
		checkParse("1--2", "(- 1 (- 2))");
		checkParse("- 1 * - 2", "(* (- 1) (- 2))");

		//postfix operators
		checkParse("5 !", "(! 5)");
		checkParse("5 ! !", "(! (! 5))");
		checkParse("- 5 !", "(- (! 5))"); //unary - is weaker than !
		checkParse("~ 5 !", "(! (~ 5))"); //unary ~ is stronger than !
		checkParse("---5!!!", "(- (- (- (! (! (! 5))))))");
		checkParse("~~~5!!!", "(! (! (! (~ (~ (~ 5))))))");
		checkParse("-~~5!!!", "(- (! (! (! (~ (~ 5))))))");
		checkParse("~-~5!!!", "(~ (- (! (! (! (~ 5))))))");
		checkParse("1 + 3!", "(+ 1 (! 3))");
		checkParse("1 + 3 ! + 5", "(+ (+ 1 (! 3)) 5)");

		//parentheticals
		checkParse("( 1 + 2 ) * 3", "(* (+ 1 2) 3)");
		checkParse("1 + ( 2 * 3 )", "(+ 1 (* 2 3))");
		checkParse("(1+2)*(3+4)", "(* (+ 1 2) (+ 3 4))");
		checkParse("~(1)", "(~ 1)");
		checkParse("~((((((1))))))", "(~ 1)");
		checkParse("~(((~(((1))))))", "(~ (~ 1))");
		checkParse("(4 + 5)!", "(! (+ 4 5))");

		//stupid viral math problems from the internet
		checkParse("6 / 2 * (1 + 2)", "(* (/ 6 2) (+ 1 2))");
		checkParse("2 + 2 * 4", "(+ 2 (* 2 4))");
		checkParse("1 + 1 * 0 + 1", "(+ (+ 1 (* 1 0)) 1)");
	}

	public static void main(String[] args) {
		new ParserExploration2().go();
	}
}

## ParserExploration2_InfixOnly.java
package agency.highlysuspect.parsefunny;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.stream.Collectors;

//This shorter version strips away the pre/postfix parsing & parenthesis parsing, leaving only the
//core of the algorithm. It's also implemented more traditionally, using a numeric "binding power".

public class ParserExploration2_InfixOnly {
	static void check(boolean arg, String msg) {
		if(!arg) throw new IllegalStateException(msg);
	}

	/// Lexing ///

	enum TokenType {
		EOF,  //token after the end of the file (very common technique to avoid Optional<Token> everywhere)
		ATOM, //stand-in for a "number" or "name literal" token you might have in a real parser
		STAR, SLASH, SQUIGGLE, PLUS, MINUS, DOT, PERCENT, //grab bag of infix binary operators
	}

	record Token(TokenType type, String display) {
		@Override public String toString() { return display; }
	}

	static class Lexer {
		//Toy-quality lexer ported from matklad's post, lexes 1-character long operations and literals only.
		//Don't get bogged down with this, it's not the important part.
		Lexer(String in) {
			this.tokens = in.chars()
				.filter(i -> !Character.isWhitespace(i))
				.mapToObj(i -> switch(i) {
					case '*' -> new Token(TokenType.STAR, "*");
					case '/' -> new Token(TokenType.SLASH, "/");
					case '~' -> new Token(TokenType.SQUIGGLE, "~");
					case '+' -> new Token(TokenType.PLUS, "+");
					case '-' -> new Token(TokenType.MINUS, "-");
					case '.' -> new Token(TokenType.DOT, ".");
					case '%' -> new Token(TokenType.PERCENT, "%");
					default -> new Token(TokenType.ATOM, Character.toString(i));
				}).collect(Collectors.toCollection(ArrayDeque::new));
		}

		//A "peeking iterator" API over the lexemes. Note that there's no way to see two tokens ahead.
		final Deque<Token> tokens;

		Token peek() {
			return tokens.isEmpty() ? new Token(TokenType.EOF, "") : tokens.peekFirst();
		}

		Token pop() {
			return tokens.isEmpty() ? new Token(TokenType.EOF, "") : tokens.pollFirst();
		}
	}

	/// AST sketch. Renders toString as an S-expression. ///

	interface Expr {}
	record Atom(String s) implements Expr {
		@Override public String toString() { return s; }
	}
	record BinaryOp(String op, Expr left, Expr right) implements Expr {
		@Override public String toString() { return "(%s %s %s)".formatted(op, left, right); }
	}

	/// Parsing ///

	static final int NOT_INFIX_OP = 0;

	Expr expr(Lexer lex) {
		return pratt(lex, NOT_INFIX_OP); //"there is no operation to my left to consider binding to"
	}

	Expr pratt(Lexer lex, int minBindingPower) {
		Token lhsToken = lex.pop();
		check(lhsToken.type == TokenType.ATOM, "expected atom");
		Expr lhs = new Atom(lhsToken.toString());

		while(true) {
			Token opToken = lex.peek();
			if(leftPower(opToken) == NOT_INFIX_OP || leftPower(opToken) < minBindingPower) return lhs;

			lex.pop();
			Expr rhs = pratt(lex, rightPower(opToken)); //recurse to find the right-hand side of the expression
			lhs = new BinaryOp(opToken.toString(), lhs, rhs); //^ that is the HARD part to understand about this function
		}
	}

	static int leftPower(Token t) {
		return switch(t.type) {
			case PLUS, MINUS -> 20;
			case SQUIGGLE, PERCENT -> 30;
			case STAR, SLASH -> 40;
			case DOT -> 100;
			default -> NOT_INFIX_OP;
		};
	}

	static int rightPower(Token t) {
		return switch(t.type) {
			case PLUS, MINUS, SQUIGGLE, STAR, SLASH -> leftPower(t) + 1;
			case DOT, PERCENT -> leftPower(t) - 1;
			default -> NOT_INFIX_OP;
		};
	}

	/// demonstration ///

	void checkParse(String in, String expect) {
		String out = expr(new Lexer(in)).toString();
		System.out.println("in:     " + in);
		System.out.println("out:    " + out);
		System.out.println("expect: " + expect);
		check(out.equals(expect), "Expected " + expect + " but found " + out);
	}

	void go() {
		//left-associative operators
		checkParse("1 + 2 + 3 + 4", "(+ (+ (+ 1 2) 3) 4)");
		checkParse("1 ~ 2 ~ 3 ~ 4", "(~ (~ (~ 1 2) 3) 4)");
		checkParse("1 * 2 * 3 * 4", "(* (* (* 1 2) 3) 4)");
		checkParse("1 + 2 - 3 + 4", "(+ (- (+ 1 2) 3) 4)"); //operators with the same priority
		checkParse("1 * 2 / 3 * 4", "(* (/ (* 1 2) 3) 4)");

		//right-associative operators
		checkParse("1 . 2 . 3 . 4", "(. 1 (. 2 (. 3 4)))");
		checkParse("1 % 2 % 3 % 4", "(% 1 (% 2 (% 3 4)))");

		//mixed associativity among operators with the same precedence
		checkParse("1 ~ 2 ~ 3 % 4", "(% (~ (~ 1 2) 3) 4)");
		checkParse("1 ~ 2 % 3 % 4", "(% (~ 1 2) (% 3 4))");
		checkParse("1 % 2 ~ 3 ~ 4", "(% 1 (~ (~ 2 3) 4))");
		checkParse("1 % 2 % 3 ~ 4", "(% 1 (% 2 (~ 3 4)))");

		//operator precedence
		checkParse("1 + 2 ~ 3", "(+ 1 (~ 2 3))"); //plus is weaker than squiggle
		checkParse("1 * 2 ~ 3", "(~ (* 1 2) 3)"); //star is stronger than squiggle
		checkParse("1 + 2 * 3 + 4", "(+ (+ 1 (* 2 3)) 4)");
		checkParse("1 * 2 + 3 * 4", "(+ (* 1 2) (* 3 4))");
	}

	public static void main(String[] args) {
		new ParserExploration2_InfixOnly().go();
	}
}
	package agency.highlysuspect.parsefunny;

	import org.jetbrains.annotations.NotNull;
	import org.jetbrains.annotations.Nullable;

	import java.util.ArrayDeque;
	import java.util.Deque;
	import java.util.stream.Collectors;

	//This is sort of a synthesis of the parsers described in these two articles:
	//
	//matklad - "Simple but Powerful Pratt Parsing"
	// https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
	// Very featureful Pratt parser that can recognize prefix, postfix, infix, and "around"fix operators.
	// Uses a somewhat delicate numeric precedence scheme. Implemented in Rust.
	// *He also has this nice article https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
	//
	//Jamie Brandon (scattered-thoughts) - "Better operator precedence"
	// https://www.scattered-thoughts.net/writing/better-operator-precedence/
	// Introduces a "comparePrecedence" function instead of using numeric precedence.
	// But the given implementation (in Zig) doesn't cover anything other than infix operators.
	//
	//Wanted the best of both. I think I got it.
	//
	//There's also this nice article about pratt parsing by the Crafting Interpreters developer:
	//Bob Nystrom - Pratt Parsers: Expression Parsing Made Easy
	// https://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
	//
	//But it gets a little bogged down in all the Java, so much so that someone wrote a "pratt parsing
	//without virtual dispatch or Java" article about it (http://www.oilshell.org/blog/2016/11/03.html),
	//and I wanted to redeem Java a bit :) So here's a complete lexer and parser in ~200 lines of commented java 17.

	public class ParserExploration2 {
	static void check(boolean arg, String msg) {
	if(!arg) throw new IllegalStateException(msg);
	}

	/// Lexing ///

	enum TokenType {
	EOF, //token after the end of the file (very common technique to avoid Optional<Token> everywhere)
	ATOM, //stand-in for a "number" or "name literal" token you might have in a real parser
	STAR, SLASH, SQUIGGLE, PLUS, MINUS, DOT, BANG, PERCENT, //grab bag of operators
	LPAREN, RPAREN, //parenthesis
	}

	record Token(TokenType type, String display) {
	@Override public String toString() { return display; }
	}

	static class Lexer {
	//Toy-quality lexer ported from matklad's post, lexes 1-character long operations and literals only.
	//Don't get bogged down with this, it's not the important part.
	Lexer(String in) {
	this.tokens = in.chars()
	.filter(i -> !Character.isWhitespace(i))
	.mapToObj(i -> switch(i) {
	case '' -> new Token(TokenType.STAR, "");
	case '/' -> new Token(TokenType.SLASH, "/");
	case '~' -> new Token(TokenType.SQUIGGLE, "~");
	case '+' -> new Token(TokenType.PLUS, "+");
	case '-' -> new Token(TokenType.MINUS, "-");
	case '.' -> new Token(TokenType.DOT, ".");
	case '!' -> new Token(TokenType.BANG, "!");
	case '%' -> new Token(TokenType.PERCENT, "%");
	case '(' -> new Token(TokenType.LPAREN, "(");
	case ')' -> new Token(TokenType.RPAREN, ")");
	default -> new Token(TokenType.ATOM, Character.toString(i));
	}).collect(Collectors.toCollection(ArrayDeque::new));
	}

	//A "peeking iterator" API over the lexemes. Note that there's no way to see two tokens ahead.
	final Deque<Token> tokens;

	Token peek() {
	return tokens.isEmpty() ? new Token(TokenType.EOF, "") : tokens.peekFirst();
	}

	Token pop() {
	return tokens.isEmpty() ? new Token(TokenType.EOF, "") : tokens.pollFirst();
	}
	}

	/// AST sketch. Renders toString as an S-expression. ///

	interface Expr {}
	record Atom(String s) implements Expr {
	@Override public String toString() { return s; }
	}
	record UnaryOp(OperationType op, Expr operand) implements Expr {
	@Override public String toString() { return "(%s %s)".formatted(op, operand); }
	}
	record BinaryOp(OperationType op, Expr left, Expr right) implements Expr {
	@Override public String toString() { return "(%s %s %s)".formatted(op, left, right); }
	}

	/// Parsing ///

	//Entrypoint for the recursive expression parser. In a real parsing application, this would be one of many little functions.
	Expr expr(Lexer lex) {
	return pratt(lex, null); //`null` means "there is no operation to my left to consider binding to".
	}

	//Pratt parser bullshit magic:
	Expr pratt(Lexer lex, @Nullable OperationType leftOp) {
	Token lhsToken = lex.pop();
	Expr lhs = switch(lhsToken.type) {
	case ATOM -> new Atom(lhsToken.toString());
	case LPAREN -> {
	//When crossing into parenthesis, revert to the null operation type.
	//Nothing should be able to bind leftwards through an opening paren.
	Expr e = pratt(lex, null);
	check(lex.pop().type == TokenType.RPAREN, "expected closing parenthesis");
	yield e;
	}
	default -> {
	//is it prefix?
	OperationType asPrefix = OperationType.prefixOpFrom(lhsToken);
	check(asPrefix != null, lhsToken + " is not a prefix operation!");
	//we already consumed the prefix operator's token
	Expr rhs = pratt(lex, asPrefix); //recurse to find the right-hand side of the expression
	yield new UnaryOp(asPrefix, rhs); //apply the prefix operator to the next operand
	//look for another infix or postfix operation
	}
	};

	while(true) {
	Token opToken = lex.peek();
	//(Special casing for EOF, RParen, etc is not actually needed; xxxOpFrom returning `null` takes care of it.)

	//is it infix?
	OperationType infixOp = OperationType.infixOpFrom(opToken);
	if(infixOp != null && bindsToRight(leftOp, infixOp)) {
	lex.pop(); //consume the infix operator's token
	Expr rhs = pratt(lex, infixOp); //recurse to find the right-hand side of the expression (!HARD to understand!)
	lhs = new BinaryOp(infixOp, lhs, rhs); //apply the infix operator to both operands
	continue; //look for another infix or postfix operation
	}

	//is it postfix?
	OperationType postfixOp = OperationType.postfixOpFrom(opToken);
	if(postfixOp != null && bindsToRight(leftOp, postfixOp)) {
	lex.pop(); //consume the postfix operator's token
	lhs = new UnaryOp(postfixOp, lhs); //apply the postfix operator to the previous operand
	continue; //look for another infix or postfix operation
	}

	return lhs;
	}
	}

	//Whether the operation on the right is "stronger" than the one on the left.
	static boolean bindsToRight(@Nullable OperationType left, @NotNull OperationType right) {
	if(left == null) return true; //There isn't an operator on the left to even consider binding to!

	//Key observation: this is the only place precedence and rightAssociative are read.
	//The precedence implementation can be swapped out for anything (such as a handwritten table),
	//if manual precedence-number fiddling isn't up your alley. Exceptions for specific pairs of
	//operators can also be added to this method.
	if(left.precedence == right.precedence) return left.rightAssociative; //break ties like this
	else return right.precedence > left.precedence;
	}

	//A token corresponds to zero or more "logical operations" depending on the token's fixity.
	//For example, the - token can be unary minus or subtraction, and unary minus binds tighter than
	//multiplication, but multiplication binds tighter than subtraction. In other words, tokens don't
	//really "have precedence", but (token, fixity) tuples do. The typical way of modelling this is
	//maintaining separate precedence tables: one for prefix operators, one for infix operators, and
	//one for postfix operators. This is a little fiddly, so in the spirit of reducing the amount of
	//"magic numbers" in pratt parsing, I tried mapping each operator and fixity into a single
	//OperationType table, giving a name to the operation at the same time (so I can say "factorial"
	//instead of "postfix bang"). Maybe it makes more sense this way? It's probably a little easier to
	//reason about when it comes time to add more operations? I think it's just as flexible? Idk.
	enum OperationType {
	ADD (20, false, "+"),
	SUBTRACT (20, false, "-"),
	SQUIGGLE (30, false, "~"), //Arbitrary binary operator with precedence between + and *
	PERCENT (30, true, "%"), //Same, but right-associative
	MULTIPLY (40, false, "*"),
	DIVIDE (40, false, "/"),
	UNARY_PLUS (50, false, "+"),
	UNARY_MINUS (50, false, "-"),
	FACTORIAL (60, false, "!"), //Postfix
	UNARY_SQUIGGLE(70, false, "~"),
	COMPOSITION (100, true, "."); //Right-associative

	OperationType(int precedence, boolean rightAssociative, String display) {
	this.precedence = precedence;
	this.rightAssociative = rightAssociative;
	this.display = display;
	}

	final int precedence;
	final boolean rightAssociative;
	final String display;
	@Override public String toString() { return this.display; }

	//Tables for converting tokens into logical operations, depending on the token's fixity.
	//Note that if a token has infix and postfix forms, the main parser loop will just pick one.
	//I think correctly disambiguating cases like that requires backtracking or infinite lookahead.
	//Prefix and infix is okay though.
	static @Nullable OperationType prefixOpFrom(Token tok) {
	return switch(tok.type) {
	case PLUS -> OperationType.UNARY_PLUS;
	case MINUS -> OperationType.UNARY_MINUS;
	case SQUIGGLE -> OperationType.UNARY_SQUIGGLE;
	default -> null;
	};
	}

	static @Nullable OperationType infixOpFrom(Token tok) {
	return switch(tok.type) {
	case PLUS -> OperationType.ADD;
	case MINUS -> OperationType.SUBTRACT;
	case SQUIGGLE -> OperationType.SQUIGGLE;
	case PERCENT -> OperationType.PERCENT;
	case STAR -> OperationType.MULTIPLY;
	case SLASH -> OperationType.DIVIDE;
	case DOT -> OperationType.COMPOSITION;
	default -> null;
	};
	}

	static @Nullable OperationType postfixOpFrom(Token tok) {
	return switch(tok.type) {
	case BANG -> OperationType.FACTORIAL;
	default -> null;
	};
	}
	}

	/// demonstration ///

	void checkParse(String in, String expect) {
	String out = expr(new Lexer(in)).toString();
	System.out.println("in: " + in);
	System.out.println("out: " + out);
	System.out.println("expect: " + expect);
	check(out.equals(expect), "Expected " + expect + " but found " + out);
	}

	void go() {
	//left-associative operators
	checkParse("1 + 2 + 3 + 4", "(+ (+ (+ 1 2) 3) 4)");
	checkParse("1 ~ 2 ~ 3 ~ 4", "(~ (~ (~ 1 2) 3) 4)");
	checkParse("1 * 2 * 3 * 4", "(* (* (* 1 2) 3) 4)");
	checkParse("1 + 2 - 3 + 4", "(+ (- (+ 1 2) 3) 4)"); //operators with the same priority
	checkParse("1 * 2 / 3 * 4", "(* (/ (* 1 2) 3) 4)");

	//right-associative operators
	checkParse("1 . 2 . 3 . 4", "(. 1 (. 2 (. 3 4)))");
	checkParse("1 % 2 % 3 % 4", "(% 1 (% 2 (% 3 4)))");

	//mixed associativity among operators with the same precedence
	checkParse("1 ~ 2 ~ 3 % 4", "(% (~ (~ 1 2) 3) 4)");
	checkParse("1 ~ 2 % 3 % 4", "(% (~ 1 2) (% 3 4))");
	checkParse("1 % 2 ~ 3 ~ 4", "(% 1 (~ (~ 2 3) 4))");
	checkParse("1 % 2 % 3 ~ 4", "(% 1 (% 2 (~ 3 4)))");

	//operator precedence
	checkParse("1 + 2 ~ 3", "(+ 1 (~ 2 3))"); //plus is weaker than squiggle
	checkParse("1 * 2 ~ 3", "(~ (* 1 2) 3)"); //star is stronger than squiggle
	checkParse("1 + 2 * 3 + 4", "(+ (+ 1 (* 2 3)) 4)");
	checkParse("1 * 2 + 3 * 4", "(+ (* 1 2) (* 3 4))");

	//prefix operators
	checkParse("-1", "(- 1)");
	checkParse("- - 1", "(- (- 1))");
	checkParse("- + 1", "(- (+ 1))");
	checkParse("- - 1 - 2", "(- (- (- 1)) 2)");
	checkParse("- - 1 - - 2", "(- (- (- 1)) (- 2))");
	checkParse("1--2", "(- 1 (- 2))");
	checkParse("- 1 * - 2", "(* (- 1) (- 2))");

	//postfix operators
	checkParse("5 !", "(! 5)");
	checkParse("5 ! !", "(! (! 5))");
	checkParse("- 5 !", "(- (! 5))"); //unary - is weaker than !
	checkParse("~ 5 !", "(! (~ 5))"); //unary ~ is stronger than !
	checkParse("---5!!!", "(- (- (- (! (! (! 5))))))");
	checkParse("~~~5!!!", "(! (! (! (~ (~ (~ 5))))))");
	checkParse("-~~5!!!", "(- (! (! (! (~ (~ 5))))))");
	checkParse("~-~5!!!", "(~ (- (! (! (! (~ 5))))))");
	checkParse("1 + 3!", "(+ 1 (! 3))");
	checkParse("1 + 3 ! + 5", "(+ (+ 1 (! 3)) 5)");

	//parentheticals
	checkParse("( 1 + 2 ) * 3", "(* (+ 1 2) 3)");
	checkParse("1 + ( 2 * 3 )", "(+ 1 (* 2 3))");
	checkParse("(1+2)(3+4)", "( (+ 1 2) (+ 3 4))");
	checkParse("~(1)", "(~ 1)");
	checkParse("~((((((1))))))", "(~ 1)");
	checkParse("~(((~(((1))))))", "(~ (~ 1))");
	checkParse("(4 + 5)!", "(! (+ 4 5))");

	//stupid viral math problems from the internet
	checkParse("6 / 2 * (1 + 2)", "(* (/ 6 2) (+ 1 2))");
	checkParse("2 + 2 * 4", "(+ 2 (* 2 4))");
	checkParse("1 + 1 * 0 + 1", "(+ (+ 1 (* 1 0)) 1)");
	}

	public static void main(String[] args) {
	new ParserExploration2().go();
	}
	}