Skip to content

Instantly share code, notes, and snippets.

@routevegetable
Last active April 9, 2016 09:58
Show Gist options
  • Save routevegetable/5999e932f39ea78dc16fbbe7766be43e to your computer and use it in GitHub Desktop.
Save routevegetable/5999e932f39ea78dc16fbbe7766be43e to your computer and use it in GitHub Desktop.
Monadic Java Parsers
/**
* Monadic Parsers for Java!
*/
public abstract class Parser<T> {
/**
* An example
*/
public static void main(String[] args) throws IOException {
// Consume some bytes and add them
Parser<Integer> adderParser = BYTE_PAIR.new Lift<Integer>() {
protected Integer f(List<Integer> value) {
return value.get(0) + value.get(1);
}
};
ParserResult<Integer> sumResult = adderParser.parse(System.in);
System.err.println("Sum: " + sumResult.obj);
}
/**
* A byte-reader
*/
public final static Parser<Integer> UINT_8 = new Parser<Integer>() {
protected ParserResult<Integer> parse(InputStream is) throws IOException {
return new ParserResult<>(1, (Integer)is.read());
}
};
/**
* For reading large numbers of bytes
*/
public final static Parser<byte[]> BYTES(final int n) {
return new Parser<byte[]>() {
protected ParserResult<byte[]> parse(InputStream is) throws IOException {
byte[] arr = new byte[n];
int fill = 0;
while(fill < n) is.read(arr, fill, n-fill);
return new ParserResult<>(n, arr);
}
};
}
/**
* Compose 2 byte-readers to make a byte-pair reader
*/
public final static Parser<List<Integer>> BYTE_PAIR = SEQ(UINT_8, UINT_8);
/**
* Make a u16-reader
*/
public final static Parser<Integer> LE_UINT_16 = BYTE_PAIR.new Lift<Integer>() {
protected Integer f(List<Integer> value) {
return value.get(0) | (value.get(1) << 8);
}
};
/**
* Read 3 ints
*/
public final static Parser<List<Integer>> THREE_INTS = SEQ(UINT_8, UINT_8, UINT_8);
/**
* Read a 4KB byte array
*/
public final static Parser<byte[]> FOUR_KB = BYTES(4096);
/**
* Read a variable-length array (prefixed with size-byte)
*/
public final static Parser<byte[]> VAR_BYTE_ARRAY = UINT_8.new Bind<byte[]>() {
protected Parser<byte[]> next(final Integer lenByte) {
return BYTES(lenByte);
}
};
/**
* Read a variable-length array (prefixed with size-byte), but fail the parse if it's longer than a given limit
*/
public final static Parser<byte[]> VAR_BYTE_ARRAY_LIMITED(final int max) {
return UINT_8.new Bind<byte[]>() {
protected Parser<byte[]> next(final Integer lenByte) {
return lenByte <= max ? BYTES(lenByte) : null;
}
};
}
/**
* A string terminated by a newline
*/
public final static Parser<String> LINE_STRING = UINT_8.new Bind<String>() {
protected Parser<String> next(final Integer charCode) {
// Handle the end case.
if(charCode == '\n') return Return("");
// Not end character - combine with rest of string
return LINE_STRING.new Lift<String>() {
protected String f(String str) {
return ((char)(int)charCode) + str;
}
};
}
};
/**
* A number of lines terminated with a "." line
*/
public final static Parser<List<String>> LINE_STRINGS = LINE_STRING.new Bind<List<String>>(){
protected Parser<List<String>> next(String firstLine) {
if(firstLine.equals(".")) {
return Return((List<String>)new ArrayList<String>());
}
return LINE_STRINGS.new Lift<List<String>>() {
protected List<String> f(List<String> nextLines) {
final List<String> result = new ArrayList<>();
result.add(firstLine);
result.addAll(nextLines);
return result;
}
};
}
};
/**
* Read 4KB, and convert to integers
*/
public final static Parser<List<Integer>> FOUR_KB_OF_INTS = FOUR_KB.new Lift<List<Integer>>() {
protected List<Integer> f(byte[] value) {
List<Integer> outs = new ArrayList<>(value.length);
for(int i = 0; i < value.length; i++)
outs.add((int) value[i]);
return outs;
}
};
/**
* Make this functionality generic over Parser<byte[]>
*/
public final static Parser<List<Integer>> BYTES_TO_INTS(Parser<byte[]> in) {
return in.new Lift<List<Integer>>() {
protected List<Integer> f(byte[] value) {
List<Integer> outs = new ArrayList<>(value.length);
for(int i = 0; i < value.length; i++)
outs.add((int) value[i]);
return outs;
}
};
}
/**
* Rewrite of FOUR_KB_OF_INTS to use above generic function
*/
public final static Parser<List<Integer>> FOUR_KB_OF_INTS_BETTER = BYTES_TO_INTS(FOUR_KB);
/**
* An operator which ensures a result is what is expected
*/
public final Parser<T> EXPECT(final T expected) {
return this.new Bind<T>() {
protected Parser<T> next(T actual) {
return ((expected == null && actual == null) || (expected != null && expected.equals(actual))) ? Return(actual) : null;
}
};
}
/**
* A parsing result
*/
public static class ParserResult<T> {
/**
* Number of bytes consumed
*/
public final int read;
/**
* Parsed object
*/
public final T obj;
public ParserResult(int read, T obj) {
this.read = read;
this.obj = obj;
}
}
/**
* Parse the contents of an InputStream
*
* @return A ParserResult<T> on success. Null on failure.
*/
protected abstract ParserResult<T> parse(InputStream is) throws IOException;
/* ---------- Monad Primitives ---------- */
/**
* Bind operator to compose parsers.
*/
public abstract class Bind<U> extends Parser<U> {
protected abstract Parser<U> next(T value);
@Override
protected final ParserResult<U> parse(InputStream is) throws IOException {
ParserResult<T> firstResult = Parser.this.parse(is);
if(firstResult != null) {
ParserResult<U> secondResult = next(firstResult.obj).parse(is);
return new ParserResult<>(firstResult.read + secondResult.read, secondResult.obj);
}
return null;
}
}
/**
* The 'Unit' parser.
*/
public static <T> Parser<T> Return(final T value) {
return new Parser<T>() {
protected final ParserResult<T> parse(InputStream is) throws IOException {
return new ParserResult<>(0, value);
}
};
}
/* ---------- Combinators ---------- */
/**
* Lift a function 'U f(T)', to make it transform this parser's output.
*/
public abstract class Lift<U> extends Parser<U> {
private final Parser<U> p;
public Lift() {
p = Parser.this.new Bind<U>() {
protected Parser<U> next(T value) {
return Return(f(value));
}
};
}
protected final ParserResult<U> parse(InputStream is) throws IOException {
return p.parse(is);
}
protected abstract U f(T value);
}
/**
* Combine a sequence of parsers.
* @param ps
* @return
*/
public static <T> Parser<List<T>> SEQ(Parser<T>... ps) {
return Seq(Arrays.asList(ps)).new Bind<List<T>>() {
protected Parser<List<T>> next(List<T> value) {
return Return(value);
}
};
}
private static <T> Parser<List<T>> Seq(List<Parser<T>> ps) {
if(ps.size() == 0)
return Return((List<T>)new ArrayList<T>());
Parser<T> head = ps.get(0);
final List<Parser<T>> tail = ps.subList(1, ps.size());
return head.new Bind<List<T>>() {
protected Parser<List<T>> next(final T firstValue) {
return Seq(tail).new Bind<List<T>>() {
protected Parser<List<T>> next(List<T> tailValues) {
List<T> result = new ArrayList<>();
result.add(firstValue);
result.addAll(tailValues);
return Return(result);
}
};
}
};
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment