lasandell/binary.fsx

## binary.fsx
// I'm using lots of operator overloading voodoo to avoid
// having to specify static type constraints manually.
// I'm also using MemoryStream / BinaryReader which is probably
// slow but it was the easiest way to read values sequentially.
// It's currently just using the size of the receiving datatype
// rather than specifying it, though that could change.

open System
open System.IO

// Shorthand for Unchecked.defaultof. It produces a dummy value of type ^t that
// is passed to the overloaded members below purely so that its static type can
// drive overload resolution.
let inline def< ^t > = Unchecked.defaultof< ^t >

// Overload table that maps each supported primitive type to the BinaryReader
// method that reads it. The right-hand operand of ($) is a dummy value: it is
// ignored, and only its static type matters because it selects the overload.
// Every overload returns a function that performs the read on a given reader.
type PrimConv = PrimConv with
    // 8-bit integers
    static member ($) (PrimConv, _:sbyte) = fun (reader:BinaryReader) -> reader.ReadSByte()
    static member ($) (PrimConv, _:byte) = fun (reader:BinaryReader) -> reader.ReadByte()
    // 16-bit integers
    static member ($) (PrimConv, _:int16) = fun (reader:BinaryReader) -> reader.ReadInt16()
    static member ($) (PrimConv, _:uint16) = fun (reader:BinaryReader) -> reader.ReadUInt16()
    // 32-bit integers
    static member ($) (PrimConv, _:int32) = fun (reader:BinaryReader) -> reader.ReadInt32()
    static member ($) (PrimConv, _:uint32) = fun (reader:BinaryReader) -> reader.ReadUInt32()
    // 64-bit integers
    static member ($) (PrimConv, _:int64) = fun (reader:BinaryReader) -> reader.ReadInt64()
    static member ($) (PrimConv, _:uint64) = fun (reader:BinaryReader) -> reader.ReadUInt64()
    // Floating point
    static member ($) (PrimConv, _:float32) = fun (reader:BinaryReader) -> reader.ReadSingle()
    static member ($) (PrimConv, _:float) = fun (reader:BinaryReader) -> reader.ReadDouble()

// Read a single value of type ^t from the reader. ^t comes from the return-type
// annotation, so the type the caller expects is what picks the PrimConv
// overload that gets applied to the reader.
let inline primConv reader : ^t =
    // def< ^t > is only a type witness for overload resolution.
    (PrimConv $ def< ^t >) reader

// Overload table for populating a tuple of 2 to 5 values. The right-hand
// operand of ($) is a dummy tuple whose arity selects the overload. Each
// overload returns a function that makes one primConv call per tuple element
// on the reader it is given.
type TupleConv = TupleConv with
//  A catch-all single-value overload was tried; the original author notes that
//  it "seems to mess things up", so it is left disabled:
//  static  member inline ($) (TupleConv, _) =
//      fun r -> primConv r
    // Pair.
    static member inline ($) (TupleConv, (_, _)) =
        fun r -> primConv r, primConv r
    // Triple.
    static member inline ($) (TupleConv, (_, _, _)) =
        fun r -> primConv r, primConv r, primConv r
    // 4-tuple.
    static member inline ($) (TupleConv, (_, _, _, _)) =
        fun r -> primConv r, primConv r, primConv r, primConv r
    // 5-tuple.
    static member inline ($) (TupleConv, (_, _, _, _, _)) =
        fun r -> primConv r, primConv r, primConv r, primConv r, primConv r

// Decode a tuple from a byte array. The array is wrapped in a MemoryStream and
// a BinaryReader (both released on exit via `use`), and the expected tuple
// type ^t selects the TupleConv overload that is applied to the reader.
let inline tupleConv (bytes:byte[]) : ^t =
    use source = new MemoryStream(bytes)
    use input = new BinaryReader(source)
    (TupleConv $ def< ^t >) input

// The main parsing function. It simply forwards to tupleConv; the tuple type
// expected at the call site determines what is read from bytes.
let inline parseBinary bytes =
    tupleConv bytes

// Active pattern to go with the function. Can't figure out
// how to make these work or do anything useful yet.
// let inline (|Binary2|) bytes : ^a * ^b =
//    parseBinary bytes

// Sample input: the eight bytes 0x11 through 0x88, in that order.
let data = [| 0x11uy; 0x22uy; 0x33uy; 0x44uy; 0x55uy; 0x66uy; 0x77uy; 0x88uy |]

// Destructure the parsed tuple with type-annotated patterns. The annotations
// are what tell parseBinary to produce an int32 followed by two int16 values.
let (x:int32, y:int16, z:int16) = parseBinary data
printfn "x = %X, y = %X, z = %X" x y z

## binary2_BinaryReader.fs
open System
open System.IO

// Shorthand for Unchecked.defaultof. It produces a dummy value of type ^t that
// is passed to the Convert overloads below purely so that its static type can
// drive overload resolution.
let inline def< ^t > = Unchecked.defaultof< ^t >

// Overload table with one Convert per supported primitive type. The second
// argument is a dummy whose static type selects the overload; its value is
// ignored. Each overload reads one value of that type from the reader.
type PrimitiveConversions =
    // 8-bit integers
    static member inline Convert (reader: BinaryReader, _: sbyte) : sbyte = reader.ReadSByte()
    static member inline Convert (reader: BinaryReader, _: byte) : byte = reader.ReadByte()
    // 16-bit integers
    static member inline Convert (reader: BinaryReader, _: int16) : int16 = reader.ReadInt16()
    static member inline Convert (reader: BinaryReader, _: uint16) : uint16 = reader.ReadUInt16()
    // 32-bit integers
    static member inline Convert (reader: BinaryReader, _: int32) : int32 = reader.ReadInt32()
    static member inline Convert (reader: BinaryReader, _: uint32) : uint32 = reader.ReadUInt32()
    // 64-bit integers
    static member inline Convert (reader: BinaryReader, _: int64) : int64 = reader.ReadInt64()
    static member inline Convert (reader: BinaryReader, _: uint64) : uint64 = reader.ReadUInt64()
    // Floating point
    static member inline Convert (reader: BinaryReader, _: single) : single = reader.ReadSingle()
    static member inline Convert (reader: BinaryReader, _: float) : float = reader.ReadDouble()

// Invokes the static Convert(BinaryReader, ^t) member found on ^c or ^t and
// returns its result. ^c is an explicit type parameter so that the caller can
// name the class holding the overloads; the original author notes that a
// specific type (PrimitiveConversions) cannot be written in the constraint.
let inline primConvHelper< ^c, ^t  when (^c or ^t) : (static member Convert : BinaryReader * ^t -> ^t) > reader =
    // def< ^t > is a dummy argument that only carries the type ^t.
    ((^c or ^t) : (static member Convert : BinaryReader * ^t -> ^t) (reader, def< ^t >))

// Read a single primitive value of type ^t from the reader and return it.
// The Convert overload is looked up via PrimitiveConversions; ^t is left to be
// inferred from the call site. (Unlike the byte-array version of this parser,
// no offset is returned here.)
let inline primConv reader =
    primConvHelper<PrimitiveConversions, ^t> reader

// All the overloads for populating a tuple. Currently supports 1 to 5 values.
// The second argument is a dummy whose shape (single value or tuple arity)
// selects the overload; each overload makes one primConv call per element.
type TupleConversions =
    // Single value.
    static member inline Convert (reader, _:^a) : ^a =
        primConv reader
    // Pair.
    static member inline Convert (reader, (_, _)) : ^a * ^b =
        primConv reader, primConv reader
    // Triple.
    static member inline Convert (reader, (_, _, _)) : ^a * ^b * ^c =
        primConv reader, primConv reader, primConv reader
    // 4-tuple.
    static member inline Convert (reader, (_, _, _, _)) : ^a * ^b * ^c * ^d =
        primConv reader, primConv reader, primConv reader, primConv reader
    // 5-tuple.
    static member inline Convert (reader, (_, _, _, _, _)) : ^a * ^b * ^c * ^d * ^e =
        primConv reader, primConv reader, primConv reader, primConv reader, primConv reader

// Invokes the static Convert(BinaryReader, ^t) member found on ^c or ^t and
// returns its result. ^c is an explicit type parameter so that the caller can
// name the class holding the overloads; the original author notes that a
// specific type (TupleConversions) cannot be written in the constraint.
let inline tupleConvHelper< ^c, ^t  when (^c or ^t) : (static member Convert : BinaryReader * ^t -> ^t) > reader =
    // def< ^t > is a dummy argument that only carries the type ^t.
    ((^c or ^t) : (static member Convert : BinaryReader * ^t -> ^t) (reader, def< ^t >))

// Read a tuple of primitive values from the reader. The Convert overload is
// looked up via TupleConversions; the result type is left as a wildcard so
// that it is inferred from the call site.
let inline tupleConv reader =
    tupleConvHelper<TupleConversions, _> reader

// Entry point: parse a byte array into whatever type the caller expects. A
// BinaryReader over a MemoryStream of the bytes is created and is disposed
// when the function returns.
let inline parseBinary (bytes:byte[]) =
    use input = new BinaryReader(new MemoryStream(bytes))
    tupleConv input

// Sample input: the eight bytes 0x11 through 0x88, in that order.
let data = [| 0x11uy; 0x22uy; 0x33uy; 0x44uy; 0x55uy; 0x66uy; 0x77uy; 0x88uy |]

// Destructure the parsed tuple with type-annotated patterns. The annotations
// are what tell parseBinary to produce an int32 followed by two int16 values.
let (x:int32), (y:int16), (z:int16) = parseBinary data
printfn "x = %X, y = %X, z = %X" x y z

// The same bytes parsed into a record: here the field types fix the tuple type.
type Record = { X: int32; Y: int16; Z: int16 }
let record =
    parseBinary data
    |> fun (x, y, z) -> { X = x; Y = y; Z = z }
printfn "x = %X, y = %X, z = %X" record.X record.Y record.Z

// Parse an int32 * int16 * int16 tuple from a byte array. This concrete
// wrapper is the function the author disassembled to inspect the compiled
// output.
let parseTuple bytes : int32 * int16 * int16 =
    parseBinary bytes

## binary2_BitConverter.fs
open System

// Shorthand for Unchecked.defaultof. It produces a dummy value of type ^t that
// is passed to the Convert overloads below purely so that its static type can
// drive overload resolution.
let inline def< ^t > = Unchecked.defaultof< ^t >

// Overload table with one Convert per supported primitive type. The third
// argument is a dummy whose static type selects the overload; its value is
// ignored. Single-byte types index the array directly; wider types are
// decoded with BitConverter starting at the given offset.
type PrimitiveConversions =
    // 8-bit integers
    static member inline Convert (bytes: byte[], offset: int, _: sbyte) = sbyte bytes.[offset]
    static member inline Convert (bytes: byte[], offset: int, _: byte) = bytes.[offset]
    // 16-bit integers
    static member inline Convert (bytes: byte[], offset: int, _: int16) = BitConverter.ToInt16(bytes, offset)
    static member inline Convert (bytes: byte[], offset: int, _: uint16) = BitConverter.ToUInt16(bytes, offset)
    // 32-bit integers
    static member inline Convert (bytes: byte[], offset: int, _: int32) = BitConverter.ToInt32(bytes, offset)
    static member inline Convert (bytes: byte[], offset: int, _: uint32) = BitConverter.ToUInt32(bytes, offset)
    // 64-bit integers
    static member inline Convert (bytes: byte[], offset: int, _: int64) = BitConverter.ToInt64(bytes, offset)
    static member inline Convert (bytes: byte[], offset: int, _: uint64) = BitConverter.ToUInt64(bytes, offset)
    // Floating point
    static member inline Convert (bytes: byte[], offset: int, _: single) = BitConverter.ToSingle(bytes, offset)
    static member inline Convert (bytes: byte[], offset: int, _: float) = BitConverter.ToDouble(bytes, offset)

// Invokes the static Convert(byte[], int, ^t) member found on ^c or ^t and
// returns its result. ^c is an explicit type parameter so that the caller can
// name the class holding the overloads; the original author notes that a
// specific type (PrimitiveConversions) cannot be written in the constraint.
let inline primConvHelper< ^c, ^t  when (^c or ^t) : (static member Convert : byte[] * int * ^t -> ^t) > bytes offset =
    // def< ^t > is a dummy argument that only carries the type ^t.
    ((^c or ^t) : (static member Convert : byte[] * int * ^t -> ^t) (bytes, offset, def< ^t >))

// Read a single primitive value of type ^t starting at offset. Returns the
// value paired with the offset just past it, i.e. offset + sizeof< ^t >, so
// that consecutive reads can be chained.
let inline primConv bytes offset =
    primConvHelper<PrimitiveConversions, ^t> bytes offset, offset + sizeof< ^t >

// All the overloads for populating a tuple. Currently supports 1 to 5 values.
// The second argument is a dummy whose shape (single value or tuple arity)
// selects the overload. Every overload starts reading at offset 0 and threads
// the offset returned by each primConv call into the next one; the offset
// produced by the final read is not needed, so it is discarded.
type TupleConversions =
    // Single value.
    static member inline Convert (bytes, _:^a) : ^a =
        let a, _ = primConv bytes 0
        a
    // Pair.
    static member inline Convert (bytes, (_, _)) : ^a * ^b =
        let a, offset = primConv bytes 0
        let b, _ = primConv bytes offset
        a, b
    // Triple.
    static member inline Convert (bytes, (_, _, _)) : ^a * ^b * ^c =
        let a, offset = primConv bytes 0
        let b, offset = primConv bytes offset
        let c, _ = primConv bytes offset
        a, b, c
    // 4-tuple.
    static member inline Convert (bytes, (_, _, _, _)) : ^a * ^b * ^c * ^d =
        let a, offset = primConv bytes 0
        let b, offset = primConv bytes offset
        let c, offset = primConv bytes offset
        let d, _ = primConv bytes offset
        a, b, c, d
    // 5-tuple.
    static member inline Convert (bytes, (_, _, _, _, _)) : ^a * ^b * ^c * ^d * ^e =
        let a, offset = primConv bytes 0
        let b, offset = primConv bytes offset
        let c, offset = primConv bytes offset
        let d, offset = primConv bytes offset
        let e, _ = primConv bytes offset
        a, b, c, d, e

// Invokes the static Convert(byte[], ^t) member found on ^c or ^t and returns
// its result. ^c is an explicit type parameter so that the caller can name the
// class holding the overloads; the original author notes that a specific type
// (TupleConversions) cannot be written in the constraint.
let inline tupleConvHelper< ^c, ^t  when (^c or ^t) : (static member Convert : byte[] * ^t -> ^t) > bytes =
    // def< ^t > is a dummy argument that only carries the type ^t.
    ((^c or ^t) : (static member Convert : byte[] * ^t -> ^t) (bytes, def< ^t >))

// Read a tuple of primitive values from the byte array. The Convert overload
// is looked up via TupleConversions; the result type is left as a wildcard so
// that it is inferred from the call site.
let inline tupleConv bytes =
    tupleConvHelper<TupleConversions, _> bytes

// The main parsing function. It simply forwards to tupleConv; the type
// expected at the call site determines what is read from bytes.
let inline parseBinary bytes =
    tupleConv bytes

// Sample input: the eight bytes 0x11 through 0x88, in that order.
let data = [| 0x11uy; 0x22uy; 0x33uy; 0x44uy; 0x55uy; 0x66uy; 0x77uy; 0x88uy |]

// Destructure the parsed tuple with type-annotated patterns. The annotations
// are what tell parseBinary to produce an int32 followed by two int16 values.
let (x:int32), (y:int16), (z:int16) = parseBinary data
printfn "x = %X, y = %X, z = %X" x y z

// The same bytes parsed into a record: here the field types fix the tuple type.
type Record = { X: int32; Y: int16; Z: int16 }
let record =
    parseBinary data
    |> fun (x, y, z) -> { X = x; Y = y; Z = z }
printfn "x = %X, y = %X, z = %X" record.X record.Y record.Z

## notes.md

      
    Raw
  

              notes.md
            
          
    I made two new versions of the parser. At first I thought it would be "faster" to parse directly from a byte array with BitConverter and make everything immutable. The result is here. But then I realized that in most cases you would be parsing lots of data from a stream anyway. So I made another that still uses a BinaryReader.
I also got rid of my weird conversion operators and just wrote the type constraints. All this was in an effort to simplify things so that when compiled the IL would be simple, like you said. Amazingly, due to the power of inlining and optimization, the compiler gets rid of all the tuples and dummy values and in the end you have something that looks like you just wrote the BinaryReader code yourself! You have to compile in release mode, of course.
For example, consider the following function that just parses a tuple:
```fsharp
let parseTuple bytes =
    parseBinary bytes : int32 * int16 * int16
```
When I disassembled this to C# with Reflector, this is what I got:
```csharp
public static Tuple<int, short, short> parseTuple(byte[] bytes)
{
    using (BinaryReader reader = new BinaryReader(new MemoryStream(bytes)))
    {
        return new Tuple<int, short, short>(reader.ReadInt32(), reader.ReadInt16(), reader.ReadInt16());
    }
}
```
Note that the tuple is only there because it's my return type. In other cases (such as a pattern match) it would be optimized away like everything else.
The code has some weirdness about it due to the limitations of the static type constraints. I keep trying things that I think should work but the compiler complains so I end up doing it some other way. I haven't found many comprehensive resources on this.
One thing that it doesn't have right now is the ability to parse an array of things (primitives or tuples), which I could probably add. I looked a little at your MD3 parser and it looks like you're parsing some fixed-length strings, which would be hard to specify with this thing since it's all based on types. In order to control the string lengths, I would have to add a tuple of lengths or something as an argument to the parseBinary function, but that would somewhat defeat the purpose by making the code much more verbose.
One thing that would be really cool is if I could create a type provider that allowed me to "annotate" a string type with its length, i.e. string<4>, which would create a static property on the fake "type" to allow me to retrieve the length when I was parsing it. That would seem to solve the problem, if I could figure out how to make it work. I have a feeling that type providers and statically-resolved type parameters might not play all that well together. Also, since string is sealed, I can't actually derive from it, so there might unfortunately have to be a wrapper type. I don't know if you can create fake derived types via erasure, but I'm guessing not.
	// I'm using lots of operator overloading voodoo to avoid
	// having to specify static type constraints manually.
	// I'm also using MemoryStream / BinaryReader which is probably
	// slow but it was the easiest way to read values sequentially.
	// It's currently just using the size of the receiving datatype
	// rather than specifying it, though that could change.

	open System
	open System.IO

	// Shorthand for Unchecked.defaultof. It produces a dummy value of type ^t that
	// is passed to the overloaded members below purely so that its static type can
	// drive overload resolution.
	let inline def< ^t > = Unchecked.defaultof< ^t >

	// All the overloads for reading the values. The right-hand operand of ($)
	// is a dummy value: it is ignored, and only its static type matters because
	// it selects the overload. Every overload returns a function that performs
	// the matching BinaryReader read. (Member indentation restored: F# requires
	// the members to be indented under the type.)
	type PrimConv = PrimConv with
	    static member ($) (PrimConv, _:byte) = fun (r:BinaryReader) -> r.ReadByte()
	    static member ($) (PrimConv, _:double) = fun (r:BinaryReader) -> r.ReadDouble()
	    static member ($) (PrimConv, _:int16) = fun (r:BinaryReader) -> r.ReadInt16()
	    static member ($) (PrimConv, _:int32) = fun (r:BinaryReader) -> r.ReadInt32()
	    static member ($) (PrimConv, _:int64) = fun (r:BinaryReader) -> r.ReadInt64()
	    static member ($) (PrimConv, _:sbyte) = fun (r:BinaryReader) -> r.ReadSByte()
	    static member ($) (PrimConv, _:single) = fun (r:BinaryReader) -> r.ReadSingle()
	    static member ($) (PrimConv, _:uint16) = fun (r:BinaryReader) -> r.ReadUInt16()
	    static member ($) (PrimConv, _:uint32) = fun (r:BinaryReader) -> r.ReadUInt32()
	    static member ($) (PrimConv, _:uint64) = fun (r:BinaryReader) -> r.ReadUInt64()

	// Read a single value of type ^t from the reader. ^t comes from the
	// return-type annotation, so the type the caller expects picks the PrimConv
	// overload. (Body indentation restored.)
	let inline primConv reader : ^t =
	    // def< ^t > is only a type witness for overload resolution.
	    (PrimConv $ def< ^t >) reader

	// All the overloads for populating a tuple of 2 to 5 values. The right-hand
	// operand of ($) is a dummy tuple whose arity selects the overload; each
	// overload returns a function that makes one primConv call per element.
	// (Member and body indentation restored.)
	type TupleConv = TupleConv with
	    // This seems to mess things up
	    // static member inline ($) (TupleConv, _) =
	    //     fun r -> primConv r
	    static member inline ($) (TupleConv, (_, _)) =
	        fun r -> primConv r, primConv r
	    static member inline ($) (TupleConv, (_, _, _)) =
	        fun r -> primConv r, primConv r, primConv r
	    static member inline ($) (TupleConv, (_, _, _, _)) =
	        fun r -> primConv r, primConv r, primConv r, primConv r
	    static member inline ($) (TupleConv, (_, _, _, _, _)) =
	        fun r -> primConv r, primConv r, primConv r, primConv r, primConv r

	// Populate a tuple from a byte array. The array is wrapped in a MemoryStream
	// and a BinaryReader (both released on exit via `use`), and the expected
	// tuple type ^t selects the TupleConv overload. (Body indentation restored.)
	let inline tupleConv bytes : ^t =
	    use stream = new MemoryStream(bytes:byte[])
	    use reader = new BinaryReader(stream)
	    (TupleConv $ def< ^t >) reader

	// The main parsing function. It simply forwards to tupleConv; the tuple type
	// expected at the call site determines what is read from bytes.
	// (Body indentation restored.)
	let inline parseBinary bytes =
	    tupleConv bytes

	// Active pattern to go with the function. Can't figure out
	// how to make these work or do anything useful yet.
	// let inline (|Binary2|) bytes : ^a * ^b =
	//    parseBinary bytes

	// Some dummy data: 0x1122334455667788
	// (The array delimiters are written as plain [| and |]; the backslash-escaped
	// pipes were a markdown artifact and are not valid F#.)
	let data = [|0x11uy;0x22uy;0x33uy;0x44uy;0x55uy;0x66uy;0x77uy;0x88uy|]

	// Get binary data via let bindings: type inference automatically
	// causes parseBinary function to read correct number of bytes based
	// on the type!
	let (x:int32), (y:int16), (z:int16) = parseBinary data
	printfn "x = %X, y = %X, z = %X" x y z