- Want a serialization library which:
- Supports schema evolution (version tolerant)
- Is suitable for persistence
- Supports the rich type system of .NET (cyclic references, generics, polymorphism, referential equality)
- Is fast and compact
- Requires minimal effort to use
- Supports safe/stable serialization of types which the user does not control and therefore cannot alter (e.g., via surrogates)
- Has a documented wire format
- Supports a serialization/deserialization context, so that GrainReferences deserialized through it retain seamless access to the runtime.
- Core concepts:
- Simple wire format which supports a minimal number of primitives falling into 4 categories:
- Fixed length (most numerics, unless specifically annotated)
- Variable length (variable-length integer encoding; useful for length, count, and index properties, which tend to be relatively small and 0-based)
- Length-prefixed (strings, arrays of fixed-width primitives)
- Tag-delimited (objects, potentially also for collections with non-computed length such as IEnumerable)
- Type information is embedded, but not required for parsing.
- Separation of wire type & runtime type.
- A library of application-defined runtime types is available during encoding & decoding.
- Types can be parameterized (support for generics)
- Types which are not specified in the type library can be explicitly named.
- These named types are runtime-specific (i.e., .NET-specific)
- Note: may want to restrict this?
- Objects can contain references, including reference cycles.
- Fields are identified by a numeric tag.
- Encoding and decoding support a single pass.
// Tag byte layout, most significant bit first: [W W W] [S S] [F F F]
//   W = wire type, S = schema type specifier, F = field id

/// <summary>
/// Selects the first 3 bits of a tag byte, which are dedicated to the wire type.
/// </summary>
public const byte WireTypeMask = 0b1110_0000;

/// <summary>
/// Selects the next 2 bits of a tag byte, which are dedicated to the schema type specifier,
/// if the schema type is expected.
/// </summary>
public const byte SchemaTypeMask = 0b0001_1000;

/// <summary>
/// Selects the final 3 bits of a tag byte, which are used for the field id,
/// if the field id is expected.
/// </summary>
public const byte FieldIdMask = 0b0000_0111;

/// <summary>
/// Has the same bit pattern as <see cref="FieldIdMask"/>: all three field id bits set.
/// </summary>
// NOTE(review): presumably a sentinel meaning the field id does not fit in the tag byte
// and is encoded separately - confirm against the reader/writer before relying on this.
public const byte FieldIdCompleteMask = 0b0000_0111;
/// <summary>
/// The 3-bit wire type of a tag, stored already shifted into the top three bits of the byte.
/// </summary>
public enum WireType : byte
{
    /// <summary>Followed by a VarInt.</summary>
    VarInt = 0b0000_0000,

    /// <summary>
    /// Followed by field specifiers, then an Extended tag with EndTagDelimited as the extended wire type.
    /// </summary>
    TagDelimited = 0b0010_0000,

    /// <summary>Followed by a VarInt length representing the number of bytes which follow.</summary>
    LengthPrefixed = 0b0100_0000,

    /// <summary>Followed by 4 bytes.</summary>
    Fixed32 = 0b0110_0000,

    /// <summary>Followed by 8 bytes.</summary>
    Fixed64 = 0b1000_0000,

    /// <summary>Followed by 16 bytes.</summary>
    Fixed128 = 0b1010_0000,

    /// <summary>Followed by a VarInt reference to a previously defined object.</summary>
    Reference = 0b1100_0000,

    /// <summary>
    /// A control tag. The schema type and embedded field id are invalid;
    /// the remaining 5 bits are used for control information.
    /// </summary>
    Extended = 0b1110_0000,
}
/// <summary>
/// The 2-bit schema type specifier of a tag, stored already shifted left by 3 bits.
/// </summary>
public enum SchemaType : byte
{
    /// <summary>This value has the type expected by the current schema.</summary>
    Expected = 0b0000_0000,

    /// <summary>This value is an instance of a well-known type. Followed by a VarInt type id.</summary>
    WellKnown = 0b0000_1000,

    /// <summary>This value is of a named type. Followed by an encoded type name.</summary>
    Encoded = 0b0001_0000,

    /// <summary>
    /// This value is of a type which was previously specified.
    /// Followed by a VarInt indicating which previous type is being reused.
    /// </summary>
    Referenced = 0b0001_1000,
}
/// <summary>
/// The 2-bit extended wire type carried by a control tag, stored already shifted left by 3 bits.
/// </summary>
public enum ExtendedWireType : byte
{
    /// <summary>
    /// This tag marks the end of a tag-delimited object. Field id is invalid.
    /// </summary>
    EndTagDelimited = 0b0000_0000,
}