Skip to content

Instantly share code, notes, and snippets.

@Kittoes0124
Last active February 28, 2021 22:30
Show Gist options
  • Save Kittoes0124/2615189c339d625732aee6a0ba30da6d to your computer and use it in GitHub Desktop.
Save Kittoes0124/2615189c339d625732aee6a0ba30da6d to your computer and use it in GitHub Desktop.
/*
Name: ByteTerrace Delimited Record (BTDR)
Description: A compact file format for storing structured data.
Glossary:
Delimiter: A symbol that represents the boundary between two values.
Escape Sentinel: A symbol that signals either the beginning or end of a binary sequence.
Field Separator: A symbol that represents a delimiter for fields.
Record Separator: A symbol that represents a delimiter for records.
Specification:
Consistent Overhead Byte Stuffing (COBS):
Decode: COBS.Decode
Encode: COBS.Encode
EscapeSentinel (ES): 0x1B (ASCII ESC)
FieldSeparator (FS): 0x1E (ASCII RS)
RecordSeparator (RS): 0x1F (ASCII US)
Basic Example (Text-Only):
[Field0][FS][Field1][FS][Field2][RS]
[Field0][FS][Field1][FS][Field2][RS]
...
Complex Example (Binary-Included, Field2):
[Field0][FS][Field1][FS][ES]COBS.Encode([Field2])[ES][RS]
[Field0][FS][Field1][FS][ES]COBS.Encode([Field2])[ES][RS]
...
*/
/// <summary>
/// Consistent Overhead Byte Stuffing (COBS) helpers that produce, and consume, a payload that is free of a chosen sentinel byte.
/// </summary>
/// <remarks>
/// https://en.wikipedia.org/wiki/Consistent_Overhead_Byte_Stuffing
/// <para>
/// Code bytes are stored XOR-ed with the sentinel. A code is never zero, so a stored code can never equal the sentinel;
/// for the default sentinel (zero) the XOR is the identity and the output is the classic COBS byte sequence.
/// </para>
/// </remarks>
static class BitwiseHelpers
{
    /// <summary>Raises the error used by both methods when <c>target</c> cannot hold the next output byte.</summary>
    private static void ThrowTargetTooShort() =>
        throw new ArgumentException("The target span is too short to hold the result.", "target");

    /// <summary>
    /// Decodes a sequence produced by <see cref="Cobs"/> back into the original bytes.
    /// </summary>
    /// <param name="source">The encoded bytes; decoding stops at the terminating sentinel, or at the end of the span when there is none.</param>
    /// <param name="target">The span that receives the decoded bytes.</param>
    /// <param name="sentinelValue">The sentinel that was passed to <see cref="Cobs"/>.</param>
    /// <returns>The number of bytes written to <paramref name="target"/> (zero for an empty or terminator-only source).</returns>
    /// <exception cref="ArgumentException"><paramref name="target"/> is too short for the decoded bytes.</exception>
    public static int CobsInverse(ReadOnlySpan<byte> source, Span<byte> target, byte sentinelValue = byte.MinValue) {
        var isSentinelPending = false;
        var numBytesRemainingInRun = 0;
        var readIndex = 0;
        var writeIndex = 0;

        while (readIndex < source.Length) {
            var value = source[readIndex++];

            if (numBytesRemainingInRun != 0) {
                // Inside a run: bytes are copied verbatim.
                if (target.Length <= writeIndex) {
                    ThrowTargetTooShort();
                }

                target[writeIndex++] = value;
                --numBytesRemainingInRun;
                continue;
            }

            if (value == sentinelValue) {
                // Terminator reached; the sentinel implied by the previous block is not part of the data.
                break;
            }

            if (isSentinelPending) {
                // The previous block ended because the original data contained a sentinel at this position.
                if (target.Length <= writeIndex) {
                    ThrowTargetTooShort();
                }

                target[writeIndex++] = sentinelValue;
            }

            var code = (value ^ sentinelValue);

            // A maximum-length block (code 0xFF) ends without an implied sentinel.
            isSentinelPending = (code != byte.MaxValue);
            numBytesRemainingInRun = (code - 1);
        }

        return writeIndex;
    }

    /// <summary>
    /// Encodes <paramref name="source"/> so that <paramref name="sentinelValue"/> appears exactly once in the output: as the final byte.
    /// </summary>
    /// <param name="source">The bytes to encode.</param>
    /// <param name="target">The span that receives the encoded bytes, including the terminating sentinel.</param>
    /// <param name="sentinelValue">The byte value that must not occur inside the encoded payload.</param>
    /// <returns>The number of bytes written to <paramref name="target"/>, terminator included.</returns>
    /// <exception cref="ArgumentException"><paramref name="target"/> is too short for the encoded bytes.</exception>
    public static int Cobs(ReadOnlySpan<byte> source, Span<byte> target, byte sentinelValue = byte.MinValue) {
        var code = 1;
        var codeIndex = 0;
        var writeIndex = 1;

        if (target.IsEmpty) {
            // The first code byte always occupies target[0].
            ThrowTargetTooShort();
        }

        for (var readIndex = 0; (readIndex < source.Length); ++readIndex) {
            var value = source[readIndex];

            if (value != sentinelValue) {
                if (target.Length <= writeIndex) {
                    ThrowTargetTooShort();
                }

                target[writeIndex++] = value;
                ++code;
            }
            else {
                // A sentinel in the data closes the current block; the sentinel itself is not copied.
                target[codeIndex] = ((byte)(code ^ sentinelValue));

                if (target.Length <= writeIndex) {
                    ThrowTargetTooShort();
                }

                codeIndex = writeIndex++;
                code = 1;
            }

            if ((code == byte.MaxValue) && ((readIndex + 1) < source.Length)) {
                // The block is full and more input follows: open a new block.
                target[codeIndex] = ((byte)(code ^ sentinelValue));

                if (target.Length <= writeIndex) {
                    ThrowTargetTooShort();
                }

                codeIndex = writeIndex++;
                code = 1;
            }
        }

        target[codeIndex] = ((byte)(code ^ sentinelValue));

        if (target.Length <= writeIndex) {
            ThrowTargetTooShort();
        }

        target[writeIndex++] = sentinelValue;

        return writeIndex;
    }
}
/// <summary>
/// The control bytes that give a BTDR stream its structure.
/// </summary>
static class Constants
{
    /// <summary>Toggles an escaped sequence on and off (0x1B).</summary>
    public const byte EscapeSentinel = 0x1B;

    /// <summary>Ends a field that is followed by another field of the same record (0x1E).</summary>
    public const byte FieldSeparator = 0x1E;

    /// <summary>Ends the last field of a record, and therefore the record itself (0x1F).</summary>
    public const byte RecordSeparator = 0x1F;
}
/// <summary>
/// Scans a stream for field and record separators and yields, per record, the stream offset that follows each separator.
/// </summary>
/// <remarks>
/// The instance acts as its own enumerator, so <see cref="GetEnumerator"/> always returns the same forward-only cursor.
/// Offsets are derived from <see cref="Stream.Position"/> of the wrapped stream.
/// </remarks>
public sealed class DelimitedRecordIndexer : IDisposable, IEnumerable<IReadOnlyList<long>>, IEnumerator<IReadOnlyList<long>>
{
    private readonly byte[] m_bufferValue;
    private readonly Stream m_stream;
    private int m_bufferLength;
    private int m_bufferOffset;
    private IReadOnlyList<long> m_current;

    /// <summary>The offsets collected by the most recent call to <see cref="MoveNext"/>.</summary>
    public IReadOnlyList<long> Current => m_current;
    object IEnumerator.Current => Current;

    /// <summary>
    /// Creates an indexer over <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">The stream to scan; it is disposed together with this instance.</param>
    /// <param name="settings">Optional settings; defaults are used when omitted.</param>
    public DelimitedRecordIndexer(Stream stream, DelimitedRecordIndexerSettings? settings = default) {
        var effectiveSettings = (settings ?? new DelimitedRecordIndexerSettings { });

        m_stream = stream;
        m_bufferValue = new byte[effectiveSettings.BufferLength];
        m_bufferOffset = 0;
        m_bufferLength = 0;
        m_current = Array.AsReadOnly(Array.Empty<long>());
    }

    /// <summary>Refills the buffer from the stream and rewinds the cursor; returns false when nothing was read.</summary>
    private bool FillBuffer() {
        var numBytesRead = m_stream.Read(m_bufferValue);

        m_bufferLength = numBytesRead;
        m_bufferOffset = 0;

        return (numBytesRead > 0);
    }

    /// <summary>Disposes the wrapped stream.</summary>
    public void Dispose() {
        m_stream.Dispose();
    }

    public IEnumerator<IReadOnlyList<long>> GetEnumerator() {
        return this;
    }

    IEnumerator IEnumerable.GetEnumerator() {
        return GetEnumerator();
    }

    /// <summary>
    /// Advances to the next record separator, collecting one offset per unescaped separator on the way.
    /// </summary>
    /// <returns>True when a record separator was found; false when the stream ended first.</returns>
    public bool MoveNext() {
        var separatorOffsets = new List<long>();
        var insideEscapedSequence = false;

        m_current = separatorOffsets.AsReadOnly();

        for (; ; ) {
            if ((m_bufferLength <= m_bufferOffset) && !FillBuffer()) {
                return false;
            }

            var value = m_bufferValue[m_bufferOffset++];

            if (value == Constants.EscapeSentinel) {
                insideEscapedSequence = !insideEscapedSequence;
                continue;
            }

            if (insideEscapedSequence) {
                // Separators inside an escaped sequence are payload, not structure.
                continue;
            }

            var isRecordEnd = ((value != Constants.FieldSeparator) && (value == Constants.RecordSeparator));

            if ((value == Constants.FieldSeparator) || isRecordEnd) {
                // Stream position is at the end of the buffered chunk; step back to the byte after the separator.
                separatorOffsets.Add(m_stream.Position - m_bufferLength + m_bufferOffset);
            }

            if (isRecordEnd) {
                return true;
            }
        }
    }

    public void Reset() {
        throw new NotSupportedException();
    }
}
/// <summary>
/// Options for <c>DelimitedRecordIndexer</c>.
/// </summary>
public sealed class DelimitedRecordIndexerSettings
{
    private const int DefaultBufferLength = 4096;

    /// <summary>The length, in bytes, of the buffer the indexer reads the stream into.</summary>
    public int BufferLength { get; set; } = DefaultBufferLength;
}
/// <summary>
/// Reads records from a stream by slicing it at precomputed field offsets and decoding every field as UTF-8 text.
/// </summary>
/// <remarks>
/// Each offset is treated as the position just past a field's trailing separator; the separator byte is read but
/// excluded from the decoded string. Field bytes are decoded as-is: escaped (COBS-encoded) fields are not unescaped here.
/// The instance acts as its own forward-only enumerator.
/// </remarks>
public sealed class DelimitedRecordReader : IDisposable, IEnumerable<IReadOnlyList<string>>, IEnumerator<IReadOnlyList<string>>
{
    private readonly IEnumerator<IReadOnlyList<long>> m_indices;
    private readonly Stream m_stream;
    private IReadOnlyList<string> m_current;
    private long m_offset;

    /// <summary>The fields of the record produced by the most recent call to <see cref="MoveNext"/>.</summary>
    public IReadOnlyList<string> Current => m_current;
    object IEnumerator.Current => Current;

    /// <summary>
    /// Creates a reader over <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">The stream that field bytes are read from, sequentially, starting at its current position.</param>
    /// <param name="indices">Per record, the ascending end offsets of its fields; offsets are measured from where reading starts.</param>
    public DelimitedRecordReader(Stream stream, IEnumerable<IReadOnlyList<long>> indices) {
        m_current = Array.AsReadOnly(Array.Empty<string>());
        m_indices = indices.GetEnumerator();
        m_offset = 0;
        m_stream = stream;
    }

    /// <summary>Fills <paramref name="destination"/> completely, looping because a single Stream.Read may return fewer bytes than requested.</summary>
    private void FillFromStream(Span<byte> destination) {
        while (!destination.IsEmpty) {
            var numBytesRead = m_stream.Read(destination);

            if (numBytesRead == 0) {
                throw new EndOfStreamException("The stream ended before the indexed field could be read in full.");
            }

            destination = destination[numBytesRead..];
        }
    }

    /// <summary>Disposes the index enumerator; the stream is left open.</summary>
    public void Dispose() => m_indices.Dispose();
    public IEnumerator<IReadOnlyList<string>> GetEnumerator() => this;
    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Reads the fields of the next indexed record.
    /// </summary>
    /// <returns>True when another record was read; false when the index sequence is exhausted.</returns>
    /// <exception cref="EndOfStreamException">The stream ended before an indexed field was fully read.</exception>
    public bool MoveNext() {
        var fieldValues = new List<string>();

        m_current = fieldValues.AsReadOnly();

        if (!m_indices.MoveNext()) {
            return false;
        }

        foreach (var index in m_indices.Current) {
            // Field bytes plus the trailing separator.
            var length = checked((int)(index - m_offset));
            var buffer = ArrayPool<byte>.Shared.Rent(length);

            try {
                FillFromStream(buffer.AsSpan(0, length));
                m_offset = index;
                fieldValues.Add(Encoding.UTF8.GetString(buffer.AsSpan(0, (length - 1))));
            }
            finally {
                ArrayPool<byte>.Shared.Return(buffer);
            }
        }

        return true;
    }

    public void Reset() => throw new NotSupportedException();
}
/// <summary>
/// Writes records to a stream as separator-delimited fields, escaping any field that contains a control byte.
/// </summary>
/// <remarks>
/// A field that contains the escape sentinel, the field separator, or the record separator is written as
/// the escape sentinel followed by the output of <c>BitwiseHelpers.Cobs</c> (which ends with the escape sentinel).
/// All other fields are written verbatim. Every field is followed by its separator.
/// </remarks>
public sealed class DelimitedRecordWriter
{
    private readonly int m_bufferLength;
    private readonly Stream m_stream;

    private int BufferLength => m_bufferLength;

    /// <summary>
    /// Creates a writer over <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">The stream that receives the encoded records.</param>
    /// <param name="settings">Optional settings; defaults are used when omitted.</param>
    public DelimitedRecordWriter(Stream stream, DelimitedRecordWriterSettings? settings = default) {
        settings ??= new DelimitedRecordWriterSettings { };
        m_bufferLength = settings.BufferLength;
        m_stream = stream;
    }

    /// <summary>Writes the opening escape sentinel and the COBS-encoded field into <paramref name="target"/>; returns the number of bytes written.</summary>
    private static int EncodeEscapedField(ReadOnlySpan<byte> fieldValue, Span<byte> target) {
        target[0] = Constants.EscapeSentinel;

        // Slicing a span (unlike range-indexing an array) does not copy, so the encoder writes into target itself.
        return (1 + BitwiseHelpers.Cobs(fieldValue, target[1..], Constants.EscapeSentinel));
    }

    /// <summary>
    /// Appends one field and its trailing separator, going through <paramref name="buffer"/> when the field fits in it.
    /// </summary>
    /// <param name="fieldValue">The raw field bytes.</param>
    /// <param name="separatorValue">The separator written after the field.</param>
    /// <param name="buffer">The staging buffer shared by all fields of one <see cref="AppendRecords"/> call.</param>
    /// <param name="bufferOffset">The number of pending bytes in <paramref name="buffer"/>; updated in place.</param>
    private void AppendField(ReadOnlySpan<byte> fieldValue, byte separatorValue, Span<byte> buffer, ref int bufferOffset) {
        var isEscapeRequired = (-1 < fieldValue.IndexOfAny(Constants.EscapeSentinel, Constants.FieldSeparator, Constants.RecordSeparator));
        var plainFieldLength = checked(fieldValue.Length + 1);

        // Upper bound for: opening sentinel + COBS output (codes, data, terminator) + separator.
        var maxFieldLength = (isEscapeRequired
            ? checked(3 + (plainFieldLength + (plainFieldLength / (byte.MaxValue - 1))))
            : plainFieldLength);

        if ((0 < bufferOffset) && ((buffer.Length - bufferOffset) < maxFieldLength)) {
            // Pending bytes must reach the stream before this field does, whichever path the field takes.
            m_stream.Write(buffer[..bufferOffset]);
            bufferOffset = 0;
        }

        if (buffer.Length < maxFieldLength) {
            // The field can never fit in the staging buffer: write it straight to the stream.
            if (isEscapeRequired) {
                var encodingBuffer = ArrayPool<byte>.Shared.Rent(maxFieldLength);

                try {
                    var numBytesWritten = EncodeEscapedField(fieldValue, encodingBuffer);

                    m_stream.Write(encodingBuffer.AsSpan(0, numBytesWritten));
                }
                finally {
                    ArrayPool<byte>.Shared.Return(encodingBuffer);
                }
            }
            else {
                m_stream.Write(fieldValue);
            }

            m_stream.WriteByte(separatorValue);

            return;
        }

        if (isEscapeRequired) {
            bufferOffset += EncodeEscapedField(fieldValue, buffer[bufferOffset..]);
        }
        else {
            fieldValue.CopyTo(buffer[bufferOffset..]);
            bufferOffset += fieldValue.Length;
        }

        buffer[bufferOffset++] = separatorValue;
    }

    /// <summary>
    /// Appends the fields of one record; the last field is closed with the record separator, all others with the field separator.
    /// A record without fields writes nothing.
    /// </summary>
    private void AppendFields(IEnumerable<byte[]> fields, Span<byte> bufferValue, ref int bufferOffset) {
        using var fieldEnumerator = fields.GetEnumerator();

        if (!fieldEnumerator.MoveNext()) {
            return;
        }

        // One-element look-ahead: the separator depends on whether another field follows.
        var fieldValue = fieldEnumerator.Current;

        while (fieldEnumerator.MoveNext()) {
            AppendField(fieldValue, Constants.FieldSeparator, bufferValue, ref bufferOffset);
            fieldValue = fieldEnumerator.Current;
        }

        AppendField(fieldValue, Constants.RecordSeparator, bufferValue, ref bufferOffset);
    }

    /// <summary>
    /// Writes every record of <paramref name="source"/> to the stream and flushes the staging buffer to it afterwards.
    /// </summary>
    /// <param name="source">The records to write; each record is a sequence of raw field values.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public void AppendRecords(IEnumerable<IEnumerable<byte[]>> source) {
        if (source is null) {
            throw new ArgumentNullException(nameof(source));
        }

        var bufferOffset = 0;
        var bufferValue = ArrayPool<byte>.Shared.Rent(BufferLength);

        try {
            foreach (var fields in source) {
                AppendFields(fields, bufferValue, ref bufferOffset);
            }

            if (0 < bufferOffset) {
                m_stream.Write(bufferValue.AsSpan(0, bufferOffset));
            }
        }
        finally {
            ArrayPool<byte>.Shared.Return(bufferValue);
        }
    }
}
/// <summary>
/// Options for <c>DelimitedRecordWriter</c>.
/// </summary>
public sealed class DelimitedRecordWriterSettings
{
    private const int DefaultBufferLength = 4096;

    /// <summary>The minimum length, in bytes, requested for the writer's pooled staging buffer.</summary>
    public int BufferLength { get; set; } = DefaultBufferLength;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment