Last active
February 28, 2021 22:30
-
-
Save Kittoes0124/2615189c339d625732aee6a0ba30da6d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
Name: ByteTerrace Delimited Record (BTDR)
Description: A compact file format for storing structured data.
Glossary:
Delimiter: A symbol that represents the boundary between two values.
Escape Sentinel: A symbol that signals either the beginning or end of a binary sequence.
Field Separator: A symbol that represents a delimiter for fields.
Record Separator: A symbol that represents a delimiter for records.
Specification:
Consistent Overhead Byte Stuffing (COBS):
Decode: COBS.Decode
Encode: COBS.Encode
EscapeSentinel (ES): 0x1B (ASCII ESC)
FieldSeparator (FS): 0x1E (ASCII RS control code)
RecordSeparator (RS): 0x1F (ASCII US control code)
Basic Example (Text-Only):
[Field0][FS][Field1][FS][Field2][RS]
[Field0][FS][Field1][FS][Field2][RS]
...
Complex Example (Binary-Included, Field2)
[Field0][FS][Field1][FS][ESC]COBS.Encode([Field2])[ESC][RS]
[Field0][FS][Field1][FS][ESC]COBS.Encode([Field2])[ESC][RS]
...
*/
/// <summary>
/// Consistent Overhead Byte Stuffing (COBS) encoding and decoding over caller-supplied buffers.
/// </summary>
/// <remarks>
/// https://en.wikipedia.org/wiki/Consistent_Overhead_Byte_Stuffing
/// <para>
/// Both methods accept an arbitrary sentinel byte. A block-length code whose value would equal a
/// non-zero sentinel is stored as 0x00 instead (0x00 is never a valid length code), so the encoded
/// body never contains the sentinel. With the default sentinel of zero this is plain COBS.
/// </para>
/// </remarks>
static class BitwiseHelpers
{
    private const string TargetTooShortMessage = "The target buffer is too short.";

    /// <summary>
    /// Decodes a COBS block produced by <see cref="Cobs"/>.
    /// </summary>
    /// <param name="source">The encoded bytes; decoding stops at the first terminating sentinel or at the end of the span.</param>
    /// <param name="target">The buffer that receives the decoded bytes.</param>
    /// <param name="sentinelValue">The byte value that was removed from the payload during encoding.</param>
    /// <returns>The number of bytes written to <paramref name="target"/> (zero for empty input).</returns>
    /// <exception cref="ArgumentException"><paramref name="target"/> cannot hold the decoded bytes.</exception>
    public static int CobsInverse(ReadOnlySpan<byte> source, Span<byte> target, byte sentinelValue = byte.MinValue) {
        var isSentinelPending = false;
        var readOffset = 0;
        var writeOffset = 0;

        while (readOffset < source.Length) {
            var storedCode = source[readOffset++];

            // A raw sentinel in code position is the end-of-block terminator.
            if (storedCode == sentinelValue) {
                break;
            }

            // 0x00 is the stored alias for a length code equal to a non-zero sentinel.
            var code = ((storedCode == byte.MinValue) ? sentinelValue : storedCode);

            // Every block except a maximum-length one is followed by an implicit sentinel; it is
            // only materialized once another block proves it was not the end of the data.
            if (isSentinelPending) {
                Put(target, writeOffset++, sentinelValue);
            }

            // Tolerate a truncated final run by copying only what is actually present.
            var runLength = Math.Min((code - 1), (source.Length - readOffset));

            if ((target.Length - writeOffset) < runLength) {
                throw new ArgumentException(TargetTooShortMessage, nameof(target));
            }

            source.Slice(readOffset, runLength).CopyTo(target.Slice(writeOffset));
            readOffset += runLength;
            writeOffset += runLength;
            isSentinelPending = (code != byte.MaxValue);
        }

        return writeOffset;
    }

    /// <summary>
    /// Encodes <paramref name="source"/> so that the output contains <paramref name="sentinelValue"/>
    /// only as its final (terminating) byte.
    /// </summary>
    /// <param name="source">The bytes to encode.</param>
    /// <param name="target">The buffer that receives the encoded bytes, including the terminator.</param>
    /// <param name="sentinelValue">The byte value to eliminate from the encoded body.</param>
    /// <returns>The number of bytes written to <paramref name="target"/>, terminator included.</returns>
    /// <exception cref="ArgumentException"><paramref name="target"/> cannot hold the encoded bytes.</exception>
    public static int Cobs(ReadOnlySpan<byte> source, Span<byte> target, byte sentinelValue = byte.MinValue) {
        var code = 1;
        var codeOffset = 0;
        var lastIndex = (source.Length - 1);
        var writeOffset = 1; // slot zero is reserved for the first length code

        for (var readOffset = 0; (readOffset < source.Length); ++readOffset) {
            var value = source[readOffset];

            if (value != sentinelValue) {
                Put(target, writeOffset++, value);
                ++code;
            }
            else {
                // A sentinel closes the current block; its position is implied by the length code.
                Put(target, codeOffset, ToStoredCode(code, sentinelValue));
                codeOffset = writeOffset++;
                code = 1;
            }

            // A full block (254 data bytes) must be closed early, unless the input ends here.
            if ((code == byte.MaxValue) && (readOffset < lastIndex)) {
                Put(target, codeOffset, ToStoredCode(code, sentinelValue));
                codeOffset = writeOffset++;
                code = 1;
            }
        }

        Put(target, codeOffset, ToStoredCode(code, sentinelValue));
        Put(target, writeOffset++, sentinelValue);

        return writeOffset;
    }

    /// <summary>Maps a length code onto the byte that is stored, aliasing a sentinel collision to 0x00.</summary>
    private static byte ToStoredCode(int code, byte sentinelValue) =>
        ((code == sentinelValue) ? byte.MinValue : ((byte)code));

    /// <summary>Writes one byte, reporting an undersized target as an argument error.</summary>
    private static void Put(Span<byte> target, int offset, byte value) {
        if (target.Length <= offset) {
            throw new ArgumentException(TargetTooShortMessage, nameof(target));
        }

        target[offset] = value;
    }
}
/// <summary>
/// Control bytes that the delimited record format reserves for structure.
/// </summary>
static class Constants
{
    /// <summary>Marks the beginning and the end of an escaped sequence (0x1B).</summary>
    public const byte EscapeSentinel = 0x1B;
    /// <summary>Terminates every field except the last one in a record (0x1E).</summary>
    public const byte FieldSeparator = 0x1E;
    /// <summary>Terminates the last field of a record (0x1F).</summary>
    public const byte RecordSeparator = 0x1F;
}
/// <summary>
/// Scans a stream and yields, for each record, the stream positions that immediately follow each
/// field's trailing separator.
/// </summary>
/// <remarks>
/// The instance is its own enumerator, so it can be enumerated only once. Positions are derived
/// from <see cref="Stream.Position"/>, so the stream must support that property. Disposing the
/// indexer disposes the underlying stream.
/// </remarks>
public sealed class DelimitedRecordIndexer : IDisposable, IEnumerable<IReadOnlyList<long>>, IEnumerator<IReadOnlyList<long>>
{
    private readonly byte[] m_bufferValue;
    private readonly Stream m_stream;
    private int m_bufferLength;
    private int m_bufferOffset;
    private IReadOnlyList<long> m_current;

    /// <summary>The separator positions of the record found by the most recent <see cref="MoveNext"/> call.</summary>
    public IReadOnlyList<long> Current => m_current;
    object IEnumerator.Current => Current;

    /// <summary>Creates an indexer over <paramref name="stream"/>.</summary>
    /// <param name="stream">The stream to scan.</param>
    /// <param name="settings">Optional settings; defaults are used when omitted.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The configured buffer length is less than one.</exception>
    public DelimitedRecordIndexer(Stream stream, DelimitedRecordIndexerSettings? settings = default) {
        if (stream is null) {
            throw new ArgumentNullException(nameof(stream));
        }

        settings ??= new DelimitedRecordIndexerSettings { };

        // A zero-length buffer would make every read return zero bytes and silently index nothing.
        if (settings.BufferLength < 1) {
            throw new ArgumentOutOfRangeException(nameof(settings), settings.BufferLength, "The buffer length must be at least one byte.");
        }

        m_bufferLength = 0;
        m_bufferOffset = 0;
        m_bufferValue = new byte[settings.BufferLength];
        m_current = Array.AsReadOnly(Array.Empty<long>());
        m_stream = stream;
    }

    /// <summary>Refills the buffer from the stream; returns false once the stream is exhausted.</summary>
    private bool FillBuffer() {
        m_bufferLength = m_stream.Read(m_bufferValue);
        m_bufferOffset = 0;

        return (0 < m_bufferLength);
    }

    public void Dispose() => m_stream.Dispose();
    public IEnumerator<IReadOnlyList<long>> GetEnumerator() => this;
    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>
    /// Advances to the next record separator, collecting the position after every unescaped field
    /// or record separator seen on the way.
    /// </summary>
    /// <returns>True when a record separator was found; false when the stream ended first.</returns>
    public bool MoveNext() {
        var fieldIndices = new List<long>();
        var isEscapedSequence = false;

        m_current = fieldIndices.AsReadOnly();

        while ((m_bufferOffset < m_bufferLength) || FillBuffer()) {
            var value = m_bufferValue[m_bufferOffset++];

            // Each escape sentinel toggles between structural and escaped content.
            if (value == Constants.EscapeSentinel) {
                isEscapedSequence = !isEscapedSequence;
                continue;
            }

            var isSeparator = ((value == Constants.FieldSeparator) || (value == Constants.RecordSeparator));

            if (isEscapedSequence || !isSeparator) {
                continue;
            }

            // Stream.Position sits at the end of the buffered chunk, so rewind by the chunk length
            // and add the offset, which already points one past the separator.
            fieldIndices.Add(m_stream.Position - m_bufferLength + m_bufferOffset);

            if (value == Constants.RecordSeparator) {
                return true;
            }
        }

        return false;
    }

    public void Reset() => throw new NotSupportedException();
}
/// <summary>
/// Options accepted by <see cref="DelimitedRecordIndexer"/>.
/// </summary>
public sealed class DelimitedRecordIndexerSettings
{
    private const int DefaultBufferLength = (4 * 1024);

    private int m_bufferLength = DefaultBufferLength;

    /// <summary>The size, in bytes, of the indexer's read buffer; 4096 unless changed.</summary>
    public int BufferLength {
        get => m_bufferLength;
        set => m_bufferLength = value;
    }
}
/// <summary>
/// Reads records from a stream as lists of UTF-8 strings, using precomputed separator positions to
/// determine where each field ends.
/// </summary>
/// <remarks>
/// The stream is consumed sequentially and never repositioned, so it is expected to be positioned at
/// the offset that the first index is relative to (zero). Each index is the position immediately
/// after a field's trailing separator. The instance is its own enumerator, so it can be enumerated
/// only once. Disposing the reader disposes the index enumerator; the stream is left open.
/// NOTE(review): field bytes are returned exactly as stored (minus the trailing separator); escaped
/// sequences are not decoded here.
/// </remarks>
public sealed class DelimitedRecordReader : IDisposable, IEnumerable<IReadOnlyList<string>>, IEnumerator<IReadOnlyList<string>>
{
    private readonly IEnumerator<IReadOnlyList<long>> m_indices;
    private readonly Stream m_stream;
    private IReadOnlyList<string> m_current;
    private long m_offset;

    /// <summary>The fields of the record read by the most recent <see cref="MoveNext"/> call.</summary>
    public IReadOnlyList<string> Current => m_current;
    object IEnumerator.Current => Current;

    /// <summary>Creates a reader over <paramref name="stream"/>.</summary>
    /// <param name="stream">The stream that contains the records.</param>
    /// <param name="indices">For each record, the positions immediately after each field's separator.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public DelimitedRecordReader(Stream stream, IEnumerable<IReadOnlyList<long>> indices) {
        if (stream is null) {
            throw new ArgumentNullException(nameof(stream));
        }
        if (indices is null) {
            throw new ArgumentNullException(nameof(indices));
        }

        m_current = Array.AsReadOnly(Array.Empty<string>());
        m_indices = indices.GetEnumerator();
        m_offset = 0;
        m_stream = stream;
    }

    public void Dispose() => m_indices.Dispose();
    public IEnumerator<IReadOnlyList<string>> GetEnumerator() => this;
    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    /// <summary>Reads the fields of the next record into <see cref="Current"/>.</summary>
    /// <returns>True when a record was read; false when there are no more index entries.</returns>
    /// <exception cref="EndOfStreamException">The stream ended before a field was fully read.</exception>
    /// <exception cref="InvalidDataException">An index does not advance past the previous one.</exception>
    public bool MoveNext() {
        var fieldValues = new List<string>();

        m_current = fieldValues.AsReadOnly();

        if (!m_indices.MoveNext()) {
            return false;
        }

        foreach (var index in m_indices.Current) {
            // The span between two indices is the field's bytes plus its one-byte separator.
            var length = checked((int)(index - m_offset));

            if (length < 1) {
                throw new InvalidDataException("Field indices must be strictly increasing.");
            }

            var buffer = ArrayPool<byte>.Shared.Rent(length);

            try {
                FillExactly(buffer.AsSpan(0, length));
                m_offset = index;
                // Drop the trailing separator byte before decoding the text.
                fieldValues.Add(Encoding.UTF8.GetString(buffer.AsSpan(0, (length - 1))));
            }
            finally {
                ArrayPool<byte>.Shared.Return(buffer);
            }
        }

        return true;
    }

    public void Reset() => throw new NotSupportedException();

    /// <summary>Reads until <paramref name="destination"/> is full; a single Read call may return fewer bytes than requested.</summary>
    private void FillExactly(Span<byte> destination) {
        while (!destination.IsEmpty) {
            var numBytesRead = m_stream.Read(destination);

            if (numBytesRead < 1) {
                throw new EndOfStreamException("The stream ended before the indexed field was fully read.");
            }

            destination = destination.Slice(numBytesRead);
        }
    }
}
/// <summary>
/// Writes records to a stream: fields are separated by <see cref="Constants.FieldSeparator"/> and
/// each record's last field is followed by <see cref="Constants.RecordSeparator"/>. A field that
/// contains any reserved byte is written as an escape sentinel followed by its COBS encoding (whose
/// terminator is the closing escape sentinel).
/// </summary>
public sealed class DelimitedRecordWriter
{
    private readonly int m_bufferLength;
    private readonly Stream m_stream;

    private int BufferLength => m_bufferLength;

    /// <summary>Creates a writer over <paramref name="stream"/>.</summary>
    /// <param name="stream">The stream that receives the records.</param>
    /// <param name="settings">Optional settings; defaults are used when omitted.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The configured buffer length is negative.</exception>
    public DelimitedRecordWriter(Stream stream, DelimitedRecordWriterSettings? settings = default) {
        if (stream is null) {
            throw new ArgumentNullException(nameof(stream));
        }

        settings ??= new DelimitedRecordWriterSettings { };

        if (settings.BufferLength < 0) {
            throw new ArgumentOutOfRangeException(nameof(settings), settings.BufferLength, "The buffer length must not be negative.");
        }

        m_bufferLength = settings.BufferLength;
        m_stream = stream;
    }

    /// <summary>
    /// Appends one field and its trailing separator, staging it in <paramref name="buffer"/> when it
    /// fits and writing it straight to the stream when it does not.
    /// </summary>
    private void AppendField(ReadOnlySpan<byte> fieldValue, byte separatorValue, Span<byte> buffer, ref int bufferOffset) {
        var bufferLength = buffer.Length;
        var originalFieldLength = (fieldValue.Length + 1);
        var isEscapeRequired = (-1 < fieldValue.IndexOfAny(Constants.EscapeSentinel, Constants.FieldSeparator, Constants.RecordSeparator));
        // Upper bound for the escaped form: opening sentinel + COBS length codes + payload +
        // COBS terminator + separator.
        var encodedFieldLength = (isEscapeRequired
            ? (3 + (originalFieldLength + (originalFieldLength / (byte.MaxValue - 1))))
            : originalFieldLength);

        if (bufferLength < encodedFieldLength) {
            // The field bypasses the buffer, so anything already staged must reach the stream first
            // or the output would be reordered.
            if (0 < bufferOffset) {
                m_stream.Write(buffer[..bufferOffset]);
                bufferOffset = 0;
            }

            if (isEscapeRequired) {
                var encodingBuffer = ArrayPool<byte>.Shared.Rent(encodedFieldLength);

                try {
                    encodingBuffer[0] = Constants.EscapeSentinel;
                    // AsSpan slices in place; a range expression on an array would copy it and the
                    // encoded bytes would be lost.
                    var numBytesEncoded = BitwiseHelpers.Cobs(fieldValue, encodingBuffer.AsSpan(1), Constants.EscapeSentinel);
                    encodingBuffer[(1 + numBytesEncoded)] = separatorValue;
                    m_stream.Write(encodingBuffer.AsSpan(0, (2 + numBytesEncoded)));
                }
                finally {
                    ArrayPool<byte>.Shared.Return(encodingBuffer);
                }
            }
            else {
                m_stream.Write(fieldValue);
                m_stream.WriteByte(separatorValue);
            }

            return;
        }

        // Make room in the staging buffer if the worst-case size would overflow it.
        if (bufferLength < (bufferOffset + encodedFieldLength)) {
            m_stream.Write(buffer[..bufferOffset]);
            bufferOffset = 0;
        }

        if (isEscapeRequired) {
            buffer[bufferOffset++] = Constants.EscapeSentinel;
            var numBytesEncoded = BitwiseHelpers.Cobs(fieldValue, buffer[bufferOffset..], Constants.EscapeSentinel);
            bufferOffset += numBytesEncoded;
        }
        else {
            fieldValue.CopyTo(buffer[bufferOffset..]);
            bufferOffset += fieldValue.Length;
        }

        buffer[bufferOffset++] = separatorValue;
    }

    /// <summary>Appends every field of one record; a record with no fields writes nothing.</summary>
    private void AppendFields(IEnumerable<byte[]> fields, Span<byte> bufferValue, ref int bufferOffset) {
        using var fieldEnumerator = fields.GetEnumerator();

        if (!fieldEnumerator.MoveNext()) {
            return;
        }

        // Look one element ahead so the final field can be closed with the record separator.
        var fieldValue = fieldEnumerator.Current;

        while (fieldEnumerator.MoveNext()) {
            AppendField(fieldValue, Constants.FieldSeparator, bufferValue, ref bufferOffset);
            fieldValue = fieldEnumerator.Current;
        }

        AppendField(fieldValue, Constants.RecordSeparator, bufferValue, ref bufferOffset);
    }

    /// <summary>Writes every record in <paramref name="source"/> to the stream.</summary>
    /// <param name="source">The records to write; each record is a sequence of field byte arrays.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    public void AppendRecords(IEnumerable<IEnumerable<byte[]>> source) {
        if (source is null) {
            throw new ArgumentNullException(nameof(source));
        }

        var bufferValue = ArrayPool<byte>.Shared.Rent(BufferLength);

        try {
            var bufferOffset = 0;

            foreach (var fields in source) {
                AppendFields(fields, bufferValue, ref bufferOffset);
            }

            if (0 < bufferOffset) {
                m_stream.Write(bufferValue.AsSpan(0, bufferOffset));
            }
        }
        finally {
            ArrayPool<byte>.Shared.Return(bufferValue);
        }
    }
}
/// <summary>
/// Options accepted by <see cref="DelimitedRecordWriter"/>.
/// </summary>
public sealed class DelimitedRecordWriterSettings
{
    private const int DefaultBufferLength = (4 * 1024);

    private int m_bufferLength = DefaultBufferLength;

    /// <summary>The size, in bytes, requested for the writer's staging buffer; 4096 unless changed.</summary>
    public int BufferLength {
        get => m_bufferLength;
        set => m_bufferLength = value;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment