Skip to content

Instantly share code, notes, and snippets.

@NickStrupat
Created August 30, 2019 16:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NickStrupat/f05d8993853718a2dbc23de5c7a1c575 to your computer and use it in GitHub Desktop.
Save NickStrupat/f05d8993853718a2dbc23de5c7a1c575 to your computer and use it in GitHub Desktop.
C# UTF-8 rune enumeration
/// <summary>
/// Extension methods for enumerating the Unicode scalar values (<see cref="Rune"/>s)
/// encoded in a span of UTF-8 bytes.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Largest scratch buffer, in runes, that <see cref="ToArray"/> places on the stack.
    /// 256 runes is 1 KiB; anything bigger is rented from the shared array pool so that
    /// large inputs cannot overflow the stack.
    /// </summary>
    private const Int32 MaxStackallocRunes = 256;

    /// <summary>
    /// Drains <paramref name="enumerator"/> and returns the runes it yields as a new array.
    /// </summary>
    /// <param name="enumerator">
    /// The enumerator to drain. It is a struct passed by value, so the caller's copy is not advanced.
    /// </param>
    /// <returns>
    /// The runes decoded from the enumerator's remaining bytes, in order. Decoding stops at the
    /// first malformed or truncated UTF-8 sequence (see <see cref="SpanUtf8BytesRuneEnumerator.MoveNext"/>),
    /// so the array may cover only a prefix of the input.
    /// </returns>
    public static Rune[] ToArray(this SpanUtf8BytesRuneEnumerator enumerator)
    {
        // Every rune consumes at least one byte, so the byte count is an upper bound on the rune count.
        var maxRuneCount = enumerator.remaining.Length;

        if (maxRuneCount <= MaxStackallocRunes)
        {
            // Constant-size stack buffer: bounded stack usage regardless of input.
            Span<Rune> stackBuffer = stackalloc Rune[MaxStackallocRunes];
            return CollectRunes(enumerator, stackBuffer);
        }

        // Large input: a stackalloc sized by the input could overflow the stack, so use pooled heap memory.
        var rented = ArrayPool<Rune>.Shared.Rent(maxRuneCount);
        try
        {
            return CollectRunes(enumerator, rented);
        }
        finally
        {
            ArrayPool<Rune>.Shared.Return(rented);
        }
    }

    /// <summary>
    /// Writes every rune yielded by <paramref name="enumerator"/> into <paramref name="buffer"/>
    /// and returns an exactly-sized copy of the filled portion.
    /// </summary>
    private static Rune[] CollectRunes(SpanUtf8BytesRuneEnumerator enumerator, Span<Rune> buffer)
    {
        var count = 0;
        foreach (var rune in enumerator)
        {
            buffer[count++] = rune;
        }
        return buffer.Slice(0, count).ToArray();
    }

    /// <summary>
    /// Creates an enumerator over the runes encoded in <paramref name="utf8Bytes"/>.
    /// </summary>
    /// <param name="utf8Bytes">The UTF-8 encoded bytes to decode.</param>
    /// <returns>An enumerator usable directly in a <c>foreach</c> statement.</returns>
    public static SpanUtf8BytesRuneEnumerator EnumerateRunes(this Span<Byte> utf8Bytes)
    {
        return new SpanUtf8BytesRuneEnumerator(utf8Bytes);
    }

    /// <summary>
    /// Creates an enumerator over the runes encoded in the read-only span <paramref name="utf8Bytes"/>.
    /// </summary>
    /// <param name="utf8Bytes">The UTF-8 encoded bytes to decode.</param>
    /// <returns>An enumerator usable directly in a <c>foreach</c> statement.</returns>
    public static SpanUtf8BytesRuneEnumerator EnumerateRunes(this ReadOnlySpan<Byte> utf8Bytes)
    {
        return new SpanUtf8BytesRuneEnumerator(utf8Bytes);
    }

    /// <summary>
    /// Allocation-free enumerator that decodes one <see cref="Rune"/> at a time from UTF-8 bytes.
    /// It is its own enumerable: <see cref="GetEnumerator"/> returns a copy of the current state.
    /// </summary>
    public ref struct SpanUtf8BytesRuneEnumerator
    {
        // Bytes that have not been decoded yet; shrinks from the front on every MoveNext call.
        internal ReadOnlySpan<Byte> remaining;

        // Result of the most recent decode attempt.
        private Rune current;

        internal SpanUtf8BytesRuneEnumerator(ReadOnlySpan<Byte> utf8Bytes)
        {
            remaining = utf8Bytes;
            current = default;
        }

        /// <summary>Returns a copy of this enumerator so the type can be used in <c>foreach</c>.</summary>
        public SpanUtf8BytesRuneEnumerator GetEnumerator() => this;

        /// <summary>
        /// The rune produced by the last call to <see cref="MoveNext"/>. Only meaningful
        /// after <see cref="MoveNext"/> has returned <c>true</c>.
        /// </summary>
        public Rune Current => current;

        /// <summary>
        /// Decodes the next rune from the front of the remaining bytes.
        /// </summary>
        /// <returns>
        /// <c>true</c> if a complete, well-formed UTF-8 sequence was decoded into <see cref="Current"/>;
        /// <c>false</c> when the input is exhausted or the next bytes are malformed or truncated.
        /// The two <c>false</c> cases are not distinguished, so enumeration ends silently at the
        /// first invalid sequence.
        /// </returns>
        public Boolean MoveNext()
        {
            var operationStatus = Rune.DecodeFromUtf8(remaining, out current, out var bytesConsumed);

            // Advance even on failure; bytesConsumed is 0 when there is nothing left to read.
            remaining = remaining.Slice(bytesConsumed);
            return operationStatus == OperationStatus.Done;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment