-
-
Save thargy/6036443bfec0cc6f08b7102dd6c252b1 to your computer and use it in GitHub Desktop.
// Licensed to the .NET Foundation under one or more agreements. | |
// The .NET Foundation licenses this file to you under the MIT license. | |
// See the LICENSE file in the project root for more information. | |
namespace System.Buffers | |
{ | |
using System.Runtime.CompilerServices; | |
public static class BuffersExtensions | |
{ | |
/// <summary> | |
/// Tries to get a chunk of output from the sequence. | |
/// </summary> | |
/// <typeparam name="T">Type of the sequence elements.</typeparam> | |
/// <param name="input">The input.</param> | |
/// <param name="position">The position to begin searching the sequence from.</param> | |
/// <param name="output">The output (see remarks).</param> | |
/// <param name="size">The size requested.</param> | |
/// <param name="advance">The advance.</param> | |
/// <param name="buffer">The optional buffer.</param> | |
/// <returns> | |
/// <see langword="true" /> if any more data was returned in <paramref name="output" />; otherwise <see langword="false" />.</returns> | |
/// <remarks><para>If a chunk is returned, the <paramref name="output" /> will contain a maximum of <see cref="size"/> elements. | |
/// This contrasts the existing behaviour of <see cref="ReadOnlySequence{T}.TryGet"/>, where the output may contain more than | |
/// one element as it will contain the remainder of the current sequence segment. This is by design as this method is | |
/// focused on reading a sequence as a series of fixed size chunks, rather than getting the remainder of the current sequence.</para> | |
/// <para>In the same way <paramref name="advance"/> moves the <paramref name="size"/> forward by exactly <see cref="size"/> elements, | |
/// when <see langword="true"/>; otherwise sets it to <see langword="default"/> when the end of sequence is reached.</para> | |
/// <para>If <paramref name="output" />.<see cref="ReadOnlyMemory{T}.Length">Length</see> is less than | |
/// <paramref name="size" /> this indicates the sequence has reached it's end, and a full chunk is not available.</para> | |
/// <para>If a <see cref="buffer" /> is <see cref="Memory{T}.IsEmpty">supplied</see>, and if an allocation is required, | |
/// this buffer will be used as such it must be at least <paramref name="size" /> elements. If no allocation is required the buffer will be unused. This is useful on repeated calls | |
/// as it can allow for a single buffer to be supplied and re-used on each call</para></remarks> | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public static bool TryGetChunk<T>(this in ReadOnlySequence<T> input, ref SequencePosition position, out ReadOnlySpan<T> output, int size, bool advance = true, in Span<T> buffer = default) | |
{ | |
// Short-cut | |
if (size < 1) | |
{ | |
output = default; | |
return false; | |
} | |
Span<T> result = default; | |
Span<T> remainder = default; | |
int remainderLength = size; | |
SequencePosition nextSegment = position; | |
do | |
{ | |
int segmentLength; | |
if (nextSegment.GetObject() is null || // Pointer is outside of sequence | |
!input.TryGet(ref nextSegment, out ReadOnlyMemory<T> segment) || // No more segments are available | |
(segmentLength = segment.Length) < 1) // See https://github.com/dotnet/corefx/issues/33089 | |
{ | |
// Only happens on first iteration, nothing available | |
output = default; | |
return false; | |
} | |
// Calculate remainder | |
ReadOnlySpan<T> segmentSpan = segment.Span; | |
if (segmentLength >= remainderLength || (nextSegment.GetObject() is null)) | |
{ | |
if (segmentLength > remainderLength) | |
{ | |
// Truncate this segment | |
segmentSpan = segmentSpan.Slice(0, remainderLength); | |
if (advance) | |
{ | |
// Advance the position by remainder length | |
position = input.GetPosition(remainderLength, position); | |
} | |
} | |
else if (advance) | |
{ | |
position = nextSegment; | |
} | |
if (result.IsEmpty) | |
{ | |
output = segmentSpan; | |
} | |
else | |
{ | |
segmentSpan.CopyTo(remainder); | |
remainderLength -= segmentLength; | |
if (remainderLength > 0) | |
{ | |
// The segment didn't fill the remainder so slice the result | |
result = result.Slice(0, result.Length - remainderLength); | |
} | |
output = result; | |
} | |
return true; | |
} | |
if (result.IsEmpty) | |
{ | |
result = buffer.IsEmpty ? new T[size] : buffer; | |
remainder = result; | |
} | |
if (advance) | |
{ | |
position = nextSegment; | |
} | |
segmentSpan.CopyTo(remainder); | |
// Shorten the remainder slice | |
remainder = remainder.Slice(segmentLength); | |
remainderLength = remainder.Length; | |
} while (true); | |
} | |
} | |
} |
thargy
commented
Oct 27, 2018
•
Method | SequenceSize | SegmentSize | BlockSize | Mean | Error | StdDev | Scaled | ScaledSD |
---|---|---|---|---|---|---|---|---|
TryGetChunk | 1 | 0.1 | 1 | 35.11 ns | 0.0757 ns | 0.0043 ns | 0.75 | 0.01 |
SliceCopyTo | 1 | 0.1 | 1 | 46.71 ns | 6.9399 ns | 0.3921 ns | 1.00 | 0.00 |
SliceCopyNew | 1 | 0.1 | 1 | 63.02 ns | 4.2804 ns | 0.2418 ns | 1.35 | 0.01 |
TryGetChunk | 1 | 0.1 | 8 | 35.60 ns | 2.6714 ns | 0.1509 ns | 0.75 | 0.00 |
SliceCopyTo | 1 | 0.1 | 8 | 47.38 ns | 0.6350 ns | 0.0359 ns | 1.00 | 0.00 |
SliceCopyNew | 1 | 0.1 | 8 | 63.09 ns | 3.5805 ns | 0.2023 ns | 1.33 | 0.00 |
TryGetChunk | 1 | 0.1 | 81920 | 35.45 ns | 0.0603 ns | 0.0034 ns | 0.76 | 0.00 |
SliceCopyTo | 1 | 0.1 | 81920 | 46.57 ns | 0.7040 ns | 0.0398 ns | 1.00 | 0.00 |
SliceCopyNew | 1 | 0.1 | 81920 | 64.84 ns | 1.1925 ns | 0.0674 ns | 1.39 | 0.00 |
TryGetChunk | 1 | 0.4 | 1 | 35.43 ns | 7.5579 ns | 0.4270 ns | 0.75 | 0.01 |
SliceCopyTo | 1 | 0.4 | 1 | 47.39 ns | 0.4709 ns | 0.0266 ns | 1.00 | 0.00 |
SliceCopyNew | 1 | 0.4 | 1 | 63.47 ns | 1.1935 ns | 0.0674 ns | 1.34 | 0.00 |
TryGetChunk | 1 | 0.4 | 8 | 34.96 ns | 0.1832 ns | 0.0104 ns | 0.75 | 0.00 |
SliceCopyTo | 1 | 0.4 | 8 | 46.75 ns | 0.1188 ns | 0.0067 ns | 1.00 | 0.00 |
SliceCopyNew | 1 | 0.4 | 8 | 63.97 ns | 0.8438 ns | 0.0477 ns | 1.37 | 0.00 |
TryGetChunk | 1 | 0.4 | 81920 | 35.09 ns | 3.0166 ns | 0.1704 ns | 0.75 | 0.00 |
SliceCopyTo | 1 | 0.4 | 81920 | 46.57 ns | 1.8770 ns | 0.1061 ns | 1.00 | 0.00 |
SliceCopyNew | 1 | 0.4 | 81920 | 62.98 ns | 0.2920 ns | 0.0165 ns | 1.35 | 0.00 |
TryGetChunk | 1 | 1 | 1 | 35.05 ns | 3.2711 ns | 0.1848 ns | 0.76 | 0.00 |
SliceCopyTo | 1 | 1 | 1 | 46.38 ns | 0.8914 ns | 0.0504 ns | 1.00 | 0.00 |
SliceCopyNew | 1 | 1 | 1 | 63.56 ns | 14.2743 ns | 0.8065 ns | 1.37 | 0.01 |
TryGetChunk | 1 | 1 | 8 | 35.53 ns | 0.5488 ns | 0.0310 ns | 0.76 | 0.00 |
SliceCopyTo | 1 | 1 | 8 | 46.81 ns | 3.7416 ns | 0.2114 ns | 1.00 | 0.00 |
SliceCopyNew | 1 | 1 | 8 | 63.36 ns | 11.5906 ns | 0.6549 ns | 1.35 | 0.01 |
TryGetChunk | 1 | 1 | 81920 | 35.24 ns | 0.8808 ns | 0.0498 ns | 0.75 | 0.00 |
SliceCopyTo | 1 | 1 | 81920 | 46.76 ns | 2.2220 ns | 0.1255 ns | 1.00 | 0.00 |
SliceCopyNew | 1 | 1 | 81920 | 63.52 ns | 10.6904 ns | 0.6040 ns | 1.36 | 0.01 |
TryGetChunk | 1000 | 0.1 | 1 | 16,207.63 ns | 81.8012 ns | 4.6219 ns | 0.36 | 0.00 |
SliceCopyTo | 1000 | 0.1 | 1 | 45,556.15 ns | 5,218.3483 ns | 294.8464 ns | 1.00 | 0.00 |
SliceCopyNew | 1000 | 0.1 | 1 | 54,618.79 ns | 11,779.3987 ns | 665.5579 ns | 1.20 | 0.01 |
TryGetChunk | 1000 | 0.1 | 8 | 2,125.60 ns | 16.3101 ns | 0.9216 ns | 0.36 | 0.00 |
SliceCopyTo | 1000 | 0.1 | 8 | 5,987.04 ns | 378.7742 ns | 21.4014 ns | 1.00 | 0.00 |
SliceCopyNew | 1000 | 0.1 | 8 | 7,066.41 ns | 25.2971 ns | 1.4293 ns | 1.18 | 0.00 |
TryGetChunk | 1000 | 0.1 | 81920 | 210.49 ns | 19.0390 ns | 1.0757 ns | 0.98 | 0.00 |
SliceCopyTo | 1000 | 0.1 | 81920 | 214.42 ns | 1.5311 ns | 0.0865 ns | 1.00 | 0.00 |
SliceCopyNew | 1000 | 0.1 | 81920 | 239.47 ns | 41.7122 ns | 2.3568 ns | 1.12 | 0.01 |
TryGetChunk | 1000 | 0.4 | 1 | 16,067.91 ns | 704.2364 ns | 39.7907 ns | 0.41 | 0.00 |
SliceCopyTo | 1000 | 0.4 | 1 | 39,435.50 ns | 3,238.6385 ns | 182.9891 ns | 1.00 | 0.00 |
SliceCopyNew | 1000 | 0.4 | 1 | 52,782.66 ns | 789.1048 ns | 44.5859 ns | 1.34 | 0.01 |
TryGetChunk | 1000 | 0.4 | 8 | 2,071.42 ns | 1,217.0226 ns | 68.7640 ns | 0.42 | 0.01 |
SliceCopyTo | 1000 | 0.4 | 8 | 4,931.15 ns | 37.4050 ns | 2.1135 ns | 1.00 | 0.00 |
SliceCopyNew | 1000 | 0.4 | 8 | 6,677.68 ns | 1,113.0950 ns | 62.8919 ns | 1.35 | 0.01 |
TryGetChunk | 1000 | 0.4 | 81920 | 91.36 ns | 4.5369 ns | 0.2563 ns | 0.79 | 0.00 |
SliceCopyTo | 1000 | 0.4 | 81920 | 115.09 ns | 2.8835 ns | 0.1629 ns | 1.00 | 0.00 |
SliceCopyNew | 1000 | 0.4 | 81920 | 135.88 ns | 0.4992 ns | 0.0282 ns | 1.18 | 0.00 |
TryGetChunk | 1000 | 1 | 1 | 15,127.86 ns | 1,374.6168 ns | 77.6684 ns | 0.46 | 0.00 |
SliceCopyTo | 1000 | 1 | 1 | 33,042.55 ns | 4,218.6817 ns | 238.3633 ns | 1.00 | 0.00 |
SliceCopyNew | 1000 | 1 | 1 | 47,790.33 ns | 824.9460 ns | 46.6110 ns | 1.45 | 0.01 |
TryGetChunk | 1000 | 1 | 8 | 1,916.98 ns | 368.0294 ns | 20.7943 ns | 0.48 | 0.00 |
SliceCopyTo | 1000 | 1 | 8 | 4,031.53 ns | 20.1829 ns | 1.1404 ns | 1.00 | 0.00 |
SliceCopyNew | 1000 | 1 | 8 | 5,935.10 ns | 34.6723 ns | 1.9591 ns | 1.47 | 0.00 |
TryGetChunk | 1000 | 1 | 81920 | 35.11 ns | 6.2488 ns | 0.3531 ns | 0.52 | 0.00 |
SliceCopyTo | 1000 | 1 | 81920 | 67.82 ns | 1.3987 ns | 0.0790 ns | 1.00 | 0.00 |
SliceCopyNew | 1000 | 1 | 81920 | 84.46 ns | 0.3885 ns | 0.0219 ns | 1.25 | 0.00 |
TryGetChunk | 100000 | 0.1 | 1 | 1,616,748.28 ns | 15,104.9084 ns | 853.4553 ns | 0.36 | 0.00 |
SliceCopyTo | 100000 | 0.1 | 1 | 4,432,609.93 ns | 43,559.3676 ns | 2,461.1850 ns | 1.00 | 0.00 |
SliceCopyNew | 100000 | 0.1 | 1 | 5,399,884.88 ns | 779,747.9765 ns | 44,057.2067 ns | 1.22 | 0.01 |
TryGetChunk | 100000 | 0.1 | 8 | 202,650.95 ns | 10,184.6886 ns | 575.4538 ns | 0.36 | 0.00 |
SliceCopyTo | 100000 | 0.1 | 8 | 557,300.72 ns | 40,635.9979 ns | 2,296.0092 ns | 1.00 | 0.00 |
SliceCopyNew | 100000 | 0.1 | 8 | 660,653.63 ns | 2,189.5992 ns | 123.7164 ns | 1.19 | 0.00 |
SliceCopyNew | 100000 | 0.1 | 81920 | 3,530.48 ns | 16.6025 ns | 0.9381 ns | 0.98 | 0.00 |
TryGetChunk | 100000 | 0.1 | 81920 | 3,578.62 ns | 74.7265 ns | 4.2222 ns | 0.99 | 0.00 |
SliceCopyTo | 100000 | 0.1 | 81920 | 3,597.94 ns | 204.0785 ns | 11.5308 ns | 1.00 | 0.00 |
TryGetChunk | 100000 | 0.4 | 1 | 1,620,093.01 ns | 820.6985 ns | 46.3710 ns | 0.42 | 0.00 |
SliceCopyTo | 100000 | 0.4 | 1 | 3,889,326.13 ns | 22,643.6837 ns | 1,279.4101 ns | 1.00 | 0.00 |
SliceCopyNew | 100000 | 0.4 | 1 | 5,330,647.46 ns | 1,149,405.4897 ns | 64,943.5417 ns | 1.37 | 0.01 |
TryGetChunk | 100000 | 0.4 | 8 | 201,303.36 ns | 7,176.9988 ns | 405.5137 ns | 0.41 | 0.00 |
SliceCopyTo | 100000 | 0.4 | 8 | 487,841.79 ns | 68,171.4087 ns | 3,851.8110 ns | 1.00 | 0.00 |
SliceCopyNew | 100000 | 0.4 | 8 | 652,271.12 ns | 1,291.2192 ns | 72.9563 ns | 1.34 | 0.01 |
TryGetChunk | 100000 | 0.4 | 81920 | 3,387.50 ns | 132.3330 ns | 7.4771 ns | 0.98 | 0.01 |
SliceCopyTo | 100000 | 0.4 | 81920 | 3,451.68 ns | 799.5655 ns | 45.1769 ns | 1.00 | 0.00 |
SliceCopyNew | 100000 | 0.4 | 81920 | 3,461.91 ns | 957.7346 ns | 54.1138 ns | 1.00 | 0.02 |
TryGetChunk | 100000 | 1 | 1 | 1,716,825.08 ns | 29,777.0998 ns | 1,682.4614 ns | 0.52 | 0.00 |
SliceCopyTo | 100000 | 1 | 1 | 3,302,254.55 ns | 22,682.6610 ns | 1,281.6124 ns | 1.00 | 0.00 |
SliceCopyNew | 100000 | 1 | 1 | 4,725,121.94 ns | 18,508.9365 ns | 1,045.7892 ns | 1.43 | 0.00 |
TryGetChunk | 100000 | 1 | 8 | 192,421.59 ns | 517.3142 ns | 29.2292 ns | 0.48 | 0.00 |
SliceCopyTo | 100000 | 1 | 8 | 399,447.64 ns | 7,098.5682 ns | 401.0823 ns | 1.00 | 0.00 |
SliceCopyNew | 100000 | 1 | 8 | 596,178.49 ns | 2,227.9159 ns | 125.8814 ns | 1.49 | 0.00 |
TryGetChunk | 100000 | 1 | 81920 | 51.57 ns | 4.1468 ns | 0.2343 ns | 0.02 | 0.00 |
SliceCopyTo | 100000 | 1 | 81920 | 3,338.23 ns | 1,269.0726 ns | 71.7050 ns | 1.00 | 0.00 |
SliceCopyNew | 100000 | 1 | 81920 | 3,380.18 ns | 49.8324 ns | 2.8156 ns | 1.01 | 0.02 |
As can be seen from the benchmarks above, TryGetChunk
is consistently faster than CopyTo
, and is normally 2-3 times faster.
this.sequence.Slice(i, blockSize).CopyTo(b);
This is an inefficient way of slicing ROB, you are slicing from the beginning every time. Fair comparison would be to do:
var s= this.sequence;
long blockSize = this.BlockSize;
long remainder = this.sequence.Length;
Span<byte> b = this.buffer;
while (remainder > 0)
{
if (remainder < blockSize)
{
blockSize = remainder;
}
s.CopyTo(b.Slice(0, blockSize ));
s = s.Slice(blockSize);
remainder -= blockSize;
// Use b here
}
The following line:
s.CopyTo(b.Slice(0, blockSize ));
Throws an ArgumentOutOfRangeException
as s
is of length > blockSize
. Also, b
is always of length blocksize
already, so I assume you meant to write:
[Benchmark]
public void SliceCopyNew()
{
ReadOnlySequence<byte> s = this.sequence;
int blockSize = this.BlockSize;
int remainder = (int)this.sequence.Length;
Span<byte> b = this.buffer;
while (remainder > 0)
{
if (remainder < blockSize)
{
blockSize = remainder;
}
s.Slice(0, blockSize).CopyTo(b);
s = s.Slice(blockSize);
remainder -= blockSize;
// Use b here
}
}
Which prevents a re-scan from the start of the sequence which 'should' be faster. However, in reality, it's slower for all tests, probably because it needs 2 calls to Slice
instead of one and this outweighs the speed of the several jumps needed to scan to the correct slice start in a single call? Currently, I only have segments going up to 10 segments per sequence (SegmentSize
of 0.1 means 10% of SequenceSize
, or 10 segments). I could extend the tests to include ones for much greater segments per sequence, but I felt these were pretty representative of network streams already and gives an idea of the general trend.
This illustrates how hard it is to get this implementation right for end users, and is another justification for having an implementation ready provided, particularly as it remains significantly faster.
(Rather than spam comments, I've updated the original code and benchmark above)