lemire/validateutf8.cs

## validateutf8.cs

                    // shuf1: 16-entry table selected by the HIGH nibble of the byte that precedes
                    // each lane (the loop shuffles it with prev1 >> 4). The flag constants used in
                    // these tables are declared outside this excerpt.
                    Vector128<byte> shuf1 = Vector128.Create(
                            TOO_LONG,                                               // 0x0
                            TOO_LONG,                                               // 0x1
                            TOO_LONG,                                               // 0x2
                            TOO_LONG,                                               // 0x3
                            TOO_LONG,                                               // 0x4
                            TOO_LONG,                                               // 0x5
                            TOO_LONG,                                               // 0x6
                            TOO_LONG,                                               // 0x7
                            TWO_CONTS,                                              // 0x8
                            TWO_CONTS,                                              // 0x9
                            TWO_CONTS,                                              // 0xA
                            TWO_CONTS,                                              // 0xB
                            TOO_SHORT | OVERLONG_2,                                 // 0xC
                            TOO_SHORT,                                              // 0xD
                            TOO_SHORT | OVERLONG_3 | SURROGATE,                     // 0xE
                            TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4);   // 0xF

                    // shuf2: 16-entry table selected by the LOW nibble of that same preceding byte
                    // (the loop shuffles it with prev1 & 0x0F).
                    Vector128<byte> shuf2 = Vector128.Create(
                            CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,           // 0x0
                            CARRY | OVERLONG_2,                                     // 0x1
                            CARRY,                                                  // 0x2
                            CARRY,                                                  // 0x3
                            CARRY | TOO_LARGE,                                      // 0x4
                            CARRY | TOO_LARGE | TOO_LARGE_1000,                     // 0x5
                            CARRY | TOO_LARGE | TOO_LARGE_1000,                     // 0x6
                            CARRY | TOO_LARGE | TOO_LARGE_1000,                     // 0x7
                            CARRY | TOO_LARGE | TOO_LARGE_1000,                     // 0x8
                            CARRY | TOO_LARGE | TOO_LARGE_1000,                     // 0x9
                            CARRY | TOO_LARGE | TOO_LARGE_1000,                     // 0xA
                            CARRY | TOO_LARGE | TOO_LARGE_1000,                     // 0xB
                            CARRY | TOO_LARGE | TOO_LARGE_1000,                     // 0xC
                            CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,         // 0xD
                            CARRY | TOO_LARGE | TOO_LARGE_1000,                     // 0xE
                            CARRY | TOO_LARGE | TOO_LARGE_1000);                    // 0xF

                    // shuf3: 16-entry table selected by the HIGH nibble of the current byte
                    // (the loop shuffles it with currentBlock >> 4).
                    Vector128<byte> shuf3 = Vector128.Create(
                            TOO_SHORT,                                                                      // 0x0
                            TOO_SHORT,                                                                      // 0x1
                            TOO_SHORT,                                                                      // 0x2
                            TOO_SHORT,                                                                      // 0x3
                            TOO_SHORT,                                                                      // 0x4
                            TOO_SHORT,                                                                      // 0x5
                            TOO_SHORT,                                                                      // 0x6
                            TOO_SHORT,                                                                      // 0x7
                            TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,   // 0x8
                            TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,                     // 0x9
                            TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,                      // 0xA
                            TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,                      // 0xB
                            TOO_SHORT,                                                                      // 0xC
                            TOO_SHORT,                                                                      // 0xD
                            TOO_SHORT,                                                                      // 0xE
                            TOO_SHORT);                                                                     // 0xF

                    // 0xE0 - 0x80 = 0x60: saturating-subtracting this from a byte leaves bit 7 set
                    // only when that byte is >= 0xE0.
                    Vector128<byte> thirdByte = Vector128.Create((byte)(0b11100000u - 0x80));
                    // 0xF0 - 0x80 = 0x70: same trick, bit 7 survives only for bytes >= 0xF0.
                    Vector128<byte> fourthByte = Vector128.Create((byte)(0b11110000u - 0x80));
                    // Mask that keeps the low nibble of every lane.
                    Vector128<byte> v0f = Vector128.Create((byte)0x0F);
                    // Mask that keeps only bit 7 of every lane.
                    Vector128<byte> v80 = Vector128.Create((byte)0x80);
                    // Main loop: validates the input one 16-byte block at a time. A block that is
                    // entirely ASCII takes a fast path; any other block goes through the
                    // three-table lookup check. On the first problem, control is handed to
                    // RewindAndValidateWithErrors for the remainder starting at this block.
                    // Performance note: we could process 64 bytes at a time for better speed in some cases.
                    for (; processedLength + 16 <= inputLength; processedLength += 16)
                    {

                        Vector128<byte> currentBlock = AdvSimd.LoadVector128(pInputBuffer + processedLength);

                        // The largest unsigned byte is <= 127 exactly when no lane has bit 7 set,
                        // i.e. the whole block is ASCII. (The test must be "<= 127": with "> 127"
                        // the two branches are swapped and non-ASCII blocks are never validated.)
                        if (AdvSimd.Arm64.MaxAcross(currentBlock).ToScalar() <= 127)
                        {
                            // We have an ASCII block, no need to process it, but
                            // we need to check if the previous block was incomplete.
                            if (AdvSimd.Arm64.MaxAcross(prevIncomplete).ToScalar() != 0)
                            {
                                return SimdUnicode.UTF8.RewindAndValidateWithErrors(processedLength, pInputBuffer + processedLength, inputLength - processedLength);
                            }
                            prevIncomplete = Vector128<byte>.Zero;
                        }
                        else
                        {
                            // Contains non-ASCII characters, we need to do non-trivial processing.
                            // prev1: lane i holds the byte one position before currentBlock's lane i;
                            // lane 0 comes from the last byte of prevInputBlock (vextq-style extract).
                            Vector128<byte> prev1 = AdvSimd.ExtractVector128(prevInputBlock, currentBlock, (byte)(16 - 1));
                            // The 16-bit shift drags bits across byte boundaries, so the result is
                            // masked with 0x0F to keep just each byte's own high nibble.
                            Vector128<byte> byte_1_high = Vector128.Shuffle(shuf1, AdvSimd.ShiftRightLogical(prev1.AsUInt16(), 4).AsByte() & v0f);
                            Vector128<byte> byte_1_low = Vector128.Shuffle(shuf2, (prev1 & v0f));
                            Vector128<byte> byte_2_high = Vector128.Shuffle(shuf3, AdvSimd.ShiftRightLogical(currentBlock.AsUInt16(), 4).AsByte() & v0f);
                            // A flag bit survives only when all three table lookups set it.
                            Vector128<byte> sc = AdvSimd.And(AdvSimd.And(byte_1_high, byte_1_low), byte_2_high);
                            // prev2 / prev3: the bytes two and three positions before each lane.
                            Vector128<byte> prev2 = AdvSimd.ExtractVector128(prevInputBlock, currentBlock, (byte)(16 - 2));
                            Vector128<byte> prev3 = AdvSimd.ExtractVector128(prevInputBlock, currentBlock, (byte)(16 - 3));
                            prevInputBlock = currentBlock;
                            // Bit 7 is set where the byte two back is >= 0xE0 ...
                            Vector128<byte> isThirdByte = AdvSimd.SubtractSaturate(prev2, thirdByte);
                            // ... or where the byte three back is >= 0xF0.
                            Vector128<byte> isFourthByte = AdvSimd.SubtractSaturate(prev3, fourthByte);
                            Vector128<byte> must23 = AdvSimd.Or(isThirdByte, isFourthByte);
                            Vector128<byte> must23As80 = AdvSimd.And(must23, v80);
                            // Non-zero when sc carries any flag other than 0x80, or when sc's 0x80
                            // bit disagrees with must23As80.
                            Vector128<byte> error = AdvSimd.Xor(must23As80, sc);
                            if (AdvSimd.Arm64.MaxAcross(error).ToScalar() != 0)
                            {
                                return SimdUnicode.UTF8.RewindAndValidateWithErrors(processedLength, pInputBuffer + processedLength, inputLength - processedLength);
                            }
                            // NOTE(review): maxValue is defined outside this excerpt; presumably it
                            // is tuned so only an unfinished sequence at the block's tail leaves a
                            // non-zero lane. Confirm against its definition.
                            prevIncomplete = AdvSimd.SubtractSaturate(currentBlock, maxValue);
                        }
                    }

	// shuf1: 16-entry table selected by the HIGH nibble of the byte that precedes
	// each lane (the loop shuffles it with prev1 >> 4). The flag constants used in
	// these tables are declared outside this excerpt.
	Vector128<byte> shuf1 = Vector128.Create(TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
			TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
			TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
			TOO_SHORT | OVERLONG_2,
			TOO_SHORT,
			TOO_SHORT | OVERLONG_3 | SURROGATE,
			TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4);

	// shuf2: 16-entry table selected by the LOW nibble of that same preceding byte
	// (the loop shuffles it with prev1 & 0x0F).
	Vector128<byte> shuf2 = Vector128.Create(CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
			CARRY | OVERLONG_2,
			CARRY,
			CARRY,
			CARRY | TOO_LARGE,
			CARRY | TOO_LARGE | TOO_LARGE_1000,
			CARRY | TOO_LARGE | TOO_LARGE_1000,
			CARRY | TOO_LARGE | TOO_LARGE_1000,
			CARRY | TOO_LARGE | TOO_LARGE_1000,
			CARRY | TOO_LARGE | TOO_LARGE_1000,
			CARRY | TOO_LARGE | TOO_LARGE_1000,
			CARRY | TOO_LARGE | TOO_LARGE_1000,
			CARRY | TOO_LARGE | TOO_LARGE_1000,
			CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
			CARRY | TOO_LARGE | TOO_LARGE_1000,
			CARRY | TOO_LARGE | TOO_LARGE_1000);

	// shuf3: 16-entry table selected by the HIGH nibble of the current byte
	// (the loop shuffles it with currentBlock >> 4).
	Vector128<byte> shuf3 = Vector128.Create(TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
			TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
			TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
			TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
			TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
			TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
			TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT);

	// 0xE0 - 0x80 = 0x60: saturating-subtracting this from a byte leaves bit 7 set
	// only when that byte is >= 0xE0.
	Vector128<byte> thirdByte = Vector128.Create((byte)(0b11100000u - 0x80));
	// 0xF0 - 0x80 = 0x70: same trick, bit 7 survives only for bytes >= 0xF0.
	Vector128<byte> fourthByte = Vector128.Create((byte)(0b11110000u - 0x80));
	// Mask that keeps the low nibble of every lane.
	Vector128<byte> v0f = Vector128.Create((byte)0x0F);
	// Mask that keeps only bit 7 of every lane.
	Vector128<byte> v80 = Vector128.Create((byte)0x80);
	// Main loop: validates the input one 16-byte block at a time. A block that is
	// entirely ASCII takes a fast path; any other block goes through the
	// three-table lookup check. On the first problem, control is handed to
	// RewindAndValidateWithErrors for the remainder starting at this block.
	// Performance note: we could process 64 bytes at a time for better speed in some cases.
	for (; processedLength + 16 <= inputLength; processedLength += 16)
	{

		Vector128<byte> currentBlock = AdvSimd.LoadVector128(pInputBuffer + processedLength);

		// The largest unsigned byte is <= 127 exactly when no lane has bit 7 set,
		// i.e. the whole block is ASCII. (The test must be "<= 127": with "> 127"
		// the two branches are swapped and non-ASCII blocks are never validated.)
		if (AdvSimd.Arm64.MaxAcross(currentBlock).ToScalar() <= 127)
		{
			// We have an ASCII block, no need to process it, but
			// we need to check if the previous block was incomplete.
			if (AdvSimd.Arm64.MaxAcross(prevIncomplete).ToScalar() != 0)
			{
				return SimdUnicode.UTF8.RewindAndValidateWithErrors(processedLength, pInputBuffer + processedLength, inputLength - processedLength);
			}
			prevIncomplete = Vector128<byte>.Zero;
		}
		else
		{
			// Contains non-ASCII characters, we need to do non-trivial processing.
			// prev1: lane i holds the byte one position before currentBlock's lane i;
			// lane 0 comes from the last byte of prevInputBlock (vextq-style extract).
			Vector128<byte> prev1 = AdvSimd.ExtractVector128(prevInputBlock, currentBlock, (byte)(16 - 1));
			// The 16-bit shift drags bits across byte boundaries, so the result is
			// masked with 0x0F to keep just each byte's own high nibble.
			Vector128<byte> byte_1_high = Vector128.Shuffle(shuf1, AdvSimd.ShiftRightLogical(prev1.AsUInt16(), 4).AsByte() & v0f);
			Vector128<byte> byte_1_low = Vector128.Shuffle(shuf2, (prev1 & v0f));
			Vector128<byte> byte_2_high = Vector128.Shuffle(shuf3, AdvSimd.ShiftRightLogical(currentBlock.AsUInt16(), 4).AsByte() & v0f);
			// A flag bit survives only when all three table lookups set it.
			Vector128<byte> sc = AdvSimd.And(AdvSimd.And(byte_1_high, byte_1_low), byte_2_high);
			// prev2 / prev3: the bytes two and three positions before each lane.
			Vector128<byte> prev2 = AdvSimd.ExtractVector128(prevInputBlock, currentBlock, (byte)(16 - 2));
			Vector128<byte> prev3 = AdvSimd.ExtractVector128(prevInputBlock, currentBlock, (byte)(16 - 3));
			prevInputBlock = currentBlock;
			// Bit 7 is set where the byte two back is >= 0xE0 ...
			Vector128<byte> isThirdByte = AdvSimd.SubtractSaturate(prev2, thirdByte);
			// ... or where the byte three back is >= 0xF0.
			Vector128<byte> isFourthByte = AdvSimd.SubtractSaturate(prev3, fourthByte);
			Vector128<byte> must23 = AdvSimd.Or(isThirdByte, isFourthByte);
			Vector128<byte> must23As80 = AdvSimd.And(must23, v80);
			// Non-zero when sc carries any flag other than 0x80, or when sc's 0x80
			// bit disagrees with must23As80.
			Vector128<byte> error = AdvSimd.Xor(must23As80, sc);
			if (AdvSimd.Arm64.MaxAcross(error).ToScalar() != 0)
			{
				return SimdUnicode.UTF8.RewindAndValidateWithErrors(processedLength, pInputBuffer + processedLength, inputLength - processedLength);
			}
			// NOTE(review): maxValue is defined outside this excerpt; presumably it
			// is tuned so only an unfinished sequence at the block's tail leaves a
			// non-zero lane. Confirm against its definition.
			prevIncomplete = AdvSimd.SubtractSaturate(currentBlock, maxValue);
		}
	}