Created
November 3, 2023 21:53
-
-
Save hardfalcon/126697b51ee2c23bd55688c81c02f3ea to your computer and use it in GitHub Desktop.
Update the simdutf library bundled with Node.js 18.16.1 in Electron 26.4.3 from version 3.2.2 to 3.2.9 to fix AVX-related crashes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/patches/node/.patches b/patches/node/.patches | |
index a1a9b463cd..1e2d21f858 100644 | |
--- a/patches/node/.patches | |
+++ b/patches/node/.patches | |
@@ -45,3 +45,4 @@ net_fix_crash_due_to_simultaneous_close_shutdown_on_js_stream.patch | |
net_use_asserts_in_js_socket_stream_to_catch_races_in_future.patch | |
lib_fix_broadcastchannel_initialization_location.patch | |
win_process_avoid_assert_after_spawning_store_app_4152.patch | |
+simdutf-3.2.9.patch | |
diff --git a/patches/node/simdutf-3.2.9.patch b/patches/node/simdutf-3.2.9.patch | |
new file mode 100644 | |
index 0000000000..cda825e8f4 | |
--- /dev/null | |
+++ b/patches/node/simdutf-3.2.9.patch | |
@@ -0,0 +1,6330 @@ | |
+diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp | |
+index d3100c1561..bd5c137659 100644 | |
+--- a/deps/simdutf/simdutf.cpp | |
++++ b/deps/simdutf/simdutf.cpp | |
+@@ -1,8 +1,8 @@ | |
+-/* auto-generated on 2023-02-24 17:01:43 -0500. Do not edit! */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf.cpp | |
++/* auto-generated on 2023-05-12 15:20:29 -0400. Do not edit! */ | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf.cpp | |
+ /* begin file src/simdutf.cpp */ | |
+ #include "simdutf.h" | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=implementation.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=implementation.cpp | |
+ /* begin file src/implementation.cpp */ | |
+ #include <initializer_list> | |
+ #include <climits> | |
+@@ -26,7 +26,7 @@ std::string toBinaryString(T b) { | |
+ | |
+ // Implementations | |
+ // The best choice should always come first! | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/arm64.h | |
+ /* begin file src/simdutf/arm64.h */ | |
+ #ifndef SIMDUTF_ARM64_H | |
+ #define SIMDUTF_ARM64_H | |
+@@ -53,7 +53,7 @@ namespace arm64 { | |
+ } // namespace arm64 | |
+ } // namespace simdutf | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/implementation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/arm64/implementation.h | |
+ /* begin file src/simdutf/arm64/implementation.h */ | |
+ #ifndef SIMDUTF_ARM64_IMPLEMENTATION_H | |
+ #define SIMDUTF_ARM64_IMPLEMENTATION_H | |
+@@ -130,14 +130,14 @@ public: | |
+ #endif // SIMDUTF_ARM64_IMPLEMENTATION_H | |
+ /* end file src/simdutf/arm64/implementation.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/begin.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/arm64/begin.h | |
+ /* begin file src/simdutf/arm64/begin.h */ | |
+ // redefining SIMDUTF_IMPLEMENTATION to "arm64" | |
+ // #define SIMDUTF_IMPLEMENTATION arm64 | |
+ /* end file src/simdutf/arm64/begin.h */ | |
+ | |
+ // Declarations | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/intrinsics.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/arm64/intrinsics.h | |
+ /* begin file src/simdutf/arm64/intrinsics.h */ | |
+ #ifndef SIMDUTF_ARM64_INTRINSICS_H | |
+ #define SIMDUTF_ARM64_INTRINSICS_H | |
+@@ -149,7 +149,7 @@ public: | |
+ | |
+ #endif // SIMDUTF_ARM64_INTRINSICS_H | |
+ /* end file src/simdutf/arm64/intrinsics.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/bitmanipulation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/arm64/bitmanipulation.h | |
+ /* begin file src/simdutf/arm64/bitmanipulation.h */ | |
+ #ifndef SIMDUTF_ARM64_BITMANIPULATION_H | |
+ #define SIMDUTF_ARM64_BITMANIPULATION_H | |
+@@ -169,7 +169,7 @@ simdutf_really_inline int count_ones(uint64_t input_num) { | |
+ | |
+ #endif // SIMDUTF_ARM64_BITMANIPULATION_H | |
+ /* end file src/simdutf/arm64/bitmanipulation.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/simd.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/arm64/simd.h | |
+ /* begin file src/simdutf/arm64/simd.h */ | |
+ #ifndef SIMDUTF_ARM64_SIMD_H | |
+ #define SIMDUTF_ARM64_SIMD_H | |
+@@ -782,7 +782,7 @@ simdutf_really_inline int16x8_t make_int16x8_t(int16_t x1, int16_t x2, int16_t | |
+ ).to_bitmask(); | |
+ } | |
+ }; // struct simd8x64<T> | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/simd16-inl.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/arm64/simd16-inl.h | |
+ /* begin file src/simdutf/arm64/simd16-inl.h */ | |
+ template<typename T> | |
+ struct simd16; | |
+@@ -1097,7 +1097,7 @@ simdutf_really_inline simd16<int16_t>::operator simd16<uint16_t>() const { retur | |
+ #endif // SIMDUTF_ARM64_SIMD_H | |
+ /* end file src/simdutf/arm64/simd.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/end.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/arm64/end.h | |
+ /* begin file src/simdutf/arm64/end.h */ | |
+ /* end file src/simdutf/arm64/end.h */ | |
+ | |
+@@ -1105,7 +1105,7 @@ simdutf_really_inline simd16<int16_t>::operator simd16<uint16_t>() const { retur | |
+ | |
+ #endif // SIMDUTF_ARM64_H | |
+ /* end file src/simdutf/arm64.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/icelake.h | |
+ /* begin file src/simdutf/icelake.h */ | |
+ #ifndef SIMDUTF_ICELAKE_H | |
+ #define SIMDUTF_ICELAKE_H | |
+@@ -1133,7 +1133,7 @@ simdutf_really_inline simd16<int16_t>::operator simd16<uint16_t>() const { retur | |
+ #define SIMDUTF_IMPLEMENTATION_ICELAKE ((SIMDUTF_IS_X86_64) && (SIMDUTF_COMPILER_SUPPORTS_VBMI2)) | |
+ #endif | |
+ | |
+-// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see | |
++// To see why (__BMI__) && (__LZCNT__) are not part of this next line, see | |
+ // https://github.com/simdutf/simdutf/issues/1247 | |
+ #define SIMDUTF_CAN_ALWAYS_RUN_ICELAKE ((SIMDUTF_IMPLEMENTATION_ICELAKE) && (SIMDUTF_IS_X86_64) && (__AVX2__) && (SIMDUTF_HAS_AVX512F && \ | |
+ SIMDUTF_HAS_AVX512DQ && \ | |
+@@ -1157,7 +1157,7 @@ namespace icelake { | |
+ // | |
+ // These two need to be included outside SIMDUTF_TARGET_REGION | |
+ // | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/intrinsics.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/icelake/intrinsics.h | |
+ /* begin file src/simdutf/icelake/intrinsics.h */ | |
+ #ifndef SIMDUTF_ICELAKE_INTRINSICS_H | |
+ #define SIMDUTF_ICELAKE_INTRINSICS_H | |
+@@ -1217,7 +1217,6 @@ SIMDUTF_POP_DISABLE_WARNINGS | |
+ #include <tmmintrin.h> | |
+ #include <avxintrin.h> | |
+ #include <avx2intrin.h> | |
+-#include <wmmintrin.h> // for _mm_clmulepi64_si128 | |
+ // Important: we need the AVX-512 headers: | |
+ #include <avx512fintrin.h> | |
+ #include <avx512dqintrin.h> | |
+@@ -1268,7 +1267,7 @@ inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, u | |
+ | |
+ #endif // SIMDUTF_HASWELL_INTRINSICS_H | |
+ /* end file src/simdutf/icelake/intrinsics.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/implementation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/icelake/implementation.h | |
+ /* begin file src/simdutf/icelake/implementation.h */ | |
+ #ifndef SIMDUTF_ICELAKE_IMPLEMENTATION_H | |
+ #define SIMDUTF_ICELAKE_IMPLEMENTATION_H | |
+@@ -1286,7 +1285,7 @@ public: | |
+ simdutf_really_inline implementation() : simdutf::implementation( | |
+ "icelake", | |
+ "Intel AVX512 (AVX-512BW, AVX-512CD, AVX-512VL, AVX-512VBMI2 extensions)", | |
+- internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 ) {} | |
++ internal::instruction_set::AVX2 | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 ) {} | |
+ simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final; | |
+ simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; | |
+ simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; | |
+@@ -1351,7 +1350,7 @@ public: | |
+ // | |
+ // The rest need to be inside the region | |
+ // | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/begin.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/icelake/begin.h | |
+ /* begin file src/simdutf/icelake/begin.h */ | |
+ // redefining SIMDUTF_IMPLEMENTATION to "icelake" | |
+ // #define SIMDUTF_IMPLEMENTATION icelake | |
+@@ -1367,7 +1366,7 @@ SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) | |
+ #endif // end of workaround | |
+ /* end file src/simdutf/icelake/begin.h */ | |
+ // Declarations | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/bitmanipulation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/icelake/bitmanipulation.h | |
+ /* begin file src/simdutf/icelake/bitmanipulation.h */ | |
+ #ifndef SIMDUTF_ICELAKE_BITMANIPULATION_H | |
+ #define SIMDUTF_ICELAKE_BITMANIPULATION_H | |
+@@ -1393,7 +1392,7 @@ simdutf_really_inline long long int count_ones(uint64_t input_num) { | |
+ | |
+ #endif // SIMDUTF_ICELAKE_BITMANIPULATION_H | |
+ /* end file src/simdutf/icelake/bitmanipulation.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/end.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/icelake/end.h | |
+ /* begin file src/simdutf/icelake/end.h */ | |
+ #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE | |
+ // nothing needed. | |
+@@ -1412,7 +1411,7 @@ SIMDUTF_POP_DISABLE_WARNINGS | |
+ #endif // SIMDUTF_IMPLEMENTATION_ICELAKE | |
+ #endif // SIMDUTF_ICELAKE_H | |
+ /* end file src/simdutf/icelake.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/haswell.h | |
+ /* begin file src/simdutf/haswell.h */ | |
+ #ifndef SIMDUTF_HASWELL_H | |
+ #define SIMDUTF_HASWELL_H | |
+@@ -1439,13 +1438,13 @@ SIMDUTF_POP_DISABLE_WARNINGS | |
+ #endif | |
+ | |
+ #endif | |
+-// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see | |
++// To see why (__BMI__) && (__LZCNT__) are not part of this next line, see | |
+ // https://github.com/simdutf/simdutf/issues/1247 | |
+ #define SIMDUTF_CAN_ALWAYS_RUN_HASWELL ((SIMDUTF_IMPLEMENTATION_HASWELL) && (SIMDUTF_IS_X86_64) && (__AVX2__)) | |
+ | |
+ #if SIMDUTF_IMPLEMENTATION_HASWELL | |
+ | |
+-#define SIMDUTF_TARGET_HASWELL SIMDUTF_TARGET_REGION("avx2,bmi,pclmul,lzcnt") | |
++#define SIMDUTF_TARGET_HASWELL SIMDUTF_TARGET_REGION("avx2,bmi,lzcnt") | |
+ | |
+ namespace simdutf { | |
+ /** | |
+@@ -1458,7 +1457,7 @@ namespace haswell { | |
+ // | |
+ // These two need to be included outside SIMDUTF_TARGET_REGION | |
+ // | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/implementation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/haswell/implementation.h | |
+ /* begin file src/simdutf/haswell/implementation.h */ | |
+ #ifndef SIMDUTF_HASWELL_IMPLEMENTATION_H | |
+ #define SIMDUTF_HASWELL_IMPLEMENTATION_H | |
+@@ -1475,7 +1474,7 @@ public: | |
+ simdutf_really_inline implementation() : simdutf::implementation( | |
+ "haswell", | |
+ "Intel/AMD AVX2", | |
+- internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | |
++ internal::instruction_set::AVX2 | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | |
+ ) {} | |
+ simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final; | |
+ simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; | |
+@@ -1537,7 +1536,7 @@ public: | |
+ | |
+ #endif // SIMDUTF_HASWELL_IMPLEMENTATION_H | |
+ /* end file src/simdutf/haswell/implementation.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/intrinsics.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/haswell/intrinsics.h | |
+ /* begin file src/simdutf/haswell/intrinsics.h */ | |
+ #ifndef SIMDUTF_HASWELL_INTRINSICS_H | |
+ #define SIMDUTF_HASWELL_INTRINSICS_H | |
+@@ -1592,7 +1591,6 @@ SIMDUTF_POP_DISABLE_WARNINGS | |
+ #include <tmmintrin.h> | |
+ #include <avxintrin.h> | |
+ #include <avx2intrin.h> | |
+-#include <wmmintrin.h> // for _mm_clmulepi64_si128 | |
+ // unfortunately, we may not get _blsr_u64, but, thankfully, clang | |
+ // has it as a macro. | |
+ #ifndef _blsr_u64 | |
+@@ -1607,7 +1605,7 @@ SIMDUTF_POP_DISABLE_WARNINGS | |
+ // | |
+ // The rest need to be inside the region | |
+ // | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/begin.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/haswell/begin.h | |
+ /* begin file src/simdutf/haswell/begin.h */ | |
+ // redefining SIMDUTF_IMPLEMENTATION to "haswell" | |
+ // #define SIMDUTF_IMPLEMENTATION haswell | |
+@@ -1623,7 +1621,7 @@ SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) | |
+ #endif // end of workaround | |
+ /* end file src/simdutf/haswell/begin.h */ | |
+ // Declarations | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/bitmanipulation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/haswell/bitmanipulation.h | |
+ /* begin file src/simdutf/haswell/bitmanipulation.h */ | |
+ #ifndef SIMDUTF_HASWELL_BITMANIPULATION_H | |
+ #define SIMDUTF_HASWELL_BITMANIPULATION_H | |
+@@ -1649,7 +1647,7 @@ simdutf_really_inline long long int count_ones(uint64_t input_num) { | |
+ | |
+ #endif // SIMDUTF_HASWELL_BITMANIPULATION_H | |
+ /* end file src/simdutf/haswell/bitmanipulation.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/simd.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/haswell/simd.h | |
+ /* begin file src/simdutf/haswell/simd.h */ | |
+ #ifndef SIMDUTF_HASWELL_SIMD_H | |
+ #define SIMDUTF_HASWELL_SIMD_H | |
+@@ -2045,7 +2043,7 @@ namespace simd { | |
+ } | |
+ }; // struct simd8x64<T> | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/simd16-inl.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/haswell/simd16-inl.h | |
+ /* begin file src/simdutf/haswell/simd16-inl.h */ | |
+ #ifdef __GNUC__ | |
+ #if __GNUC__ < 8 | |
+@@ -2325,7 +2323,7 @@ struct simd16<uint16_t>: base16_numeric<uint16_t> { | |
+ #endif // SIMDUTF_HASWELL_SIMD_H | |
+ /* end file src/simdutf/haswell/simd.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/end.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/haswell/end.h | |
+ /* begin file src/simdutf/haswell/end.h */ | |
+ #if SIMDUTF_CAN_ALWAYS_RUN_HASWELL | |
+ // nothing needed. | |
+@@ -2342,7 +2340,7 @@ SIMDUTF_POP_DISABLE_WARNINGS | |
+ #endif // SIMDUTF_IMPLEMENTATION_HASWELL | |
+ #endif // SIMDUTF_HASWELL_COMMON_H | |
+ /* end file src/simdutf/haswell.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/westmere.h | |
+ /* begin file src/simdutf/westmere.h */ | |
+ #ifndef SIMDUTF_WESTMERE_H | |
+ #define SIMDUTF_WESTMERE_H | |
+@@ -2366,11 +2364,11 @@ SIMDUTF_POP_DISABLE_WARNINGS | |
+ | |
+ #endif | |
+ | |
+-#define SIMDUTF_CAN_ALWAYS_RUN_WESTMERE (SIMDUTF_IMPLEMENTATION_WESTMERE && SIMDUTF_IS_X86_64 && __SSE4_2__ && __PCLMUL__) | |
++#define SIMDUTF_CAN_ALWAYS_RUN_WESTMERE (SIMDUTF_IMPLEMENTATION_WESTMERE && SIMDUTF_IS_X86_64 && __SSE4_2__) | |
+ | |
+ #if SIMDUTF_IMPLEMENTATION_WESTMERE | |
+ | |
+-#define SIMDUTF_TARGET_WESTMERE SIMDUTF_TARGET_REGION("sse4.2,pclmul") | |
++#define SIMDUTF_TARGET_WESTMERE SIMDUTF_TARGET_REGION("sse4.2") | |
+ | |
+ namespace simdutf { | |
+ /** | |
+@@ -2383,7 +2381,7 @@ namespace westmere { | |
+ // | |
+ // These two need to be included outside SIMDUTF_TARGET_REGION | |
+ // | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/implementation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/westmere/implementation.h | |
+ /* begin file src/simdutf/westmere/implementation.h */ | |
+ #ifndef SIMDUTF_WESTMERE_IMPLEMENTATION_H | |
+ #define SIMDUTF_WESTMERE_IMPLEMENTATION_H | |
+@@ -2399,7 +2397,7 @@ using namespace simdutf; | |
+ | |
+ class implementation final : public simdutf::implementation { | |
+ public: | |
+- simdutf_really_inline implementation() : simdutf::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} | |
++ simdutf_really_inline implementation() : simdutf::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42) {} | |
+ simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final; | |
+ simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; | |
+ simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final; | |
+@@ -2460,7 +2458,7 @@ public: | |
+ | |
+ #endif // SIMDUTF_WESTMERE_IMPLEMENTATION_H | |
+ /* end file src/simdutf/westmere/implementation.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/intrinsics.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/westmere/intrinsics.h | |
+ /* begin file src/simdutf/westmere/intrinsics.h */ | |
+ #ifndef SIMDUTF_WESTMERE_INTRINSICS_H | |
+ #define SIMDUTF_WESTMERE_INTRINSICS_H | |
+@@ -2499,7 +2497,6 @@ SIMDUTF_POP_DISABLE_WARNINGS | |
+ * from macros: | |
+ */ | |
+ #include <smmintrin.h> // for _mm_alignr_epi8 | |
+-#include <wmmintrin.h> // for _mm_clmulepi64_si128 | |
+ #endif | |
+ | |
+ | |
+@@ -2510,7 +2507,7 @@ SIMDUTF_POP_DISABLE_WARNINGS | |
+ // | |
+ // The rest need to be inside the region | |
+ // | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/begin.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/westmere/begin.h | |
+ /* begin file src/simdutf/westmere/begin.h */ | |
+ // redefining SIMDUTF_IMPLEMENTATION to "westmere" | |
+ // #define SIMDUTF_IMPLEMENTATION westmere | |
+@@ -2523,7 +2520,7 @@ SIMDUTF_TARGET_WESTMERE | |
+ /* end file src/simdutf/westmere/begin.h */ | |
+ | |
+ // Declarations | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/bitmanipulation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/westmere/bitmanipulation.h | |
+ /* begin file src/simdutf/westmere/bitmanipulation.h */ | |
+ #ifndef SIMDUTF_WESTMERE_BITMANIPULATION_H | |
+ #define SIMDUTF_WESTMERE_BITMANIPULATION_H | |
+@@ -2549,7 +2546,7 @@ simdutf_really_inline long long int count_ones(uint64_t input_num) { | |
+ | |
+ #endif // SIMDUTF_WESTMERE_BITMANIPULATION_H | |
+ /* end file src/simdutf/westmere/bitmanipulation.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/simd.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/westmere/simd.h | |
+ /* begin file src/simdutf/westmere/simd.h */ | |
+ #ifndef SIMDUTF_WESTMERE_SIMD_H | |
+ #define SIMDUTF_WESTMERE_SIMD_H | |
+@@ -2993,7 +2990,7 @@ namespace simd { | |
+ } | |
+ }; // struct simd8x64<T> | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/simd16-inl.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/westmere/simd16-inl.h | |
+ /* begin file src/simdutf/westmere/simd16-inl.h */ | |
+ template<typename T> | |
+ struct simd16; | |
+@@ -3270,7 +3267,7 @@ template<typename T> | |
+ #endif // SIMDUTF_WESTMERE_SIMD_INPUT_H | |
+ /* end file src/simdutf/westmere/simd.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/end.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/westmere/end.h | |
+ /* begin file src/simdutf/westmere/end.h */ | |
+ #if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE | |
+ // nothing needed. | |
+@@ -3283,7 +3280,7 @@ SIMDUTF_UNTARGET_REGION | |
+ #endif // SIMDUTF_IMPLEMENTATION_WESTMERE | |
+ #endif // SIMDUTF_WESTMERE_COMMON_H | |
+ /* end file src/simdutf/westmere.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/ppc64.h | |
+ /* begin file src/simdutf/ppc64.h */ | |
+ #ifndef SIMDUTF_PPC64_H | |
+ #define SIMDUTF_PPC64_H | |
+@@ -3310,7 +3307,7 @@ namespace ppc64 { | |
+ } // namespace ppc64 | |
+ } // namespace simdutf | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/implementation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/ppc64/implementation.h | |
+ /* begin file src/simdutf/ppc64/implementation.h */ | |
+ #ifndef SIMDUTF_PPC64_IMPLEMENTATION_H | |
+ #define SIMDUTF_PPC64_IMPLEMENTATION_H | |
+@@ -3389,14 +3386,14 @@ public: | |
+ #endif // SIMDUTF_PPC64_IMPLEMENTATION_H | |
+ /* end file src/simdutf/ppc64/implementation.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/begin.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/ppc64/begin.h | |
+ /* begin file src/simdutf/ppc64/begin.h */ | |
+ // redefining SIMDUTF_IMPLEMENTATION to "ppc64" | |
+ // #define SIMDUTF_IMPLEMENTATION ppc64 | |
+ /* end file src/simdutf/ppc64/begin.h */ | |
+ | |
+ // Declarations | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/intrinsics.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/ppc64/intrinsics.h | |
+ /* begin file src/simdutf/ppc64/intrinsics.h */ | |
+ #ifndef SIMDUTF_PPC64_INTRINSICS_H | |
+ #define SIMDUTF_PPC64_INTRINSICS_H | |
+@@ -3417,7 +3414,7 @@ public: | |
+ | |
+ #endif // SIMDUTF_PPC64_INTRINSICS_H | |
+ /* end file src/simdutf/ppc64/intrinsics.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/bitmanipulation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/ppc64/bitmanipulation.h | |
+ /* begin file src/simdutf/ppc64/bitmanipulation.h */ | |
+ #ifndef SIMDUTF_PPC64_BITMANIPULATION_H | |
+ #define SIMDUTF_PPC64_BITMANIPULATION_H | |
+@@ -3443,7 +3440,7 @@ simdutf_really_inline int count_ones(uint64_t input_num) { | |
+ | |
+ #endif // SIMDUTF_PPC64_BITMANIPULATION_H | |
+ /* end file src/simdutf/ppc64/bitmanipulation.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/simd.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/ppc64/simd.h | |
+ /* begin file src/simdutf/ppc64/simd.h */ | |
+ #ifndef SIMDUTF_PPC64_SIMD_H | |
+ #define SIMDUTF_PPC64_SIMD_H | |
+@@ -3935,7 +3932,7 @@ template <typename T> struct simd8x64 { | |
+ #endif // SIMDUTF_PPC64_SIMD_INPUT_H | |
+ /* end file src/simdutf/ppc64/simd.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/end.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/ppc64/end.h | |
+ /* begin file src/simdutf/ppc64/end.h */ | |
+ /* end file src/simdutf/ppc64/end.h */ | |
+ | |
+@@ -3943,7 +3940,7 @@ template <typename T> struct simd8x64 { | |
+ | |
+ #endif // SIMDUTF_PPC64_H | |
+ /* end file src/simdutf/ppc64.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/fallback.h | |
+ /* begin file src/simdutf/fallback.h */ | |
+ #ifndef SIMDUTF_FALLBACK_H | |
+ #define SIMDUTF_FALLBACK_H | |
+@@ -3972,7 +3969,7 @@ namespace fallback { | |
+ } // namespace fallback | |
+ } // namespace simdutf | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/implementation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/fallback/implementation.h | |
+ /* begin file src/simdutf/fallback/implementation.h */ | |
+ #ifndef SIMDUTF_FALLBACK_IMPLEMENTATION_H | |
+ #define SIMDUTF_FALLBACK_IMPLEMENTATION_H | |
+@@ -4053,14 +4050,14 @@ public: | |
+ #endif // SIMDUTF_FALLBACK_IMPLEMENTATION_H | |
+ /* end file src/simdutf/fallback/implementation.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/begin.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/fallback/begin.h | |
+ /* begin file src/simdutf/fallback/begin.h */ | |
+ // redefining SIMDUTF_IMPLEMENTATION to "fallback" | |
+ // #define SIMDUTF_IMPLEMENTATION fallback | |
+ /* end file src/simdutf/fallback/begin.h */ | |
+ | |
+ // Declarations | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/bitmanipulation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/fallback/bitmanipulation.h | |
+ /* begin file src/simdutf/fallback/bitmanipulation.h */ | |
+ #ifndef SIMDUTF_FALLBACK_BITMANIPULATION_H | |
+ #define SIMDUTF_FALLBACK_BITMANIPULATION_H | |
+@@ -4095,7 +4092,7 @@ static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { | |
+ #endif // SIMDUTF_FALLBACK_BITMANIPULATION_H | |
+ /* end file src/simdutf/fallback/bitmanipulation.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/end.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/fallback/end.h | |
+ /* begin file src/simdutf/fallback/end.h */ | |
+ /* end file src/simdutf/fallback/end.h */ | |
+ | |
+@@ -4812,7 +4809,7 @@ simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * | |
+ return get_active_implementation()->convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer); | |
+ } | |
+ simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept { | |
+- #if BIG_ENDIAN | |
++ #if SIMDUTF_IS_BIG_ENDIAN | |
+ return convert_valid_utf16be_to_utf8(buf, len, utf8_buffer); | |
+ #else | |
+ return convert_valid_utf16le_to_utf8(buf, len, utf8_buffer); | |
+@@ -4984,7 +4981,7 @@ const implementation * builtin_implementation() { | |
+ } // namespace simdutf | |
+ | |
+ /* end file src/implementation.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=encoding_types.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=encoding_types.cpp | |
+ /* begin file src/encoding_types.cpp */ | |
+ | |
+ namespace simdutf { | |
+@@ -5046,7 +5043,7 @@ encoding_type check_bom(const char* byte, size_t length) { | |
+ } | |
+ } | |
+ /* end file src/encoding_types.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=error.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=error.cpp | |
+ /* begin file src/error.cpp */ | |
+ namespace simdutf { | |
+ | |
+@@ -5058,7 +5055,7 @@ namespace simdutf { | |
+ /* end file src/error.cpp */ | |
+ // The large tables should be included once and they | |
+ // should not depend on a kernel. | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=tables/utf8_to_utf16_tables.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=tables/utf8_to_utf16_tables.h | |
+ /* begin file src/tables/utf8_to_utf16_tables.h */ | |
+ #ifndef SIMDUTF_UTF8_TO_UTF16_TABLES_H | |
+ #define SIMDUTF_UTF8_TO_UTF16_TABLES_H | |
+@@ -5294,31 +5291,31 @@ const uint8_t shufutf8[209][16] = | |
+ /* number of two + three bytes : 145 */ | |
+ /* number of two + three + four bytes : 209 */ | |
+ const uint8_t utf8bigindex[4096][2] = | |
+-{ {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++{ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {147, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {150, 5}, | |
+ {162, 5}, | |
+ {65, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {153, 5}, | |
+ {165, 5}, | |
+ {67, 5}, | |
+@@ -5326,15 +5323,15 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {148, 6}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {151, 6}, | |
+ {163, 6}, | |
+ {66, 6}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {154, 6}, | |
+ {166, 6}, | |
+ {68, 6}, | |
+@@ -5342,7 +5339,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {169, 6}, | |
+ {70, 6}, | |
+@@ -5358,15 +5355,15 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+ {145, 3}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {155, 7}, | |
+ {167, 7}, | |
+ {69, 7}, | |
+@@ -5374,7 +5371,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {170, 7}, | |
+ {71, 7}, | |
+@@ -5390,8 +5387,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {185, 7}, | |
+@@ -5406,7 +5403,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -5422,15 +5419,15 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {156, 8}, | |
+ {168, 8}, | |
+ {146, 4}, | |
+@@ -5438,7 +5435,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {171, 8}, | |
+ {72, 8}, | |
+@@ -5454,8 +5451,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {186, 8}, | |
+@@ -5470,7 +5467,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -5486,10 +5483,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -5502,7 +5499,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -5518,8 +5515,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -5534,7 +5531,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -5550,23 +5547,23 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {160, 9}, | |
+ {172, 9}, | |
+ {147, 5}, | |
+@@ -5582,8 +5579,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {175, 9}, | |
+ {148, 6}, | |
+ {187, 9}, | |
+@@ -5598,7 +5595,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {111, 9}, | |
+ {70, 6}, | |
+@@ -5614,10 +5611,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {190, 9}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -5630,7 +5627,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {113, 9}, | |
+ {71, 7}, | |
+@@ -5646,8 +5643,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {19, 9}, | |
+ {35, 9}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {137, 9}, | |
+@@ -5662,7 +5659,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {21, 9}, | |
+ {37, 9}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -5678,13 +5675,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {205, 9}, | |
+ {156, 8}, | |
+@@ -5694,7 +5691,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {115, 9}, | |
+ {72, 8}, | |
+@@ -5710,8 +5707,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {139, 9}, | |
+@@ -5726,7 +5723,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {22, 9}, | |
+ {38, 9}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -5742,10 +5739,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -5758,7 +5755,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -5774,8 +5771,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -5790,7 +5787,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -5806,31 +5803,31 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {147, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {150, 5}, | |
+ {162, 5}, | |
+ {65, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {153, 5}, | |
+ {165, 5}, | |
+ {67, 5}, | |
+@@ -5838,8 +5835,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {176, 10}, | |
+ {148, 6}, | |
+ {188, 10}, | |
+@@ -5854,7 +5851,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {169, 6}, | |
+ {70, 6}, | |
+@@ -5870,10 +5867,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {191, 10}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -5886,7 +5883,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {114, 10}, | |
+ {71, 7}, | |
+@@ -5902,8 +5899,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {138, 10}, | |
+@@ -5918,7 +5915,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -5934,13 +5931,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {206, 10}, | |
+ {156, 8}, | |
+@@ -5950,7 +5947,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {116, 10}, | |
+ {72, 8}, | |
+@@ -5966,8 +5963,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {140, 10}, | |
+@@ -5982,7 +5979,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {23, 10}, | |
+ {39, 10}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -5998,10 +5995,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -6014,7 +6011,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -6030,8 +6027,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -6046,7 +6043,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -6062,23 +6059,23 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {160, 9}, | |
+ {172, 9}, | |
+ {147, 5}, | |
+@@ -6094,8 +6091,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {175, 9}, | |
+ {148, 6}, | |
+ {142, 10}, | |
+@@ -6110,7 +6107,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {111, 9}, | |
+ {70, 6}, | |
+@@ -6126,10 +6123,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {190, 9}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -6142,7 +6139,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {113, 9}, | |
+ {71, 7}, | |
+@@ -6158,8 +6155,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {19, 9}, | |
+ {35, 9}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {137, 9}, | |
+@@ -6174,7 +6171,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {21, 9}, | |
+ {37, 9}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -6190,13 +6187,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {205, 9}, | |
+ {156, 8}, | |
+@@ -6206,7 +6203,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {115, 9}, | |
+ {72, 8}, | |
+@@ -6222,8 +6219,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {139, 9}, | |
+@@ -6238,7 +6235,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {22, 9}, | |
+ {38, 9}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -6254,10 +6251,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -6270,7 +6267,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -6286,8 +6283,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -6302,7 +6299,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -6318,31 +6315,31 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {147, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {150, 5}, | |
+ {162, 5}, | |
+ {65, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {153, 5}, | |
+ {165, 5}, | |
+ {67, 5}, | |
+@@ -6350,15 +6347,15 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {148, 6}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {151, 6}, | |
+ {163, 6}, | |
+ {66, 6}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {154, 6}, | |
+ {166, 6}, | |
+ {68, 6}, | |
+@@ -6366,7 +6363,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {169, 6}, | |
+ {70, 6}, | |
+@@ -6382,10 +6379,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {192, 11}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -6398,7 +6395,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {170, 7}, | |
+ {71, 7}, | |
+@@ -6414,8 +6411,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {185, 7}, | |
+@@ -6430,7 +6427,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -6446,13 +6443,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {207, 11}, | |
+ {156, 8}, | |
+@@ -6462,7 +6459,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {117, 11}, | |
+ {72, 8}, | |
+@@ -6478,8 +6475,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {141, 11}, | |
+@@ -6494,7 +6491,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -6510,10 +6507,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -6526,7 +6523,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -6542,8 +6539,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -6558,7 +6555,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -6574,23 +6571,23 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {160, 9}, | |
+ {172, 9}, | |
+ {147, 5}, | |
+@@ -6606,8 +6603,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {175, 9}, | |
+ {148, 6}, | |
+ {143, 11}, | |
+@@ -6622,7 +6619,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {111, 9}, | |
+ {70, 6}, | |
+@@ -6638,10 +6635,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {190, 9}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -6654,7 +6651,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {113, 9}, | |
+ {71, 7}, | |
+@@ -6670,8 +6667,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {19, 9}, | |
+ {35, 9}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {137, 9}, | |
+@@ -6686,7 +6683,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {21, 9}, | |
+ {37, 9}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -6702,13 +6699,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {205, 9}, | |
+ {156, 8}, | |
+@@ -6718,7 +6715,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {115, 9}, | |
+ {72, 8}, | |
+@@ -6734,8 +6731,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {139, 9}, | |
+@@ -6750,7 +6747,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {22, 9}, | |
+ {38, 9}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -6766,10 +6763,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -6782,7 +6779,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -6798,8 +6795,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -6814,7 +6811,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -6830,31 +6827,31 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {147, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {150, 5}, | |
+ {162, 5}, | |
+ {65, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {153, 5}, | |
+ {165, 5}, | |
+ {67, 5}, | |
+@@ -6862,8 +6859,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {176, 10}, | |
+ {148, 6}, | |
+ {188, 10}, | |
+@@ -6878,7 +6875,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {169, 6}, | |
+ {70, 6}, | |
+@@ -6894,10 +6891,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {191, 10}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -6910,7 +6907,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {114, 10}, | |
+ {71, 7}, | |
+@@ -6926,8 +6923,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {138, 10}, | |
+@@ -6942,7 +6939,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -6958,13 +6955,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {206, 10}, | |
+ {156, 8}, | |
+@@ -6974,7 +6971,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {116, 10}, | |
+ {72, 8}, | |
+@@ -6990,8 +6987,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {140, 10}, | |
+@@ -7006,7 +7003,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {23, 10}, | |
+ {39, 10}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -7022,10 +7019,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -7038,7 +7035,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -7054,8 +7051,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -7070,7 +7067,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -7086,23 +7083,23 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {160, 9}, | |
+ {172, 9}, | |
+ {147, 5}, | |
+@@ -7118,8 +7115,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {175, 9}, | |
+ {148, 6}, | |
+ {142, 10}, | |
+@@ -7134,7 +7131,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {111, 9}, | |
+ {70, 6}, | |
+@@ -7150,10 +7147,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {190, 9}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -7166,7 +7163,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {113, 9}, | |
+ {71, 7}, | |
+@@ -7182,8 +7179,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {19, 9}, | |
+ {35, 9}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {137, 9}, | |
+@@ -7198,7 +7195,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {21, 9}, | |
+ {37, 9}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -7214,13 +7211,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {205, 9}, | |
+ {156, 8}, | |
+@@ -7230,7 +7227,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {115, 9}, | |
+ {72, 8}, | |
+@@ -7246,8 +7243,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {139, 9}, | |
+@@ -7262,7 +7259,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {22, 9}, | |
+ {38, 9}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -7278,10 +7275,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -7294,7 +7291,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -7310,8 +7307,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -7326,7 +7323,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -7342,31 +7339,31 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {147, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {150, 5}, | |
+ {162, 5}, | |
+ {65, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {153, 5}, | |
+ {165, 5}, | |
+ {67, 5}, | |
+@@ -7374,15 +7371,15 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {148, 6}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {151, 6}, | |
+ {163, 6}, | |
+ {66, 6}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {154, 6}, | |
+ {166, 6}, | |
+ {68, 6}, | |
+@@ -7390,7 +7387,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {169, 6}, | |
+ {70, 6}, | |
+@@ -7406,15 +7403,15 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+ {145, 3}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {155, 7}, | |
+ {167, 7}, | |
+ {69, 7}, | |
+@@ -7422,7 +7419,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {170, 7}, | |
+ {71, 7}, | |
+@@ -7438,8 +7435,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {185, 7}, | |
+@@ -7454,7 +7451,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -7470,13 +7467,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {208, 12}, | |
+ {156, 8}, | |
+@@ -7486,7 +7483,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {171, 8}, | |
+ {72, 8}, | |
+@@ -7502,8 +7499,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {186, 8}, | |
+@@ -7518,7 +7515,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -7534,10 +7531,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -7550,7 +7547,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -7566,8 +7563,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -7582,7 +7579,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -7598,23 +7595,23 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {160, 9}, | |
+ {172, 9}, | |
+ {147, 5}, | |
+@@ -7630,8 +7627,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {175, 9}, | |
+ {148, 6}, | |
+ {144, 12}, | |
+@@ -7646,7 +7643,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {111, 9}, | |
+ {70, 6}, | |
+@@ -7662,10 +7659,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {190, 9}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -7678,7 +7675,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {113, 9}, | |
+ {71, 7}, | |
+@@ -7694,8 +7691,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {19, 9}, | |
+ {35, 9}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {137, 9}, | |
+@@ -7710,7 +7707,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {21, 9}, | |
+ {37, 9}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -7726,13 +7723,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {205, 9}, | |
+ {156, 8}, | |
+@@ -7742,7 +7739,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {115, 9}, | |
+ {72, 8}, | |
+@@ -7758,8 +7755,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {139, 9}, | |
+@@ -7774,7 +7771,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {22, 9}, | |
+ {38, 9}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -7790,10 +7787,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -7806,7 +7803,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -7822,8 +7819,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -7838,7 +7835,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -7854,31 +7851,31 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {147, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {150, 5}, | |
+ {162, 5}, | |
+ {65, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {153, 5}, | |
+ {165, 5}, | |
+ {67, 5}, | |
+@@ -7886,8 +7883,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {176, 10}, | |
+ {148, 6}, | |
+ {188, 10}, | |
+@@ -7902,7 +7899,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {169, 6}, | |
+ {70, 6}, | |
+@@ -7918,10 +7915,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {191, 10}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -7934,7 +7931,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {114, 10}, | |
+ {71, 7}, | |
+@@ -7950,8 +7947,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {138, 10}, | |
+@@ -7966,7 +7963,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -7982,13 +7979,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {206, 10}, | |
+ {156, 8}, | |
+@@ -7998,7 +7995,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {116, 10}, | |
+ {72, 8}, | |
+@@ -8014,8 +8011,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {140, 10}, | |
+@@ -8030,7 +8027,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {23, 10}, | |
+ {39, 10}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -8046,10 +8043,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -8062,7 +8059,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -8078,8 +8075,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -8094,7 +8091,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -8110,23 +8107,23 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {160, 9}, | |
+ {172, 9}, | |
+ {147, 5}, | |
+@@ -8142,8 +8139,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {175, 9}, | |
+ {148, 6}, | |
+ {142, 10}, | |
+@@ -8158,7 +8155,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {111, 9}, | |
+ {70, 6}, | |
+@@ -8174,10 +8171,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {190, 9}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -8190,7 +8187,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {113, 9}, | |
+ {71, 7}, | |
+@@ -8206,8 +8203,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {19, 9}, | |
+ {35, 9}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {137, 9}, | |
+@@ -8222,7 +8219,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {21, 9}, | |
+ {37, 9}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -8238,13 +8235,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {205, 9}, | |
+ {156, 8}, | |
+@@ -8254,7 +8251,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {115, 9}, | |
+ {72, 8}, | |
+@@ -8270,8 +8267,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {139, 9}, | |
+@@ -8286,7 +8283,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {22, 9}, | |
+ {38, 9}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -8302,10 +8299,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -8318,7 +8315,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -8334,8 +8331,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -8350,7 +8347,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -8366,31 +8363,31 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {147, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {150, 5}, | |
+ {162, 5}, | |
+ {65, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {153, 5}, | |
+ {165, 5}, | |
+ {67, 5}, | |
+@@ -8398,15 +8395,15 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {148, 6}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {151, 6}, | |
+ {163, 6}, | |
+ {66, 6}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {154, 6}, | |
+ {166, 6}, | |
+ {68, 6}, | |
+@@ -8414,7 +8411,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {169, 6}, | |
+ {70, 6}, | |
+@@ -8430,10 +8427,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {192, 11}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -8446,7 +8443,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {170, 7}, | |
+ {71, 7}, | |
+@@ -8462,8 +8459,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {185, 7}, | |
+@@ -8478,7 +8475,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -8494,13 +8491,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {207, 11}, | |
+ {156, 8}, | |
+@@ -8510,7 +8507,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {117, 11}, | |
+ {72, 8}, | |
+@@ -8526,8 +8523,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {141, 11}, | |
+@@ -8542,7 +8539,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -8558,10 +8555,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -8574,7 +8571,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -8590,8 +8587,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -8606,7 +8603,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -8622,23 +8619,23 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {160, 9}, | |
+ {172, 9}, | |
+ {147, 5}, | |
+@@ -8654,8 +8651,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {175, 9}, | |
+ {148, 6}, | |
+ {143, 11}, | |
+@@ -8670,7 +8667,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {111, 9}, | |
+ {70, 6}, | |
+@@ -8686,10 +8683,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {190, 9}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -8702,7 +8699,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {113, 9}, | |
+ {71, 7}, | |
+@@ -8718,8 +8715,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {19, 9}, | |
+ {35, 9}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {137, 9}, | |
+@@ -8734,7 +8731,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {21, 9}, | |
+ {37, 9}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -8750,13 +8747,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {205, 9}, | |
+ {156, 8}, | |
+@@ -8766,7 +8763,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {115, 9}, | |
+ {72, 8}, | |
+@@ -8782,8 +8779,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {139, 9}, | |
+@@ -8798,7 +8795,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {22, 9}, | |
+ {38, 9}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -8814,10 +8811,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -8830,7 +8827,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -8846,8 +8843,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -8862,7 +8859,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -8878,31 +8875,31 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {147, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {150, 5}, | |
+ {162, 5}, | |
+ {65, 5}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {153, 5}, | |
+ {165, 5}, | |
+ {67, 5}, | |
+@@ -8910,8 +8907,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {176, 10}, | |
+ {148, 6}, | |
+ {188, 10}, | |
+@@ -8926,7 +8923,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {169, 6}, | |
+ {70, 6}, | |
+@@ -8942,10 +8939,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {191, 10}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -8958,7 +8955,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {114, 10}, | |
+ {71, 7}, | |
+@@ -8974,8 +8971,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {138, 10}, | |
+@@ -8990,7 +8987,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -9006,13 +9003,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {206, 10}, | |
+ {156, 8}, | |
+@@ -9022,7 +9019,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {116, 10}, | |
+ {72, 8}, | |
+@@ -9038,8 +9035,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {140, 10}, | |
+@@ -9054,7 +9051,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {23, 10}, | |
+ {39, 10}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -9070,10 +9067,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -9086,7 +9083,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -9102,8 +9099,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -9118,7 +9115,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -9134,23 +9131,23 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {146, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {160, 9}, | |
+ {172, 9}, | |
+ {147, 5}, | |
+@@ -9166,8 +9163,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {175, 9}, | |
+ {148, 6}, | |
+ {142, 10}, | |
+@@ -9182,7 +9179,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {74, 6}, | |
+ {92, 6}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {111, 9}, | |
+ {70, 6}, | |
+@@ -9198,10 +9195,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {190, 9}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -9214,7 +9211,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {113, 9}, | |
+ {71, 7}, | |
+@@ -9230,8 +9227,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {19, 9}, | |
+ {35, 9}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {137, 9}, | |
+@@ -9246,7 +9243,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {21, 9}, | |
+ {37, 9}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -9262,13 +9259,13 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {16, 7}, | |
+ {32, 7}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {145, 3}, | |
+ {205, 9}, | |
+ {156, 8}, | |
+@@ -9278,7 +9275,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {149, 4}, | |
+ {161, 4}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {159, 8}, | |
+ {115, 9}, | |
+ {72, 8}, | |
+@@ -9294,8 +9291,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {73, 5}, | |
+ {91, 5}, | |
+ {64, 4}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {174, 8}, | |
+ {148, 6}, | |
+ {139, 9}, | |
+@@ -9310,7 +9307,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {22, 9}, | |
+ {38, 9}, | |
+ {3, 8}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {110, 8}, | |
+ {70, 6}, | |
+@@ -9326,10 +9323,10 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {17, 8}, | |
+ {33, 8}, | |
+ {0, 6}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {189, 8}, | |
+ {152, 7}, | |
+ {164, 7}, | |
+@@ -9342,7 +9339,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {75, 7}, | |
+ {93, 7}, | |
+ {64, 4}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {158, 7}, | |
+ {112, 8}, | |
+ {71, 7}, | |
+@@ -9358,8 +9355,8 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {18, 8}, | |
+ {34, 8}, | |
+ {1, 7}, | |
+- {0, 12}, | |
+- {0, 12}, | |
++ {209, 12}, | |
++ {209, 12}, | |
+ {173, 7}, | |
+ {148, 6}, | |
+ {136, 8}, | |
+@@ -9374,7 +9371,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ {20, 8}, | |
+ {36, 8}, | |
+ {2, 7}, | |
+- {0, 12}, | |
++ {209, 12}, | |
+ {157, 6}, | |
+ {109, 7}, | |
+ {70, 6}, | |
+@@ -9397,7 +9394,7 @@ const uint8_t utf8bigindex[4096][2] = | |
+ | |
+ #endif // SIMDUTF_UTF8_TO_UTF16_TABLES_H | |
+ /* end file src/tables/utf8_to_utf16_tables.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=tables/utf16_to_utf8_tables.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=tables/utf16_to_utf8_tables.h | |
+ /* begin file src/tables/utf16_to_utf8_tables.h */ | |
+ // file generated by scripts/sse_convert_utf16_to_utf8.py | |
+ #ifndef SIMDUTF_UTF16_TO_UTF8_TABLES_H | |
+@@ -9938,7 +9935,7 @@ namespace utf16_to_utf8 { | |
+ // End of tables. | |
+ | |
+ // The scalar routines should be included once. | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/ascii.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/ascii.h | |
+ /* begin file src/scalar/ascii.h */ | |
+ #ifndef SIMDUTF_ASCII_H | |
+ #define SIMDUTF_ASCII_H | |
+@@ -9999,7 +9996,7 @@ inline simdutf_warn_unused result validate_with_errors(const char *buf, size_t l | |
+ | |
+ #endif | |
+ /* end file src/scalar/ascii.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf8.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf8.h | |
+ /* begin file src/scalar/utf8.h */ | |
+ #ifndef SIMDUTF_UTF8_H | |
+ #define SIMDUTF_UTF8_H | |
+@@ -10189,7 +10186,7 @@ inline size_t utf16_length_from_utf8(const char* buf, size_t len) { | |
+ | |
+ #endif | |
+ /* end file src/scalar/utf8.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf16.h | |
+ /* begin file src/scalar/utf16.h */ | |
+ #ifndef SIMDUTF_UTF16_H | |
+ #define SIMDUTF_UTF16_H | |
+@@ -10303,7 +10300,7 @@ simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t si | |
+ | |
+ #endif | |
+ /* end file src/scalar/utf16.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf32.h | |
+ /* begin file src/scalar/utf32.h */ | |
+ #ifndef SIMDUTF_UTF32_H | |
+ #define SIMDUTF_UTF32_H | |
+@@ -10378,7 +10375,7 @@ inline size_t utf16_length_from_utf32(const char32_t* buf, size_t len) { | |
+ #endif | |
+ /* end file src/scalar/utf32.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf32_to_utf8/valid_utf32_to_utf8.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf32_to_utf8/valid_utf32_to_utf8.h | |
+ /* begin file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ | |
+ #ifndef SIMDUTF_VALID_UTF32_TO_UTF8_H | |
+ #define SIMDUTF_VALID_UTF32_TO_UTF8_H | |
+@@ -10445,7 +10442,7 @@ inline size_t convert_valid(const char32_t* buf, size_t len, char* utf8_output) | |
+ | |
+ #endif | |
+ /* end file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf32_to_utf8/utf32_to_utf8.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf32_to_utf8/utf32_to_utf8.h | |
+ /* begin file src/scalar/utf32_to_utf8/utf32_to_utf8.h */ | |
+ #ifndef SIMDUTF_UTF32_TO_UTF8_H | |
+ #define SIMDUTF_UTF32_TO_UTF8_H | |
+@@ -10561,7 +10558,7 @@ inline result convert_with_errors(const char32_t* buf, size_t len, char* utf8_ou | |
+ #endif | |
+ /* end file src/scalar/utf32_to_utf8/utf32_to_utf8.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf32_to_utf16/valid_utf32_to_utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf32_to_utf16/valid_utf32_to_utf16.h | |
+ /* begin file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ | |
+ #ifndef SIMDUTF_VALID_UTF32_TO_UTF16_H | |
+ #define SIMDUTF_VALID_UTF32_TO_UTF16_H | |
+@@ -10606,7 +10603,7 @@ inline size_t convert_valid(const char32_t* buf, size_t len, char16_t* utf16_out | |
+ | |
+ #endif | |
+ /* end file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf32_to_utf16/utf32_to_utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf32_to_utf16/utf32_to_utf16.h | |
+ /* begin file src/scalar/utf32_to_utf16/utf32_to_utf16.h */ | |
+ #ifndef SIMDUTF_UTF32_TO_UTF16_H | |
+ #define SIMDUTF_UTF32_TO_UTF16_H | |
+@@ -10682,7 +10679,7 @@ inline result convert_with_errors(const char32_t* buf, size_t len, char16_t* utf | |
+ #endif | |
+ /* end file src/scalar/utf32_to_utf16/utf32_to_utf16.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf16_to_utf8/valid_utf16_to_utf8.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf16_to_utf8/valid_utf16_to_utf8.h | |
+ /* begin file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ | |
+ #ifndef SIMDUTF_VALID_UTF16_TO_UTF8_H | |
+ #define SIMDUTF_VALID_UTF16_TO_UTF8_H | |
+@@ -10757,7 +10754,7 @@ inline size_t convert_valid(const char16_t* buf, size_t len, char* utf8_output) | |
+ | |
+ #endif | |
+ /* end file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf16_to_utf8/utf16_to_utf8.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf16_to_utf8/utf16_to_utf8.h | |
+ /* begin file src/scalar/utf16_to_utf8/utf16_to_utf8.h */ | |
+ #ifndef SIMDUTF_UTF16_TO_UTF8_H | |
+ #define SIMDUTF_UTF16_TO_UTF8_H | |
+@@ -10893,7 +10890,7 @@ inline result convert_with_errors(const char16_t* buf, size_t len, char* utf8_ou | |
+ #endif | |
+ /* end file src/scalar/utf16_to_utf8/utf16_to_utf8.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf16_to_utf32/valid_utf16_to_utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf16_to_utf32/valid_utf16_to_utf32.h | |
+ /* begin file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ | |
+ #ifndef SIMDUTF_VALID_UTF16_TO_UTF32_H | |
+ #define SIMDUTF_VALID_UTF16_TO_UTF32_H | |
+@@ -10935,7 +10932,7 @@ inline size_t convert_valid(const char16_t* buf, size_t len, char32_t* utf32_out | |
+ | |
+ #endif | |
+ /* end file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf16_to_utf32/utf16_to_utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf16_to_utf32/utf16_to_utf32.h | |
+ /* begin file src/scalar/utf16_to_utf32/utf16_to_utf32.h */ | |
+ #ifndef SIMDUTF_UTF16_TO_UTF32_H | |
+ #define SIMDUTF_UTF16_TO_UTF32_H | |
+@@ -11007,7 +11004,7 @@ inline result convert_with_errors(const char16_t* buf, size_t len, char32_t* utf | |
+ #endif | |
+ /* end file src/scalar/utf16_to_utf32/utf16_to_utf32.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf8_to_utf16/valid_utf8_to_utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf8_to_utf16/valid_utf8_to_utf16.h | |
+ /* begin file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ | |
+ #ifndef SIMDUTF_VALID_UTF8_TO_UTF16_H | |
+ #define SIMDUTF_VALID_UTF8_TO_UTF16_H | |
+@@ -11092,7 +11089,7 @@ inline size_t convert_valid(const char* buf, size_t len, char16_t* utf16_output) | |
+ | |
+ #endif | |
+ /* end file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf8_to_utf16/utf8_to_utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf8_to_utf16/utf8_to_utf16.h | |
+ /* begin file src/scalar/utf8_to_utf16/utf8_to_utf16.h */ | |
+ #ifndef SIMDUTF_UTF8_TO_UTF16_H | |
+ #define SIMDUTF_UTF8_TO_UTF16_H | |
+@@ -11299,8 +11296,10 @@ template <endianness endian> | |
+ inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char16_t* utf16_output) { | |
+ size_t extra_len{0}; | |
+ // We potentially need to go back in time and find a leading byte. | |
+- size_t how_far_back = 3; // 3 bytes in the past + current position | |
+- if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } | |
++ // In theory '3' would be sufficient, but sometimes the error can go back quite far. | |
++ size_t how_far_back = prior_bytes; | |
++ // size_t how_far_back = 3; // 3 bytes in the past + current position | |
++ // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; } | |
+ bool found_leading_bytes{false}; | |
+ // important: it is i <= how_far_back and not 'i < how_far_back'. | |
+ for(size_t i = 0; i <= how_far_back; i++) { | |
+@@ -11340,7 +11339,7 @@ inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf | |
+ #endif | |
+ /* end file src/scalar/utf8_to_utf16/utf8_to_utf16.h */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf8_to_utf32/valid_utf8_to_utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf8_to_utf32/valid_utf8_to_utf32.h | |
+ /* begin file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ | |
+ #ifndef SIMDUTF_VALID_UTF8_TO_UTF32_H | |
+ #define SIMDUTF_VALID_UTF8_TO_UTF32_H | |
+@@ -11406,7 +11405,7 @@ inline size_t convert_valid(const char* buf, size_t len, char32_t* utf32_output) | |
+ | |
+ #endif | |
+ /* end file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf8_to_utf32/utf8_to_utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=scalar/utf8_to_utf32/utf8_to_utf32.h | |
+ /* begin file src/scalar/utf8_to_utf32/utf8_to_utf32.h */ | |
+ #ifndef SIMDUTF_UTF8_TO_UTF32_H | |
+ #define SIMDUTF_UTF8_TO_UTF32_H | |
+@@ -11626,9 +11625,9 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS | |
+ | |
+ | |
+ #if SIMDUTF_IMPLEMENTATION_ARM64 | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/implementation.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=arm64/implementation.cpp | |
+ /* begin file src/arm64/implementation.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/begin.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/arm64/begin.h | |
+ /* begin file src/simdutf/arm64/begin.h */ | |
+ // redefining SIMDUTF_IMPLEMENTATION to "arm64" | |
+ // #define SIMDUTF_IMPLEMENTATION arm64 | |
+@@ -11664,7 +11663,7 @@ simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> | |
+ return is_third_byte ^ is_fourth_byte; | |
+ } | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_detect_encodings.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=arm64/arm_detect_encodings.cpp | |
+ /* begin file src/arm64/arm_detect_encodings.cpp */ | |
+ template<class checker> | |
+ // len is known to be a multiple of 2 when this is called | |
+@@ -11872,7 +11871,7 @@ int arm_detect_encodings(const char * buf, size_t len) { | |
+ } | |
+ /* end file src/arm64/arm_detect_encodings.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_validate_utf16.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=arm64/arm_validate_utf16.cpp | |
+ /* begin file src/arm64/arm_validate_utf16.cpp */ | |
+ template <endianness big_endian> | |
+ const char16_t* arm_validate_utf16(const char16_t* input, size_t size) { | |
+@@ -12022,7 +12021,7 @@ const result arm_validate_utf16_with_errors(const char16_t* input, size_t size) | |
+ return result(error_code::SUCCESS, input - start); | |
+ } | |
+ /* end file src/arm64/arm_validate_utf16.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_validate_utf32le.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=arm64/arm_validate_utf32le.cpp | |
+ /* begin file src/arm64/arm_validate_utf32le.cpp */ | |
+ | |
+ const char32_t* arm_validate_utf32le(const char32_t* input, size_t size) { | |
+@@ -12087,7 +12086,7 @@ const result arm_validate_utf32le_with_errors(const char32_t* input, size_t size | |
+ } | |
+ /* end file src/arm64/arm_validate_utf32le.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf8_to_utf16.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf8_to_utf16.cpp | |
+ /* begin file src/arm64/arm_convert_utf8_to_utf16.cpp */ | |
+ // Convert up to 12 bytes from utf8 to utf16 using a mask indicating the | |
+ // end of the code points. Only the least significant 12 bits of the mask | |
+@@ -12206,6 +12205,14 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ utf16_output += 4; | |
+ } else if (idx < 209) { | |
+ // TWO (2) input code-words | |
++ ////////////// | |
++ // There might be garbage inputs where a leading byte masquerades as a four-byte | |
++ // leading byte (by being followed by 3 continuation bytes), but is not greater than | |
++ // 0xf0. This could trigger a buffer overflow if we only counted leading | |
++ // bytes of the form 0xf0 as generating surrogate pairs, without further UTF-8 validation. | |
++ // Thus we must be careful to ensure that only leading bytes at least as large as 0xf0 generate surrogate pairs. | |
++ // We do so at the cost of an extra mask. | |
++ ///////////// | |
+ uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[idx])); | |
+ uint8x16_t perm = vqtbl1q_u8(in, sh); | |
+ uint8x16_t ascii = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x7f))); | |
+@@ -12217,8 +12224,14 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x400000)))), 1)); | |
+ middlehighbyte = veorq_u8(correct, middlehighbyte); | |
+ uint8x16_t middlehighbyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(middlehighbyte), 4)); | |
+- uint8x16_t highbyte = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x07000000))); | |
+- uint8x16_t highbyte_shifted =vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(highbyte), 6)); | |
++ // We deliberately carry the leading four bits if they are present, we remove | |
++ // them later when computing hightenbits. | |
++ uint8x16_t highbyte = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0xff000000))); | |
++ uint8x16_t highbyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(highbyte), 6)); | |
++ // When we need to generate a surrogate pair (leading byte >= 0xF0), then | |
++ // the corresponding 32-bit value in 'composed' will be greater than | |
++ // (0xf0000000>>6), that is, greater than 0x3c00000. This can be used later to identify the | |
++ // location of the surrogate pairs. | |
+ uint8x16_t composed = | |
+ vorrq_u8(vorrq_u8(ascii, middlebyte_shifted), | |
+ vorrq_u8(highbyte_shifted, middlehighbyte_shifted)); | |
+@@ -12226,7 +12239,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ vsubq_u32(vreinterpretq_u32_u8(composed), vmovq_n_u32(0x10000)); | |
+ uint32x4_t lowtenbits = | |
+ vandq_u32(composedminus, vmovq_n_u32(0x3ff)); | |
+- uint32x4_t hightenbits = vshrq_n_u32(composedminus, 10); | |
++ // Notice the 0x3ff mask: | |
++ uint32x4_t hightenbits = vandq_u32(vshrq_n_u32(composedminus, 10), vmovq_n_u32(0x3ff)); | |
+ uint32x4_t lowtenbitsadd = | |
+ vaddq_u32(lowtenbits, vmovq_n_u32(0xDC00)); | |
+ uint32x4_t hightenbitsadd = | |
+@@ -12244,13 +12258,13 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ uint32_t surrogate_buffer[4]; | |
+ vst1q_u32(surrogate_buffer, surrogates); | |
+ for (size_t i = 0; i < 3; i++) { | |
+- if (basic_buffer[i] < 65536) { | |
+- utf16_output[0] = !match_system(big_endian) ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]); | |
+- utf16_output++; | |
+- } else { | |
++ if(basic_buffer[i] > 0x3c00000) { | |
+ utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff); | |
+ utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16); | |
+ utf16_output += 2; | |
++ } else { | |
++ utf16_output[0] = !match_system(big_endian) ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]); | |
++ utf16_output++; | |
+ } | |
+ } | |
+ } else { | |
+@@ -12259,7 +12273,7 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ return consumed; | |
+ } | |
+ /* end file src/arm64/arm_convert_utf8_to_utf16.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf8_to_utf32.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf8_to_utf32.cpp | |
+ /* begin file src/arm64/arm_convert_utf8_to_utf32.cpp */ | |
+ // Convert up to 12 bytes from utf8 to utf32 using a mask indicating the | |
+ // end of the code points. Only the least significant 12 bits of the mask | |
+@@ -12396,7 +12410,7 @@ size_t convert_masked_utf8_to_utf32(const char *input, | |
+ } | |
+ /* end file src/arm64/arm_convert_utf8_to_utf32.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf16_to_utf8.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf16_to_utf8.cpp | |
+ /* begin file src/arm64/arm_convert_utf16_to_utf8.cpp */ | |
+ /* | |
+ The vectorized algorithm works on single SSE register i.e., it | |
+@@ -12587,29 +12601,29 @@ std::pair<const char16_t*, char*> arm_convert_utf16_to_utf8(const char16_t* buf, | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+-#define vec(x) vmovq_n_u16(static_cast<uint16_t>(x)) | |
++#define simdutf_vec(x) vmovq_n_u16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even))); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const uint16x8_t t1 = vandq_u16(t0, vec(0b0011111101111111)); | |
++ const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const uint16x8_t t2 = vorrq_u16 (t1, vec(0b1000000000000000)); | |
++ const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] | |
+ const uint16x8_t s0 = vshrq_n_u16(in, 12); | |
+ // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] | |
+- const uint16x8_t s1 = vandq_u16(in, vec(0b0000111111000000)); | |
++ const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000)); | |
+ // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] | |
+ const uint16x8_t s1s = vshlq_n_u16(s1, 2); | |
+ // [00bb|bbbb|0000|aaaa] | |
+ const uint16x8_t s2 = vorrq_u16(s0, s1s); | |
+ // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const uint16x8_t s3 = vorrq_u16(s2, vec(0b1100000011100000)); | |
++ const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); | |
+ const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); | |
+ const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff); | |
+- const uint16x8_t m0 = vbicq_u16(vec(0b0100000000000000), one_or_two_bytes_bytemask); | |
++ const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask); | |
+ const uint16x8_t s4 = veorq_u16(s3, m0); | |
+-#undef vec | |
++#undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); | |
+@@ -12854,29 +12868,29 @@ std::pair<result, char*> arm_convert_utf16_to_utf8_with_errors(const char16_t* b | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+-#define vec(x) vmovq_n_u16(static_cast<uint16_t>(x)) | |
++#define simdutf_vec(x) vmovq_n_u16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even))); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const uint16x8_t t1 = vandq_u16(t0, vec(0b0011111101111111)); | |
++ const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const uint16x8_t t2 = vorrq_u16 (t1, vec(0b1000000000000000)); | |
++ const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] | |
+ const uint16x8_t s0 = vshrq_n_u16(in, 12); | |
+ // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] | |
+- const uint16x8_t s1 = vandq_u16(in, vec(0b0000111111000000)); | |
++ const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000)); | |
+ // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] | |
+ const uint16x8_t s1s = vshlq_n_u16(s1, 2); | |
+ // [00bb|bbbb|0000|aaaa] | |
+ const uint16x8_t s2 = vorrq_u16(s0, s1s); | |
+ // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const uint16x8_t s3 = vorrq_u16(s2, vec(0b1100000011100000)); | |
++ const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); | |
+ const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); | |
+ const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff); | |
+- const uint16x8_t m0 = vbicq_u16(vec(0b0100000000000000), one_or_two_bytes_bytemask); | |
++ const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask); | |
+ const uint16x8_t s4 = veorq_u16(s3, m0); | |
+-#undef vec | |
++#undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); | |
+@@ -12976,7 +12990,7 @@ std::pair<result, char*> arm_convert_utf16_to_utf8_with_errors(const char16_t* b | |
+ return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast<char*>(utf8_output)); | |
+ } | |
+ /* end file src/arm64/arm_convert_utf16_to_utf8.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf16_to_utf32.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf16_to_utf32.cpp | |
+ /* begin file src/arm64/arm_convert_utf16_to_utf32.cpp */ | |
+ /* | |
+ The vectorized algorithm works on single SSE register i.e., it | |
+@@ -13153,7 +13167,7 @@ std::pair<result, char32_t*> arm_convert_utf16_to_utf32_with_errors(const char16 | |
+ } | |
+ /* end file src/arm64/arm_convert_utf16_to_utf32.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf32_to_utf8.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf32_to_utf8.cpp | |
+ /* begin file src/arm64/arm_convert_utf32_to_utf8.cpp */ | |
+ std::pair<const char32_t*, char*> arm_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_out) { | |
+ uint8_t * utf8_output = reinterpret_cast<uint8_t*>(utf8_out); | |
+@@ -13268,29 +13282,29 @@ std::pair<const char32_t*, char*> arm_convert_utf32_to_utf8(const char32_t* buf, | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+- #define vec(x) vmovq_n_u16(static_cast<uint16_t>(x)) | |
++ #define simdutf_vec(x) vmovq_n_u16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), vreinterpretq_u8_u16(dup_even))); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const uint16x8_t t1 = vandq_u16(t0, vec(0b0011111101111111)); | |
++ const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const uint16x8_t t2 = vorrq_u16 (t1, vec(0b1000000000000000)); | |
++ const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] | |
+ const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12); | |
+ // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] | |
+- const uint16x8_t s1 = vandq_u16(utf16_packed, vec(0b0000111111000000)); | |
++ const uint16x8_t s1 = vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000)); | |
+ // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] | |
+ const uint16x8_t s1s = vshlq_n_u16(s1, 2); | |
+ // [00bb|bbbb|0000|aaaa] | |
+ const uint16x8_t s2 = vorrq_u16(s0, s1s); | |
+ // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const uint16x8_t s3 = vorrq_u16(s2, vec(0b1100000011100000)); | |
++ const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); | |
+ const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); | |
+ const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(utf16_packed, v_07ff); | |
+- const uint16x8_t m0 = vbicq_u16(vec(0b0100000000000000), one_or_two_bytes_bytemask); | |
++ const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask); | |
+ const uint16x8_t s4 = veorq_u16(s3, m0); | |
+- #undef vec | |
++ #undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); | |
+@@ -13334,7 +13348,6 @@ std::pair<const char32_t*, char*> arm_convert_utf32_to_utf8(const char32_t* buf, | |
+ continue; | |
+ }*/ | |
+ const uint8_t mask0 = uint8_t(mask); | |
+- | |
+ const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0]; | |
+ const uint8x16_t shuffle0 = vld1q_u8(row0 + 1); | |
+ const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0); | |
+@@ -13508,29 +13521,29 @@ std::pair<result, char*> arm_convert_utf32_to_utf8_with_errors(const char32_t* b | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+- #define vec(x) vmovq_n_u16(static_cast<uint16_t>(x)) | |
++ #define simdutf_vec(x) vmovq_n_u16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), vreinterpretq_u8_u16(dup_even))); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const uint16x8_t t1 = vandq_u16(t0, vec(0b0011111101111111)); | |
++ const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const uint16x8_t t2 = vorrq_u16 (t1, vec(0b1000000000000000)); | |
++ const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa] | |
+ const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12); | |
+ // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000] | |
+- const uint16x8_t s1 = vandq_u16(utf16_packed, vec(0b0000111111000000)); | |
++ const uint16x8_t s1 = vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000)); | |
+ // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000] | |
+ const uint16x8_t s1s = vshlq_n_u16(s1, 2); | |
+ // [00bb|bbbb|0000|aaaa] | |
+ const uint16x8_t s2 = vorrq_u16(s0, s1s); | |
+ // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const uint16x8_t s3 = vorrq_u16(s2, vec(0b1100000011100000)); | |
++ const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000)); | |
+ const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF); | |
+ const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(utf16_packed, v_07ff); | |
+- const uint16x8_t m0 = vbicq_u16(vec(0b0100000000000000), one_or_two_bytes_bytemask); | |
++ const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask); | |
+ const uint16x8_t s4 = veorq_u16(s3, m0); | |
+- #undef vec | |
++ #undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4)); | |
+@@ -13626,7 +13639,7 @@ std::pair<result, char*> arm_convert_utf32_to_utf8_with_errors(const char32_t* b | |
+ return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast<char*>(utf8_output)); | |
+ } | |
+ /* end file src/arm64/arm_convert_utf32_to_utf8.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf32_to_utf16.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf32_to_utf16.cpp | |
+ /* begin file src/arm64/arm_convert_utf32_to_utf16.cpp */ | |
+ template <endianness big_endian> | |
+ std::pair<const char32_t*, char16_t*> arm_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_out) { | |
+@@ -13759,7 +13772,7 @@ std::pair<result, char16_t*> arm_convert_utf32_to_utf16_with_errors(const char32 | |
+ } // unnamed namespace | |
+ } // namespace arm64 | |
+ } // namespace simdutf | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/buf_block_reader.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/buf_block_reader.h | |
+ /* begin file src/generic/buf_block_reader.h */ | |
+ namespace simdutf { | |
+ namespace arm64 { | |
+@@ -13854,7 +13867,7 @@ simdutf_really_inline void buf_block_reader<STEP_SIZE>::advance() { | |
+ } // namespace arm64 | |
+ } // namespace simdutf | |
+ /* end file src/generic/buf_block_reader.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_lookup4_algorithm.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_lookup4_algorithm.h | |
+ /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ | |
+ namespace simdutf { | |
+ namespace arm64 { | |
+@@ -14043,7 +14056,7 @@ using utf8_validation::utf8_checker; | |
+ } // namespace arm64 | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_validator.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_validator.h | |
+ /* begin file src/generic/utf8_validation/utf8_validator.h */ | |
+ namespace simdutf { | |
+ namespace arm64 { | |
+@@ -14170,7 +14183,7 @@ result generic_validate_ascii_with_errors(const char * input, size_t length) { | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_validation/utf8_validator.h */ | |
+ // transcoding from UTF-8 to UTF-16 | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/valid_utf8_to_utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/valid_utf8_to_utf16.h | |
+ /* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ | |
+ | |
+ | |
+@@ -14231,7 +14244,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -14245,7 +14258,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, | |
+ } // namespace arm64 | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/utf8_to_utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/utf8_to_utf16.h | |
+ /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ | |
+ | |
+ | |
+@@ -14377,7 +14390,19 @@ using namespace simd; | |
+ simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) { | |
+ size_t pos = 0; | |
+ char16_t* start{utf16_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 8; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then we have that margin-1 is the eighth-last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -14422,7 +14447,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -14440,7 +14465,19 @@ using namespace simd; | |
+ simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) { | |
+ size_t pos = 0; | |
+ char16_t* start{utf16_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 8; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then we have that margin-1 is the eighth-last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -14492,7 +14529,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -14529,7 +14566,7 @@ using namespace simd; | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ | |
+ // transcoding from UTF-8 to UTF-32 | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/valid_utf8_to_utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/valid_utf8_to_utf32.h | |
+ /* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ | |
+ | |
+ namespace simdutf { | |
+@@ -14575,7 +14612,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, | |
+ } // namespace arm64 | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/utf8_to_utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/utf8_to_utf32.h | |
+ /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ | |
+ | |
+ | |
+@@ -14707,7 +14744,19 @@ using namespace simd; | |
+ simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) { | |
+ size_t pos = 0; | |
+ char32_t* start{utf32_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 4; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then we have that margin-1 is the fourth last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -14752,7 +14801,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -14769,7 +14818,19 @@ using namespace simd; | |
+ simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) { | |
+ size_t pos = 0; | |
+ char32_t* start{utf32_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 4; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then we have that margin-1 is the fourth last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -14819,7 +14880,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -14852,7 +14913,7 @@ using namespace simd; | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ | |
+ // other functions | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8.h | |
+ /* begin file src/generic/utf8.h */ | |
+ | |
+ namespace simdutf { | |
+@@ -14899,7 +14960,7 @@ simdutf_really_inline size_t utf32_length_from_utf8(const char* in, size_t size) | |
+ } // namespace arm64 | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf16.h | |
+ /* begin file src/generic/utf16.h */ | |
+ namespace simdutf { | |
+ namespace arm64 { | |
+@@ -15434,15 +15495,15 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i | |
+ } // namespace arm64 | |
+ } // namespace simdutf | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/end.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/arm64/end.h | |
+ /* begin file src/simdutf/arm64/end.h */ | |
+ /* end file src/simdutf/arm64/end.h */ | |
+ /* end file src/arm64/implementation.cpp */ | |
+ #endif | |
+ #if SIMDUTF_IMPLEMENTATION_FALLBACK | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=fallback/implementation.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=fallback/implementation.cpp | |
+ /* begin file src/fallback/implementation.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/begin.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/fallback/begin.h | |
+ /* begin file src/simdutf/fallback/begin.h */ | |
+ // redefining SIMDUTF_IMPLEMENTATION to "fallback" | |
+ // #define SIMDUTF_IMPLEMENTATION fallback | |
+@@ -15686,17 +15747,17 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i | |
+ } // namespace fallback | |
+ } // namespace simdutf | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/end.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/fallback/end.h | |
+ /* begin file src/simdutf/fallback/end.h */ | |
+ /* end file src/simdutf/fallback/end.h */ | |
+ /* end file src/fallback/implementation.cpp */ | |
+ #endif | |
+ #if SIMDUTF_IMPLEMENTATION_ICELAKE | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/implementation.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=icelake/implementation.cpp | |
+ /* begin file src/icelake/implementation.cpp */ | |
+ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/begin.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/icelake/begin.h | |
+ /* begin file src/simdutf/icelake/begin.h */ | |
+ // redefining SIMDUTF_IMPLEMENTATION to "icelake" | |
+ // #define SIMDUTF_IMPLEMENTATION icelake | |
+@@ -15717,7 +15778,7 @@ namespace { | |
+ #ifndef SIMDUTF_ICELAKE_H | |
+ #error "icelake.h must be included" | |
+ #endif | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_utf8_common.inl.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=icelake/icelake_utf8_common.inl.cpp | |
+ /* begin file src/icelake/icelake_utf8_common.inl.cpp */ | |
+ // Common procedures for both validating and non-validating conversions from UTF-8. | |
+ enum block_processing_mode { SIMDUTF_FULL, SIMDUTF_TAIL}; | |
+@@ -16382,7 +16443,7 @@ simdutf_really_inline __m512i expand_utf8_to_utf32(__m512i input) { | |
+ return expanded_utf8_to_utf32(char_class, input); | |
+ } | |
+ /* end file src/icelake/icelake_utf8_common.inl.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_macros.inl.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=icelake/icelake_macros.inl.cpp | |
+ /* begin file src/icelake/icelake_macros.inl.cpp */ | |
+ | |
+ /* | |
+@@ -16518,7 +16579,7 @@ simdutf_really_inline __m512i expand_utf8_to_utf32(__m512i input) { | |
+ } \ | |
+ } | |
+ /* end file src/icelake/icelake_macros.inl.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_from_valid_utf8.inl.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=icelake/icelake_from_valid_utf8.inl.cpp | |
+ /* begin file src/icelake/icelake_from_valid_utf8.inl.cpp */ | |
+ // file included directly | |
+ | |
+@@ -16657,7 +16718,7 @@ std::pair<const char*, OUTPUT*> valid_utf8_to_fixed_length(const char* str, size | |
+ | |
+ using utf8_to_utf16_result = std::pair<const char*, char16_t*>; | |
+ /* end file src/icelake/icelake_from_valid_utf8.inl.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_utf8_validation.inl.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=icelake/icelake_utf8_validation.inl.cpp | |
+ /* begin file src/icelake/icelake_utf8_validation.inl.cpp */ | |
+ // file included directly | |
+ | |
+@@ -16787,7 +16848,7 @@ simdutf_really_inline __m512i check_special_cases(__m512i input, const __m512i p | |
+ | |
+ }; // struct avx512_utf8_checker | |
+ /* end file src/icelake/icelake_utf8_validation.inl.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_from_utf8.inl.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=icelake/icelake_from_utf8.inl.cpp | |
+ /* begin file src/icelake/icelake_from_utf8.inl.cpp */ | |
+ // file included directly | |
+ | |
+@@ -17090,7 +17151,7 @@ std::tuple<const char*, OUTPUT*, bool> validating_utf8_to_fixed_length_with_cons | |
+ return {ptr, output, true}; | |
+ } | |
+ /* end file src/icelake/icelake_from_utf8.inl.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_convert_utf16_to_utf32.inl.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=icelake/icelake_convert_utf16_to_utf32.inl.cpp | |
+ /* begin file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */ | |
+ // file included directly | |
+ | |
+@@ -17202,7 +17263,7 @@ std::tuple<const char16_t*, char32_t*, bool> convert_utf16_to_utf32(const char16 | |
+ return std::make_tuple(buf+carry, utf32_output, true); | |
+ } | |
+ /* end file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_convert_utf32_to_utf8.inl.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=icelake/icelake_convert_utf32_to_utf8.inl.cpp | |
+ /* begin file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */ | |
+ // file included directly | |
+ | |
+@@ -17218,7 +17279,7 @@ std::pair<const char32_t*, char*> avx512_convert_utf32_to_utf8(const char32_t* b | |
+ __m256i running_max = _mm256_setzero_si256(); | |
+ __m256i forbidden_bytemask = _mm256_setzero_si256(); | |
+ | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ | |
+ while (buf + 16 + safety_margin <= end) { | |
+ __m256i in = _mm256_loadu_si256((__m256i*)buf); | |
+@@ -17329,25 +17390,25 @@ std::pair<const char32_t*, char*> avx512_convert_utf32_to_utf8(const char32_t* b | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+-#define vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x)) | |
++#define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const __m256i t1 = _mm256_and_si256(t0, vec(0b0011111101111111)); | |
++ const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const __m256i t2 = _mm256_or_si256 (t1, vec(0b1000000000000000)); | |
++ const __m256i t2 = _mm256_or_si256 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] | |
+ const __m256i s0 = _mm256_srli_epi16(in_16, 4); | |
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] | |
+- const __m256i s1 = _mm256_and_si256(s0, vec(0b0000111111111100)); | |
++ const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); | |
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] | |
+- const __m256i s2 = _mm256_maddubs_epi16(s1, vec(0x0140)); | |
++ const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); | |
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const __m256i s3 = _mm256_or_si256(s2, vec(0b1100000011100000)); | |
+- const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, vec(0b0100000000000000)); | |
++ const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); | |
++ const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000)); | |
+ const __m256i s4 = _mm256_xor_si256(s3, m0); | |
+-#undef vec | |
++#undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); | |
+@@ -17459,7 +17520,7 @@ std::pair<result, char*> avx512_convert_utf32_to_utf8_with_errors(const char32_t | |
+ const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); | |
+ const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); | |
+ | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ | |
+ while (buf + 16 + safety_margin <= end) { | |
+ __m256i in = _mm256_loadu_si256((__m256i*)buf); | |
+@@ -17579,25 +17640,25 @@ std::pair<result, char*> avx512_convert_utf32_to_utf8_with_errors(const char32_t | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+-#define vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x)) | |
++#define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const __m256i t1 = _mm256_and_si256(t0, vec(0b0011111101111111)); | |
++ const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const __m256i t2 = _mm256_or_si256 (t1, vec(0b1000000000000000)); | |
++ const __m256i t2 = _mm256_or_si256 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] | |
+ const __m256i s0 = _mm256_srli_epi16(in_16, 4); | |
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] | |
+- const __m256i s1 = _mm256_and_si256(s0, vec(0b0000111111111100)); | |
++ const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); | |
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] | |
+- const __m256i s2 = _mm256_maddubs_epi16(s1, vec(0x0140)); | |
++ const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); | |
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const __m256i s3 = _mm256_or_si256(s2, vec(0b1100000011100000)); | |
+- const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, vec(0b0100000000000000)); | |
++ const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); | |
++ const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000)); | |
+ const __m256i s4 = _mm256_xor_si256(s3, m0); | |
+-#undef vec | |
++#undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); | |
+@@ -17688,7 +17749,7 @@ std::pair<result, char*> avx512_convert_utf32_to_utf8_with_errors(const char32_t | |
+ return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); | |
+ } | |
+ /* end file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_convert_utf32_to_utf16.inl.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=icelake/icelake_convert_utf32_to_utf16.inl.cpp | |
+ /* begin file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */ | |
+ // file included directly | |
+ | |
+@@ -17697,7 +17758,7 @@ template <endianness big_endian> | |
+ std::pair<const char32_t*, char16_t*> avx512_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) { | |
+ const char32_t* end = buf + len; | |
+ | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ __m256i forbidden_bytemask = _mm256_setzero_si256(); | |
+ | |
+ | |
+@@ -17764,7 +17825,7 @@ std::pair<result, char16_t*> avx512_convert_utf32_to_utf16_with_errors(const cha | |
+ const char32_t* start = buf; | |
+ const char32_t* end = buf + len; | |
+ | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ | |
+ while (buf + 8 + safety_margin <= end) { | |
+ __m256i in = _mm256_loadu_si256((__m256i*)buf); | |
+@@ -17823,7 +17884,7 @@ std::pair<result, char16_t*> avx512_convert_utf32_to_utf16_with_errors(const cha | |
+ return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output); | |
+ } | |
+ /* end file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_ascii_validation.inl.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=icelake/icelake_ascii_validation.inl.cpp | |
+ /* begin file src/icelake/icelake_ascii_validation.inl.cpp */ | |
+ // file included directly | |
+ | |
+@@ -17842,7 +17903,7 @@ bool validate_ascii(const char* buf, size_t len) { | |
+ return (_mm512_test_epi8_mask(running_or, running_or) == 0); | |
+ } | |
+ /* end file src/icelake/icelake_ascii_validation.inl.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_utf32_validation.inl.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=icelake/icelake_utf32_validation.inl.cpp | |
+ /* begin file src/icelake/icelake_utf32_validation.inl.cpp */ | |
+ // file included directly | |
+ | |
+@@ -17874,7 +17935,7 @@ const char32_t* validate_utf32(const char32_t* buf, size_t len) { | |
+ return buf; | |
+ } | |
+ /* end file src/icelake/icelake_utf32_validation.inl.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_convert_utf16_to_utf8.inl.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=icelake/icelake_convert_utf16_to_utf8.inl.cpp | |
+ /* begin file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */ | |
+ // file included directly | |
+ | |
+@@ -19186,7 +19247,7 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i | |
+ } // namespace icelake | |
+ } // namespace simdutf | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/end.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/icelake/end.h | |
+ /* begin file src/simdutf/icelake/end.h */ | |
+ #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE | |
+ // nothing needed. | |
+@@ -19202,10 +19263,10 @@ SIMDUTF_POP_DISABLE_WARNINGS | |
+ /* end file src/icelake/implementation.cpp */ | |
+ #endif | |
+ #if SIMDUTF_IMPLEMENTATION_HASWELL | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/implementation.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=haswell/implementation.cpp | |
+ /* begin file src/haswell/implementation.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/begin.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/haswell/begin.h | |
+ /* begin file src/simdutf/haswell/begin.h */ | |
+ // redefining SIMDUTF_IMPLEMENTATION to "haswell" | |
+ // #define SIMDUTF_IMPLEMENTATION haswell | |
+@@ -19248,7 +19309,7 @@ simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> | |
+ return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0); | |
+ } | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_detect_encodings.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=haswell/avx2_detect_encodings.cpp | |
+ /* begin file src/haswell/avx2_detect_encodings.cpp */ | |
+ template<class checker> | |
+ // len is known to be a multiple of 2 when this is called | |
+@@ -19438,7 +19499,7 @@ int avx2_detect_encodings(const char * buf, size_t len) { | |
+ } | |
+ /* end file src/haswell/avx2_detect_encodings.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_validate_utf16.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=haswell/avx2_validate_utf16.cpp | |
+ /* begin file src/haswell/avx2_validate_utf16.cpp */ | |
+ /* | |
+ In UTF-16 words in range 0xD800 to 0xDFFF have special meaning. | |
+@@ -19639,7 +19700,7 @@ const result avx2_validate_utf16_with_errors(const char16_t* input, size_t size) | |
+ return result(error_code::SUCCESS, input - start); | |
+ } | |
+ /* end file src/haswell/avx2_validate_utf16.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_validate_utf32le.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=haswell/avx2_validate_utf32le.cpp | |
+ /* begin file src/haswell/avx2_validate_utf32le.cpp */ | |
+ /* Returns: | |
+ - pointer to the last unprocessed character (a scalar fallback should check the rest); | |
+@@ -19705,7 +19766,7 @@ const result avx2_validate_utf32le_with_errors(const char32_t* input, size_t siz | |
+ } | |
+ /* end file src/haswell/avx2_validate_utf32le.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf8_to_utf16.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf8_to_utf16.cpp | |
+ /* begin file src/haswell/avx2_convert_utf8_to_utf16.cpp */ | |
+ // depends on "tables/utf8_to_utf16_tables.h" | |
+ | |
+@@ -19797,7 +19858,7 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); | |
+ if (big_endian) composed = _mm_shuffle_epi8(composed, swap); | |
+ _mm_storeu_si128((__m128i *)utf16_output, composed); | |
+- utf16_output += 6; // We wrote 12 bytes, 6 code points. | |
++ utf16_output += 6; // We wrote 12 bytes, 6 code points. There is a potential overflow of 4 bytes. | |
+ } else if (idx < 145) { | |
+ // FOUR (4) input code-words | |
+ const __m128i sh = | |
+@@ -19816,9 +19877,17 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ __m128i composed_repacked = _mm_packus_epi32(composed, composed); | |
+ if (big_endian) composed_repacked = _mm_shuffle_epi8(composed_repacked, swap); | |
+ _mm_storeu_si128((__m128i *)utf16_output, composed_repacked); | |
+- utf16_output += 4; | |
++ utf16_output += 4; // Here we overflow by 8 bytes. | |
+ } else if (idx < 209) { | |
+ // TWO (2) input code-words | |
++ ////////////// | |
++ // There might be garbage inputs where a leading byte masquerades as a four-byte | |
++ // leading byte (by being followed by 3 continuation bytes), but is less than | |
++ // 0xf0. This could trigger a buffer overflow if we only counted leading | |
++ // bytes of the form 0xf0 as generating surrogate pairs, without further UTF-8 validation. | |
++ // Thus we must be careful to ensure that only leading bytes at least as large as 0xf0 generate surrogate pairs. | |
++ // We do so at the cost of an extra mask. | |
++ ///////////// | |
+ const __m128i sh = | |
+ _mm_loadu_si128((const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]); | |
+ const __m128i perm = _mm_shuffle_epi8(in, sh); | |
+@@ -19831,8 +19900,14 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); | |
+ middlehighbyte = _mm_xor_si128(correct, middlehighbyte); | |
+ const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); | |
+- const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000)); | |
++ // We deliberately carry the leading four bits in highbyte if they are present, | |
++ // we remove them later when computing hightenbits. | |
++ const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000)); | |
+ const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); | |
++ // When we need to generate a surrogate pair (leading byte >= 0xF0), then | |
++ // the corresponding 32-bit value in 'composed' will be greater than | |
++ // (0xf0000000>>6), that is, > 0x3c00000. This can be used later to identify the | |
++ // location of the surrogate pairs. | |
+ const __m128i composed = | |
+ _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), | |
+ _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); | |
+@@ -19840,7 +19915,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ _mm_sub_epi32(composed, _mm_set1_epi32(0x10000)); | |
+ const __m128i lowtenbits = | |
+ _mm_and_si128(composedminus, _mm_set1_epi32(0x3ff)); | |
+- const __m128i hightenbits = _mm_srli_epi32(composedminus, 10); | |
++ // Notice the 0x3ff mask: | |
++ const __m128i hightenbits = _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff)); | |
+ const __m128i lowtenbitsadd = | |
+ _mm_add_epi32(lowtenbits, _mm_set1_epi32(0xDC00)); | |
+ const __m128i hightenbitsadd = | |
+@@ -19858,13 +19934,13 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ uint32_t surrogate_buffer[4]; | |
+ _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates); | |
+ for (size_t i = 0; i < 3; i++) { | |
+- if (basic_buffer[i] < 65536) { | |
+- utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]); | |
+- utf16_output++; | |
+- } else { | |
++ if(basic_buffer[i] > 0x3c00000) { | |
+ utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff); | |
+ utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16); | |
+ utf16_output += 2; | |
++ } else { | |
++ utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]); | |
++ utf16_output++; | |
+ } | |
+ } | |
+ } else { | |
+@@ -19873,7 +19949,7 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ return consumed; | |
+ } | |
+ /* end file src/haswell/avx2_convert_utf8_to_utf16.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf8_to_utf32.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf8_to_utf32.cpp | |
+ /* begin file src/haswell/avx2_convert_utf8_to_utf32.cpp */ | |
+ // depends on "tables/utf8_to_utf16_tables.h" | |
+ | |
+@@ -19955,7 +20031,8 @@ size_t convert_masked_utf8_to_utf32(const char *input, | |
+ const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); | |
+ const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); | |
+ _mm256_storeu_si256((__m256i *)utf32_output, _mm256_cvtepu16_epi32(composed)); | |
+- utf32_output += 6; // We wrote 12 bytes, 6 code points. | |
++ utf32_output += 6; // We wrote 24 bytes, 6 code points. There is a potential | |
++ // overflow of 32 - 24 = 8 bytes. | |
+ } else if (idx < 145) { | |
+ // FOUR (4) input code-words | |
+ const __m128i sh = | |
+@@ -19993,7 +20070,7 @@ size_t convert_masked_utf8_to_utf32(const char *input, | |
+ _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), | |
+ _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); | |
+ _mm_storeu_si128((__m128i *)utf32_output, composed); | |
+- utf32_output += 3; | |
++ utf32_output += 3; // We wrote 3 * 4 bytes, there is a potential overflow of 4 bytes. | |
+ } else { | |
+ // here we know that there is an error but we do not handle errors | |
+ } | |
+@@ -20001,7 +20078,7 @@ size_t convert_masked_utf8_to_utf32(const char *input, | |
+ } | |
+ /* end file src/haswell/avx2_convert_utf8_to_utf32.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf16_to_utf8.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf16_to_utf8.cpp | |
+ /* begin file src/haswell/avx2_convert_utf16_to_utf8.cpp */ | |
+ /* | |
+ The vectorized algorithm works on single SSE register i.e., it | |
+@@ -20064,7 +20141,7 @@ std::pair<const char16_t*, char*> avx2_convert_utf16_to_utf8(const char16_t* buf | |
+ const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); | |
+ const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); | |
+ const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ | |
+ while (buf + 16 + safety_margin <= end) { | |
+ __m256i in = _mm256_loadu_si256((__m256i*)buf); | |
+@@ -20178,25 +20255,25 @@ std::pair<const char16_t*, char*> avx2_convert_utf16_to_utf8(const char16_t* buf | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+-#define vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x)) | |
++#define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const __m256i t0 = _mm256_shuffle_epi8(in, dup_even); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const __m256i t1 = _mm256_and_si256(t0, vec(0b0011111101111111)); | |
++ const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const __m256i t2 = _mm256_or_si256 (t1, vec(0b1000000000000000)); | |
++ const __m256i t2 = _mm256_or_si256 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] | |
+ const __m256i s0 = _mm256_srli_epi16(in, 4); | |
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] | |
+- const __m256i s1 = _mm256_and_si256(s0, vec(0b0000111111111100)); | |
++ const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); | |
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] | |
+- const __m256i s2 = _mm256_maddubs_epi16(s1, vec(0x0140)); | |
++ const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); | |
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const __m256i s3 = _mm256_or_si256(s2, vec(0b1100000011100000)); | |
+- const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, vec(0b0100000000000000)); | |
++ const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); | |
++ const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000)); | |
+ const __m256i s4 = _mm256_xor_si256(s3, m0); | |
+-#undef vec | |
++#undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); | |
+@@ -20307,7 +20384,7 @@ std::pair<result, char*> avx2_convert_utf16_to_utf8_with_errors(const char16_t* | |
+ const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); | |
+ const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); | |
+ const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ | |
+ while (buf + 16 + safety_margin <= end) { | |
+ __m256i in = _mm256_loadu_si256((__m256i*)buf); | |
+@@ -20421,25 +20498,25 @@ std::pair<result, char*> avx2_convert_utf16_to_utf8_with_errors(const char16_t* | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+-#define vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x)) | |
++#define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const __m256i t0 = _mm256_shuffle_epi8(in, dup_even); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const __m256i t1 = _mm256_and_si256(t0, vec(0b0011111101111111)); | |
++ const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const __m256i t2 = _mm256_or_si256 (t1, vec(0b1000000000000000)); | |
++ const __m256i t2 = _mm256_or_si256 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] | |
+ const __m256i s0 = _mm256_srli_epi16(in, 4); | |
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] | |
+- const __m256i s1 = _mm256_and_si256(s0, vec(0b0000111111111100)); | |
++ const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); | |
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] | |
+- const __m256i s2 = _mm256_maddubs_epi16(s1, vec(0x0140)); | |
++ const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); | |
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const __m256i s3 = _mm256_or_si256(s2, vec(0b1100000011100000)); | |
+- const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, vec(0b0100000000000000)); | |
++ const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); | |
++ const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000)); | |
+ const __m256i s4 = _mm256_xor_si256(s3, m0); | |
+-#undef vec | |
++#undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); | |
+@@ -20534,7 +20611,7 @@ std::pair<result, char*> avx2_convert_utf16_to_utf8_with_errors(const char16_t* | |
+ return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); | |
+ } | |
+ /* end file src/haswell/avx2_convert_utf16_to_utf8.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf16_to_utf32.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf16_to_utf32.cpp | |
+ /* begin file src/haswell/avx2_convert_utf16_to_utf32.cpp */ | |
+ /* | |
+ The vectorized algorithm works on single SSE register i.e., it | |
+@@ -20719,7 +20796,7 @@ std::pair<result, char32_t*> avx2_convert_utf16_to_utf32_with_errors(const char1 | |
+ } | |
+ /* end file src/haswell/avx2_convert_utf16_to_utf32.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf32_to_utf8.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf32_to_utf8.cpp | |
+ /* begin file src/haswell/avx2_convert_utf32_to_utf8.cpp */ | |
+ std::pair<const char32_t*, char*> avx2_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) { | |
+ const char32_t* end = buf + len; | |
+@@ -20732,7 +20809,7 @@ std::pair<const char32_t*, char*> avx2_convert_utf32_to_utf8(const char32_t* buf | |
+ __m256i running_max = _mm256_setzero_si256(); | |
+ __m256i forbidden_bytemask = _mm256_setzero_si256(); | |
+ | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ | |
+ while (buf + 16 + safety_margin <= end) { | |
+ __m256i in = _mm256_loadu_si256((__m256i*)buf); | |
+@@ -20843,25 +20920,25 @@ std::pair<const char32_t*, char*> avx2_convert_utf32_to_utf8(const char32_t* buf | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+-#define vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x)) | |
++#define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const __m256i t1 = _mm256_and_si256(t0, vec(0b0011111101111111)); | |
++ const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const __m256i t2 = _mm256_or_si256 (t1, vec(0b1000000000000000)); | |
++ const __m256i t2 = _mm256_or_si256 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] | |
+ const __m256i s0 = _mm256_srli_epi16(in_16, 4); | |
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] | |
+- const __m256i s1 = _mm256_and_si256(s0, vec(0b0000111111111100)); | |
++ const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); | |
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] | |
+- const __m256i s2 = _mm256_maddubs_epi16(s1, vec(0x0140)); | |
++ const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); | |
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const __m256i s3 = _mm256_or_si256(s2, vec(0b1100000011100000)); | |
+- const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, vec(0b0100000000000000)); | |
++ const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); | |
++ const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000)); | |
+ const __m256i s4 = _mm256_xor_si256(s3, m0); | |
+-#undef vec | |
++#undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); | |
+@@ -20973,7 +21050,7 @@ std::pair<result, char*> avx2_convert_utf32_to_utf8_with_errors(const char32_t* | |
+ const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff); | |
+ const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff); | |
+ | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ | |
+ while (buf + 16 + safety_margin <= end) { | |
+ __m256i in = _mm256_loadu_si256((__m256i*)buf); | |
+@@ -21093,25 +21170,25 @@ std::pair<result, char*> avx2_convert_utf32_to_utf8_with_errors(const char32_t* | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+-#define vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x)) | |
++#define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const __m256i t1 = _mm256_and_si256(t0, vec(0b0011111101111111)); | |
++ const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const __m256i t2 = _mm256_or_si256 (t1, vec(0b1000000000000000)); | |
++ const __m256i t2 = _mm256_or_si256 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] | |
+ const __m256i s0 = _mm256_srli_epi16(in_16, 4); | |
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] | |
+- const __m256i s1 = _mm256_and_si256(s0, vec(0b0000111111111100)); | |
++ const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100)); | |
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] | |
+- const __m256i s2 = _mm256_maddubs_epi16(s1, vec(0x0140)); | |
++ const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140)); | |
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const __m256i s3 = _mm256_or_si256(s2, vec(0b1100000011100000)); | |
+- const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, vec(0b0100000000000000)); | |
++ const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000)); | |
++ const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000)); | |
+ const __m256i s4 = _mm256_xor_si256(s3, m0); | |
+-#undef vec | |
++#undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const __m256i out0 = _mm256_unpacklo_epi16(t2, s4); | |
+@@ -21202,13 +21279,13 @@ std::pair<result, char*> avx2_convert_utf32_to_utf8_with_errors(const char32_t* | |
+ return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); | |
+ } | |
+ /* end file src/haswell/avx2_convert_utf32_to_utf8.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf32_to_utf16.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf32_to_utf16.cpp | |
+ /* begin file src/haswell/avx2_convert_utf32_to_utf16.cpp */ | |
+ template <endianness big_endian> | |
+ std::pair<const char32_t*, char16_t*> avx2_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) { | |
+ const char32_t* end = buf + len; | |
+ | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ __m256i forbidden_bytemask = _mm256_setzero_si256(); | |
+ | |
+ | |
+@@ -21275,7 +21352,7 @@ std::pair<result, char16_t*> avx2_convert_utf32_to_utf16_with_errors(const char3 | |
+ const char32_t* start = buf; | |
+ const char32_t* end = buf + len; | |
+ | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ | |
+ while (buf + 8 + safety_margin <= end) { | |
+ __m256i in = _mm256_loadu_si256((__m256i*)buf); | |
+@@ -21338,7 +21415,7 @@ std::pair<result, char16_t*> avx2_convert_utf32_to_utf16_with_errors(const char3 | |
+ } // namespace haswell | |
+ } // namespace simdutf | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/buf_block_reader.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/buf_block_reader.h | |
+ /* begin file src/generic/buf_block_reader.h */ | |
+ namespace simdutf { | |
+ namespace haswell { | |
+@@ -21433,7 +21510,7 @@ simdutf_really_inline void buf_block_reader<STEP_SIZE>::advance() { | |
+ } // namespace haswell | |
+ } // namespace simdutf | |
+ /* end file src/generic/buf_block_reader.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_lookup4_algorithm.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_lookup4_algorithm.h | |
+ /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ | |
+ namespace simdutf { | |
+ namespace haswell { | |
+@@ -21622,7 +21699,7 @@ using utf8_validation::utf8_checker; | |
+ } // namespace haswell | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_validator.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_validator.h | |
+ /* begin file src/generic/utf8_validation/utf8_validator.h */ | |
+ namespace simdutf { | |
+ namespace haswell { | |
+@@ -21749,7 +21826,7 @@ result generic_validate_ascii_with_errors(const char * input, size_t length) { | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_validation/utf8_validator.h */ | |
+ // transcoding from UTF-8 to UTF-16 | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/valid_utf8_to_utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/valid_utf8_to_utf16.h | |
+ /* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ | |
+ | |
+ | |
+@@ -21810,7 +21887,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -21824,7 +21901,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, | |
+ } // namespace haswell | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/utf8_to_utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/utf8_to_utf16.h | |
+ /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ | |
+ | |
+ | |
+@@ -21956,7 +22033,19 @@ using namespace simd; | |
+ simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) { | |
+ size_t pos = 0; | |
+ char16_t* start{utf16_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 8; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then in[margin] is the eighth-to-last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -22001,7 +22090,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -22019,7 +22108,19 @@ using namespace simd; | |
+ simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) { | |
+ size_t pos = 0; | |
+ char16_t* start{utf16_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 8; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then in[margin] is the eighth-to-last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -22071,7 +22172,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -22108,7 +22209,7 @@ using namespace simd; | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ | |
+ // transcoding from UTF-8 to UTF-32 | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/valid_utf8_to_utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/valid_utf8_to_utf32.h | |
+ /* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ | |
+ | |
+ namespace simdutf { | |
+@@ -22154,7 +22255,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, | |
+ } // namespace haswell | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/utf8_to_utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/utf8_to_utf32.h | |
+ /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ | |
+ | |
+ | |
+@@ -22286,7 +22387,19 @@ using namespace simd; | |
+ simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) { | |
+ size_t pos = 0; | |
+ char32_t* start{utf32_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 4; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then in[margin] is the fourth-to-last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -22331,7 +22444,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -22348,7 +22461,19 @@ using namespace simd; | |
+ simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) { | |
+ size_t pos = 0; | |
+ char32_t* start{utf32_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 4; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then in[margin] is the fourth-to-last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -22398,7 +22523,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -22431,7 +22556,7 @@ using namespace simd; | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ | |
+ // other functions | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8.h | |
+ /* begin file src/generic/utf8.h */ | |
+ | |
+ namespace simdutf { | |
+@@ -22478,7 +22603,7 @@ simdutf_really_inline size_t utf32_length_from_utf8(const char* in, size_t size) | |
+ } // namespace haswell | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf16.h | |
+ /* begin file src/generic/utf16.h */ | |
+ namespace simdutf { | |
+ namespace haswell { | |
+@@ -23006,7 +23131,7 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i | |
+ } // namespace haswell | |
+ } // namespace simdutf | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/end.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/haswell/end.h | |
+ /* begin file src/simdutf/haswell/end.h */ | |
+ #if SIMDUTF_CAN_ALWAYS_RUN_HASWELL | |
+ // nothing needed. | |
+@@ -23022,14 +23147,14 @@ SIMDUTF_POP_DISABLE_WARNINGS | |
+ /* end file src/haswell/implementation.cpp */ | |
+ #endif | |
+ #if SIMDUTF_IMPLEMENTATION_PPC64 | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=ppc64/implementation.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=ppc64/implementation.cpp | |
+ /* begin file src/ppc64/implementation.cpp */ | |
+ | |
+ | |
+ | |
+ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/begin.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/ppc64/begin.h | |
+ /* begin file src/simdutf/ppc64/begin.h */ | |
+ // redefining SIMDUTF_IMPLEMENTATION to "ppc64" | |
+ // #define SIMDUTF_IMPLEMENTATION ppc64 | |
+@@ -23067,7 +23192,7 @@ simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> | |
+ } // namespace ppc64 | |
+ } // namespace simdutf | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/buf_block_reader.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/buf_block_reader.h | |
+ /* begin file src/generic/buf_block_reader.h */ | |
+ namespace simdutf { | |
+ namespace ppc64 { | |
+@@ -23162,7 +23287,7 @@ simdutf_really_inline void buf_block_reader<STEP_SIZE>::advance() { | |
+ } // namespace ppc64 | |
+ } // namespace simdutf | |
+ /* end file src/generic/buf_block_reader.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_lookup4_algorithm.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_lookup4_algorithm.h | |
+ /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ | |
+ namespace simdutf { | |
+ namespace ppc64 { | |
+@@ -23351,7 +23476,7 @@ using utf8_validation::utf8_checker; | |
+ } // namespace ppc64 | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_validator.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_validator.h | |
+ /* begin file src/generic/utf8_validation/utf8_validator.h */ | |
+ namespace simdutf { | |
+ namespace ppc64 { | |
+@@ -23478,7 +23603,7 @@ result generic_validate_ascii_with_errors(const char * input, size_t length) { | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_validation/utf8_validator.h */ | |
+ // transcoding from UTF-8 to UTF-16 | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/valid_utf8_to_utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/valid_utf8_to_utf16.h | |
+ /* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ | |
+ | |
+ | |
+@@ -23539,7 +23664,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -23553,7 +23678,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, | |
+ } // namespace ppc64 | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/utf8_to_utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/utf8_to_utf16.h | |
+ /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ | |
+ | |
+ | |
+@@ -23685,7 +23810,19 @@ using namespace simd; | |
+ simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) { | |
+ size_t pos = 0; | |
+ char16_t* start{utf16_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 8; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then we have that margin-1 is the eighth-to-last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -23730,7 +23867,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -23748,7 +23885,19 @@ using namespace simd; | |
+ simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) { | |
+ size_t pos = 0; | |
+ char16_t* start{utf16_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 8; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then we have that margin-1 is the eighth-to-last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -23800,7 +23949,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -23837,7 +23986,7 @@ using namespace simd; | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ | |
+ // transcoding from UTF-8 to UTF-32 | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/valid_utf8_to_utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/valid_utf8_to_utf32.h | |
+ /* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ | |
+ | |
+ namespace simdutf { | |
+@@ -23883,7 +24032,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, | |
+ } // namespace ppc64 | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/utf8_to_utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/utf8_to_utf32.h | |
+ /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ | |
+ | |
+ | |
+@@ -24015,7 +24164,19 @@ using namespace simd; | |
+ simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) { | |
+ size_t pos = 0; | |
+ char32_t* start{utf32_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 4; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then we have that margin-1 is the fourth last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -24060,7 +24221,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -24077,7 +24238,19 @@ using namespace simd; | |
+ simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) { | |
+ size_t pos = 0; | |
+ char32_t* start{utf32_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 4; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then we have that margin-1 is the fourth last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -24127,7 +24300,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -24160,7 +24333,7 @@ using namespace simd; | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ | |
+ // other functions | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8.h | |
+ /* begin file src/generic/utf8.h */ | |
+ | |
+ namespace simdutf { | |
+@@ -24207,7 +24380,7 @@ simdutf_really_inline size_t utf32_length_from_utf8(const char* in, size_t size) | |
+ } // namespace ppc64 | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf16.h | |
+ /* begin file src/generic/utf16.h */ | |
+ namespace simdutf { | |
+ namespace ppc64 { | |
+@@ -24506,15 +24679,15 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i | |
+ } // namespace ppc64 | |
+ } // namespace simdutf | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/end.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/ppc64/end.h | |
+ /* begin file src/simdutf/ppc64/end.h */ | |
+ /* end file src/simdutf/ppc64/end.h */ | |
+ /* end file src/ppc64/implementation.cpp */ | |
+ #endif | |
+ #if SIMDUTF_IMPLEMENTATION_WESTMERE | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/implementation.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=westmere/implementation.cpp | |
+ /* begin file src/westmere/implementation.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/begin.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/westmere/begin.h | |
+ /* begin file src/simdutf/westmere/begin.h */ | |
+ // redefining SIMDUTF_IMPLEMENTATION to "westmere" | |
+ // #define SIMDUTF_IMPLEMENTATION westmere | |
+@@ -24552,7 +24725,7 @@ simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> | |
+ return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0); | |
+ } | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_detect_encodings.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=westmere/sse_detect_encodings.cpp | |
+ /* begin file src/westmere/sse_detect_encodings.cpp */ | |
+ template<class checker> | |
+ // len is known to be a multiple of 2 when this is called | |
+@@ -24762,7 +24935,7 @@ int sse_detect_encodings(const char * buf, size_t len) { | |
+ } | |
+ /* end file src/westmere/sse_detect_encodings.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_validate_utf16.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=westmere/sse_validate_utf16.cpp | |
+ /* begin file src/westmere/sse_validate_utf16.cpp */ | |
+ /* | |
+ In UTF-16 words in range 0xD800 to 0xDFFF have special meaning. | |
+@@ -24962,7 +25135,7 @@ const result sse_validate_utf16_with_errors(const char16_t* input, size_t size) | |
+ return result(error_code::SUCCESS, input - start); | |
+ } | |
+ /* end file src/westmere/sse_validate_utf16.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_validate_utf32le.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=westmere/sse_validate_utf32le.cpp | |
+ /* begin file src/westmere/sse_validate_utf32le.cpp */ | |
+ /* Returns: | |
+ - pointer to the last unprocessed character (a scalar fallback should check the rest); | |
+@@ -25028,7 +25201,7 @@ const result sse_validate_utf32le_with_errors(const char32_t* input, size_t size | |
+ } | |
+ /* end file src/westmere/sse_validate_utf32le.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf8_to_utf16.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf8_to_utf16.cpp | |
+ /* begin file src/westmere/sse_convert_utf8_to_utf16.cpp */ | |
+ // depends on "tables/utf8_to_utf16_tables.h" | |
+ | |
+@@ -25144,6 +25317,14 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ utf16_output += 4; | |
+ } else if (idx < 209) { | |
+ // TWO (2) input code-words | |
++ ////////////// | |
++ // There might be garbage inputs where a leading byte masquerades as a four-byte | |
++ // leading byte (by being followed by 3 continuation bytes), but is not greater than | |
++ // 0xf0. This could trigger a buffer overflow if we only counted leading | |
++ // bytes of the form 0xf0 as generating surrogate pairs, without further UTF-8 validation. | |
++ // Thus we must be careful to ensure that only leading bytes at least as large as 0xf0 generate surrogate pairs. | |
++ // We do so at the cost of an extra mask. | |
++ ///////////// | |
+ const __m128i sh = | |
+ _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]); | |
+ const __m128i perm = _mm_shuffle_epi8(in, sh); | |
+@@ -25156,8 +25337,14 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1); | |
+ middlehighbyte = _mm_xor_si128(correct, middlehighbyte); | |
+ const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4); | |
+- const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000)); | |
++ // We deliberately carry the leading four bits in highbyte if they are present, | |
++ // we remove them later when computing hightenbits. | |
++ const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000)); | |
+ const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6); | |
++ // When we need to generate a surrogate pair (leading byte > 0xF0), then | |
++ // the corresponding 32-bit value in 'composed' will be greater than | |
++ // (0xf0000000>>6), that is, greater than 0x3c00000. This can be used later to identify the | |
++ // location of the surrogate pairs. | |
+ const __m128i composed = | |
+ _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted), | |
+ _mm_or_si128(highbyte_shifted, middlehighbyte_shifted)); | |
+@@ -25165,7 +25352,8 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ _mm_sub_epi32(composed, _mm_set1_epi32(0x10000)); | |
+ const __m128i lowtenbits = | |
+ _mm_and_si128(composedminus, _mm_set1_epi32(0x3ff)); | |
+- const __m128i hightenbits = _mm_srli_epi32(composedminus, 10); | |
++ // Notice the 0x3ff mask: | |
++ const __m128i hightenbits = _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff)); | |
+ const __m128i lowtenbitsadd = | |
+ _mm_add_epi32(lowtenbits, _mm_set1_epi32(0xDC00)); | |
+ const __m128i hightenbitsadd = | |
+@@ -25183,13 +25371,13 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ uint32_t surrogate_buffer[4]; | |
+ _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates); | |
+ for (size_t i = 0; i < 3; i++) { | |
+- if (basic_buffer[i] < 65536) { | |
+- utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]); | |
+- utf16_output++; | |
+- } else { | |
++ if(basic_buffer[i] > 0x3c00000) { | |
+ utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff); | |
+ utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16); | |
+ utf16_output += 2; | |
++ } else { | |
++ utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]); | |
++ utf16_output++; | |
+ } | |
+ } | |
+ } else { | |
+@@ -25198,7 +25386,7 @@ size_t convert_masked_utf8_to_utf16(const char *input, | |
+ return consumed; | |
+ } | |
+ /* end file src/westmere/sse_convert_utf8_to_utf16.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf8_to_utf32.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf8_to_utf32.cpp | |
+ /* begin file src/westmere/sse_convert_utf8_to_utf32.cpp */ | |
+ // depends on "tables/utf8_to_utf16_tables.h" | |
+ | |
+@@ -25330,7 +25518,7 @@ size_t convert_masked_utf8_to_utf32(const char *input, | |
+ } | |
+ /* end file src/westmere/sse_convert_utf8_to_utf32.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf16_to_utf8.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf16_to_utf8.cpp | |
+ /* begin file src/westmere/sse_convert_utf16_to_utf8.cpp */ | |
+ /* | |
+ The vectorized algorithm works on single SSE register i.e., it | |
+@@ -25394,7 +25582,7 @@ std::pair<const char16_t*, char*> sse_convert_utf16_to_utf8(const char16_t* buf, | |
+ const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); | |
+ const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); | |
+ const __m128i v_c080 = _mm_set1_epi16((int16_t)0xc080); | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ | |
+ while (buf + 16 + safety_margin <= end) { | |
+ __m128i in = _mm_loadu_si128((__m128i*)buf); | |
+@@ -25521,25 +25709,25 @@ std::pair<const char16_t*, char*> sse_convert_utf16_to_utf8(const char16_t* buf, | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+-#define vec(x) _mm_set1_epi16(static_cast<uint16_t>(x)) | |
++#define simdutf_vec(x) _mm_set1_epi16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const __m128i t0 = _mm_shuffle_epi8(in, dup_even); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const __m128i t1 = _mm_and_si128(t0, vec(0b0011111101111111)); | |
++ const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const __m128i t2 = _mm_or_si128 (t1, vec(0b1000000000000000)); | |
++ const __m128i t2 = _mm_or_si128 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] | |
+ const __m128i s0 = _mm_srli_epi16(in, 4); | |
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] | |
+- const __m128i s1 = _mm_and_si128(s0, vec(0b0000111111111100)); | |
++ const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); | |
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] | |
+- const __m128i s2 = _mm_maddubs_epi16(s1, vec(0x0140)); | |
++ const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); | |
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const __m128i s3 = _mm_or_si128(s2, vec(0b1100000011100000)); | |
+- const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, vec(0b0100000000000000)); | |
++ const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); | |
++ const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000)); | |
+ const __m128i s4 = _mm_xor_si128(s3, m0); | |
+-#undef vec | |
++#undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const __m128i out0 = _mm_unpacklo_epi16(t2, s4); | |
+@@ -25634,7 +25822,7 @@ std::pair<result, char*> sse_convert_utf16_to_utf8_with_errors(const char16_t* b | |
+ const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800); | |
+ const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800); | |
+ const __m128i v_c080 = _mm_set1_epi16((int16_t)0xc080); | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ | |
+ while (buf + 16 + safety_margin <= end) { | |
+ __m128i in = _mm_loadu_si128((__m128i*)buf); | |
+@@ -25761,25 +25949,25 @@ std::pair<result, char*> sse_convert_utf16_to_utf8_with_errors(const char16_t* b | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+-#define vec(x) _mm_set1_epi16(static_cast<uint16_t>(x)) | |
++#define simdutf_vec(x) _mm_set1_epi16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const __m128i t0 = _mm_shuffle_epi8(in, dup_even); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const __m128i t1 = _mm_and_si128(t0, vec(0b0011111101111111)); | |
++ const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const __m128i t2 = _mm_or_si128 (t1, vec(0b1000000000000000)); | |
++ const __m128i t2 = _mm_or_si128 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] | |
+ const __m128i s0 = _mm_srli_epi16(in, 4); | |
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] | |
+- const __m128i s1 = _mm_and_si128(s0, vec(0b0000111111111100)); | |
++ const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); | |
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] | |
+- const __m128i s2 = _mm_maddubs_epi16(s1, vec(0x0140)); | |
++ const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); | |
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const __m128i s3 = _mm_or_si128(s2, vec(0b1100000011100000)); | |
+- const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, vec(0b0100000000000000)); | |
++ const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); | |
++ const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000)); | |
+ const __m128i s4 = _mm_xor_si128(s3, m0); | |
+-#undef vec | |
++#undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const __m128i out0 = _mm_unpacklo_epi16(t2, s4); | |
+@@ -25858,7 +26046,7 @@ std::pair<result, char*> sse_convert_utf16_to_utf8_with_errors(const char16_t* b | |
+ return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); | |
+ } | |
+ /* end file src/westmere/sse_convert_utf16_to_utf8.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf16_to_utf32.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf16_to_utf32.cpp | |
+ /* begin file src/westmere/sse_convert_utf16_to_utf32.cpp */ | |
+ /* | |
+ The vectorized algorithm works on single SSE register i.e., it | |
+@@ -26042,10 +26230,9 @@ std::pair<result, char32_t*> sse_convert_utf16_to_utf32_with_errors(const char16 | |
+ } | |
+ /* end file src/westmere/sse_convert_utf16_to_utf32.cpp */ | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf32_to_utf8.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf32_to_utf8.cpp | |
+ /* begin file src/westmere/sse_convert_utf32_to_utf8.cpp */ | |
+ std::pair<const char32_t*, char*> sse_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) { | |
+- | |
+ const char32_t* end = buf + len; | |
+ | |
+ const __m128i v_0000 = _mm_setzero_si128(); | |
+@@ -26056,9 +26243,10 @@ std::pair<const char32_t*, char*> sse_convert_utf32_to_utf8(const char32_t* buf, | |
+ const __m128i v_7fffffff = _mm_set1_epi32((uint32_t)0x7fffffff); | |
+ __m128i running_max = _mm_setzero_si128(); | |
+ __m128i forbidden_bytemask = _mm_setzero_si128(); | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ | |
+ while (buf + 16 + safety_margin <= end) { | |
++ // We load two 16-byte registers for a total of 32 bytes or 8 characters. | |
+ __m128i in = _mm_loadu_si128((__m128i*)buf); | |
+ __m128i nextin = _mm_loadu_si128((__m128i*)buf+1); | |
+ running_max = _mm_max_epu32(_mm_max_epu32(in, running_max), nextin); | |
+@@ -26070,6 +26258,10 @@ std::pair<const char32_t*, char*> sse_convert_utf32_to_utf8(const char32_t* buf, | |
+ | |
+ // Check for ASCII fast path | |
+ if(_mm_testz_si128(in_16, v_ff80)) { // ASCII fast path!!!! | |
++ // We eagerly load another 32 bytes, hoping that they will be ASCII too. | |
++ // The intuition is that we try to collect 16 ASCII characters which requires | |
++ // a total of 64 bytes of input. If we fail, we just pass thirdin and fourthin | |
++ // as our new inputs. | |
+ __m128i thirdin = _mm_loadu_si128((__m128i*)buf+2); | |
+ __m128i fourthin = _mm_loadu_si128((__m128i*)buf+3); | |
+ running_max = _mm_max_epu32(_mm_max_epu32(thirdin, running_max), fourthin); | |
+@@ -26085,6 +26277,9 @@ std::pair<const char32_t*, char*> sse_convert_utf32_to_utf8(const char32_t* buf, | |
+ utf8_output += 8; | |
+ // Proceed with next input | |
+ in_16 = nextin_16; | |
++ // We need to update in and nextin because they are used later. | |
++ in = thirdin; | |
++ nextin = fourthin; | |
+ } else { | |
+ // 1. pack the bytes | |
+ const __m128i utf8_packed = _mm_packus_epi16(in_16, nextin_16); | |
+@@ -26146,11 +26341,10 @@ std::pair<const char32_t*, char*> sse_convert_utf32_to_utf8(const char32_t* buf, | |
+ continue; | |
+ } | |
+ | |
+- | |
+ // Check for overflow in packing | |
++ | |
+ const __m128i saturation_bytemask = _mm_cmpeq_epi32(_mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000); | |
+ const uint32_t saturation_bitmask = static_cast<uint32_t>(_mm_movemask_epi8(saturation_bytemask)); | |
+- | |
+ if (saturation_bitmask == 0xffff) { | |
+ // case: words from register produce either 1, 2 or 3 UTF-8 bytes | |
+ const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800); | |
+@@ -26183,25 +26377,25 @@ std::pair<const char32_t*, char*> sse_convert_utf32_to_utf8(const char32_t* buf, | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+-#define vec(x) _mm_set1_epi16(static_cast<uint16_t>(x)) | |
++#define simdutf_vec(x) _mm_set1_epi16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const __m128i t1 = _mm_and_si128(t0, vec(0b0011111101111111)); | |
++ const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const __m128i t2 = _mm_or_si128 (t1, vec(0b1000000000000000)); | |
++ const __m128i t2 = _mm_or_si128 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] | |
+ const __m128i s0 = _mm_srli_epi16(in_16, 4); | |
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] | |
+- const __m128i s1 = _mm_and_si128(s0, vec(0b0000111111111100)); | |
++ const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); | |
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] | |
+- const __m128i s2 = _mm_maddubs_epi16(s1, vec(0x0140)); | |
++ const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); | |
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const __m128i s3 = _mm_or_si128(s2, vec(0b1100000011100000)); | |
+- const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, vec(0b0100000000000000)); | |
++ const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); | |
++ const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000)); | |
+ const __m128i s4 = _mm_xor_si128(s3, m0); | |
+-#undef vec | |
++#undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const __m128i out0 = _mm_unpacklo_epi16(t2, s4); | |
+@@ -26297,9 +26491,10 @@ std::pair<result, char*> sse_convert_utf32_to_utf8_with_errors(const char32_t* b | |
+ const __m128i v_7fffffff = _mm_set1_epi32((uint32_t)0x7fffffff); | |
+ const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff); | |
+ | |
+- const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
++ const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 | |
+ | |
+ while (buf + 16 + safety_margin <= end) { | |
++ // We load two 16-byte registers for a total of 32 bytes or 8 characters. | |
+ __m128i in = _mm_loadu_si128((__m128i*)buf); | |
+ __m128i nextin = _mm_loadu_si128((__m128i*)buf+1); | |
+ | |
+@@ -26316,6 +26511,10 @@ std::pair<result, char*> sse_convert_utf32_to_utf8_with_errors(const char32_t* b | |
+ | |
+ // Check for ASCII fast path | |
+ if(_mm_testz_si128(in_16, v_ff80)) { // ASCII fast path!!!! | |
++ // We eagerly load another 32 bytes, hoping that they will be ASCII too. | |
++ // The intuition is that we try to collect 16 ASCII characters which requires | |
++ // a total of 64 bytes of input. If we fail, we just pass thirdin and fourthin | |
++ // as our new inputs. | |
+ __m128i thirdin = _mm_loadu_si128((__m128i*)buf+2); | |
+ __m128i fourthin = _mm_loadu_si128((__m128i*)buf+3); | |
+ __m128i nextin_16 = _mm_packus_epi32(_mm_and_si128(thirdin, v_7fffffff), _mm_and_si128(fourthin, v_7fffffff)); | |
+@@ -26334,6 +26533,9 @@ std::pair<result, char*> sse_convert_utf32_to_utf8_with_errors(const char32_t* b | |
+ if(static_cast<uint16_t>(_mm_movemask_epi8(_mm_cmpeq_epi32(next_max_input, v_10ffff))) != 0xffff) { | |
+ return std::make_pair(result(error_code::TOO_LARGE, buf - start), utf8_output); | |
+ } | |
++ // We need to update in and nextin because they are used later. | |
++ in = thirdin; | |
++ nextin = fourthin; | |
+ } else { | |
+ // 1. pack the bytes | |
+ const __m128i utf8_packed = _mm_packus_epi16(in_16, nextin_16); | |
+@@ -26437,25 +26639,25 @@ std::pair<result, char*> sse_convert_utf32_to_utf8_with_errors(const char32_t* b | |
+ * t2 => [0ccc|cccc] [10cc|cccc] | |
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb]) | |
+ */ | |
+-#define vec(x) _mm_set1_epi16(static_cast<uint16_t>(x)) | |
++#define simdutf_vec(x) _mm_set1_epi16(static_cast<uint16_t>(x)) | |
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc] | |
+ const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even); | |
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc] | |
+- const __m128i t1 = _mm_and_si128(t0, vec(0b0011111101111111)); | |
++ const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111)); | |
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc] | |
+- const __m128i t2 = _mm_or_si128 (t1, vec(0b1000000000000000)); | |
++ const __m128i t2 = _mm_or_si128 (t1, simdutf_vec(0b1000000000000000)); | |
+ | |
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc] | |
+ const __m128i s0 = _mm_srli_epi16(in_16, 4); | |
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00] | |
+- const __m128i s1 = _mm_and_si128(s0, vec(0b0000111111111100)); | |
++ const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100)); | |
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa] | |
+- const __m128i s2 = _mm_maddubs_epi16(s1, vec(0x0140)); | |
++ const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140)); | |
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa] | |
+- const __m128i s3 = _mm_or_si128(s2, vec(0b1100000011100000)); | |
+- const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, vec(0b0100000000000000)); | |
++ const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000)); | |
++ const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask, simdutf_vec(0b0100000000000000)); | |
+ const __m128i s4 = _mm_xor_si128(s3, m0); | |
+-#undef vec | |
++#undef simdutf_vec | |
+ | |
+ // 4. expand words 16-bit => 32-bit | |
+ const __m128i out0 = _mm_unpacklo_epi16(t2, s4); | |
+@@ -26529,7 +26731,7 @@ std::pair<result, char*> sse_convert_utf32_to_utf8_with_errors(const char32_t* b | |
+ return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); | |
+ } | |
+ /* end file src/westmere/sse_convert_utf32_to_utf8.cpp */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf32_to_utf16.cpp | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf32_to_utf16.cpp | |
+ /* begin file src/westmere/sse_convert_utf32_to_utf16.cpp */ | |
+ template <endianness big_endian> | |
+ std::pair<const char32_t*, char16_t*> sse_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) { | |
+@@ -26668,7 +26870,7 @@ std::pair<result, char16_t*> sse_convert_utf32_to_utf16_with_errors(const char32 | |
+ } // namespace westmere | |
+ } // namespace simdutf | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/buf_block_reader.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/buf_block_reader.h | |
+ /* begin file src/generic/buf_block_reader.h */ | |
+ namespace simdutf { | |
+ namespace westmere { | |
+@@ -26763,7 +26965,7 @@ simdutf_really_inline void buf_block_reader<STEP_SIZE>::advance() { | |
+ } // namespace westmere | |
+ } // namespace simdutf | |
+ /* end file src/generic/buf_block_reader.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_lookup4_algorithm.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_lookup4_algorithm.h | |
+ /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ | |
+ namespace simdutf { | |
+ namespace westmere { | |
+@@ -26952,7 +27154,7 @@ using utf8_validation::utf8_checker; | |
+ } // namespace westmere | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_validator.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_validator.h | |
+ /* begin file src/generic/utf8_validation/utf8_validator.h */ | |
+ namespace simdutf { | |
+ namespace westmere { | |
+@@ -27079,7 +27281,7 @@ result generic_validate_ascii_with_errors(const char * input, size_t length) { | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_validation/utf8_validator.h */ | |
+ // transcoding from UTF-8 to UTF-16 | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/valid_utf8_to_utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/valid_utf8_to_utf16.h | |
+ /* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ | |
+ | |
+ | |
+@@ -27140,7 +27342,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -27154,7 +27356,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, | |
+ } // namespace westmere | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/utf8_to_utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/utf8_to_utf16.h | |
+ /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */ | |
+ | |
+ | |
+@@ -27286,7 +27488,19 @@ using namespace simd; | |
+ simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) { | |
+ size_t pos = 0; | |
+ char16_t* start{utf16_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 8; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then in[margin] is the eighth-last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -27331,7 +27545,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -27349,7 +27563,19 @@ using namespace simd; | |
+ simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) { | |
+ size_t pos = 0; | |
+ char16_t* start{utf16_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 8 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 8; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then in[margin] is the eighth-last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -27401,7 +27627,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -27438,7 +27664,7 @@ using namespace simd; | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */ | |
+ // transcoding from UTF-8 to UTF-32 | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/valid_utf8_to_utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/valid_utf8_to_utf32.h | |
+ /* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ | |
+ | |
+ namespace simdutf { | |
+@@ -27484,7 +27710,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size, | |
+ } // namespace westmere | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/utf8_to_utf32.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/utf8_to_utf32.h | |
+ /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */ | |
+ | |
+ | |
+@@ -27616,7 +27842,19 @@ using namespace simd; | |
+ simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) { | |
+ size_t pos = 0; | |
+ char32_t* start{utf32_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 4; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then in[margin] is the fourth-last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -27661,7 +27899,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -27678,7 +27916,19 @@ using namespace simd; | |
+ simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) { | |
+ size_t pos = 0; | |
+ char32_t* start{utf32_output}; | |
+- const size_t safety_margin = 16; // to avoid overruns! | |
++ // In the worst case, we have the haswell kernel which can cause an overflow of | |
++ // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes, | |
++ // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate | |
++ // much more than 8 bytes. However, you cannot generally assume that you have valid | |
++ // UTF-8 input, so we are going to go back from the end counting 4 leading bytes, | |
++ // to give us a good margin. | |
++ size_t leading_byte = 0; | |
++ size_t margin = size; | |
++ for(; margin > 0 && leading_byte < 4; margin--) { | |
++ leading_byte += (int8_t(in[margin-1]) > -65); | |
++ } | |
++ // If the input is long enough, then in[margin] is the fourth-last leading byte. | |
++ const size_t safety_margin = size - margin + 1; // to avoid overruns! | |
+ while(pos + 64 + safety_margin <= size) { | |
+ simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos)); | |
+ if(input.is_ascii()) { | |
+@@ -27728,7 +27978,7 @@ using namespace simd; | |
+ utf8_end_of_code_point_mask >>= consumed; | |
+ } | |
+ // At this point there may remain between 0 and 12 bytes in the | |
+- // 64-byte block.These bytes will be processed again. So we have an | |
++ // 64-byte block. These bytes will be processed again. So we have an | |
+ // 80% efficiency (in the worst case). In practice we expect an | |
+ // 85% to 90% efficiency. | |
+ } | |
+@@ -27761,7 +28011,7 @@ using namespace simd; | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */ | |
+ // other functions | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf8.h | |
+ /* begin file src/generic/utf8.h */ | |
+ | |
+ namespace simdutf { | |
+@@ -27808,7 +28058,7 @@ simdutf_really_inline size_t utf32_length_from_utf8(const char* in, size_t size) | |
+ } // namespace westmere | |
+ } // namespace simdutf | |
+ /* end file src/generic/utf8.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf16.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=generic/utf16.h | |
+ /* begin file src/generic/utf16.h */ | |
+ namespace simdutf { | |
+ namespace westmere { | |
+@@ -28340,7 +28590,7 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i | |
+ } // namespace westmere | |
+ } // namespace simdutf | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/end.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/westmere/end.h | |
+ /* begin file src/simdutf/westmere/end.h */ | |
+ #if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE | |
+ // nothing needed. | |
+diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h | |
+index 80189d316c..618003e7c2 100644 | |
+--- a/deps/simdutf/simdutf.h | |
++++ b/deps/simdutf/simdutf.h | |
+@@ -1,11 +1,11 @@ | |
+-/* auto-generated on 2023-02-24 17:01:43 -0500. Do not edit! */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf.h | |
++/* auto-generated on 2023-05-12 15:20:29 -0400. Do not edit! */ | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf.h | |
+ /* begin file include/simdutf.h */ | |
+ #ifndef SIMDUTF_H | |
+ #define SIMDUTF_H | |
+ #include <cstring> | |
+ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/compiler_check.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf/compiler_check.h | |
+ /* begin file include/simdutf/compiler_check.h */ | |
+ #ifndef SIMDUTF_COMPILER_CHECK_H | |
+ #define SIMDUTF_COMPILER_CHECK_H | |
+@@ -43,13 +43,13 @@ | |
+ | |
+ #endif // SIMDUTF_COMPILER_CHECK_H | |
+ /* end file include/simdutf/compiler_check.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/common_defs.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf/common_defs.h | |
+ /* begin file include/simdutf/common_defs.h */ | |
+ #ifndef SIMDUTF_COMMON_DEFS_H | |
+ #define SIMDUTF_COMMON_DEFS_H | |
+ | |
+ #include <cassert> | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/portability.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf/portability.h | |
+ /* begin file include/simdutf/portability.h */ | |
+ #ifndef SIMDUTF_PORTABILITY_H | |
+ #define SIMDUTF_PORTABILITY_H | |
+@@ -144,6 +144,8 @@ | |
+ // POWER processors. Please see https://github.com/lemire/simdutf/issues/51 | |
+ #elif defined(__s390__) | |
+ // s390 IBM system. Big endian. | |
++#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64 | |
++// RISC-V 64-bit | |
+ #else | |
+ // The simdutf library is designed | |
+ // for 64-bit processors and it seems that you are not | |
+@@ -278,7 +280,7 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") | |
+ | |
+ #endif // SIMDUTF_PORTABILITY_H | |
+ /* end file include/simdutf/portability.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/avx512.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf/avx512.h | |
+ /* begin file include/simdutf/avx512.h */ | |
+ #ifndef SIMDUTF_AVX512_H_ | |
+ #define SIMDUTF_AVX512_H_ | |
+@@ -479,7 +481,7 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") | |
+ | |
+ #endif // SIMDUTF_COMMON_DEFS_H | |
+ /* end file include/simdutf/common_defs.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/encoding_types.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf/encoding_types.h | |
+ /* begin file include/simdutf/encoding_types.h */ | |
+ #include <string> | |
+ | |
+@@ -527,7 +529,7 @@ size_t bom_byte_size(encoding_type bom); | |
+ } // BOM namespace | |
+ } // simdutf namespace | |
+ /* end file include/simdutf/encoding_types.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/error.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf/error.h | |
+ /* begin file include/simdutf/error.h */ | |
+ #ifndef ERROR_H | |
+ #define ERROR_H | |
+@@ -564,7 +566,7 @@ SIMDUTF_PUSH_DISABLE_WARNINGS | |
+ SIMDUTF_DISABLE_UNDESIRED_WARNINGS | |
+ | |
+ // Public API | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/simdutf_version.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf/simdutf_version.h | |
+ /* begin file include/simdutf/simdutf_version.h */ | |
+ // /include/simdutf/simdutf_version.h automatically generated by release.py, | |
+ // do not change by hand | |
+@@ -572,7 +574,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS | |
+ #define SIMDUTF_SIMDUTF_VERSION_H | |
+ | |
+ /** The version of simdutf being used (major.minor.revision) */ | |
+-#define SIMDUTF_VERSION "3.2.2" | |
++#define SIMDUTF_VERSION "3.2.9" | |
+ | |
+ namespace simdutf { | |
+ enum { | |
+@@ -587,13 +589,13 @@ enum { | |
+ /** | |
+ * The revision (major.minor.REVISION) of simdutf being used. | |
+ */ | |
+- SIMDUTF_VERSION_REVISION = 2 | |
++ SIMDUTF_VERSION_REVISION = 9 | |
+ }; | |
+ } // namespace simdutf | |
+ | |
+ #endif // SIMDUTF_SIMDUTF_VERSION_H | |
+ /* end file include/simdutf/simdutf_version.h */ | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/implementation.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf/implementation.h | |
+ /* begin file include/simdutf/implementation.h */ | |
+ #ifndef SIMDUTF_IMPLEMENTATION_H | |
+ #define SIMDUTF_IMPLEMENTATION_H | |
+@@ -603,7 +605,7 @@ enum { | |
+ #endif | |
+ #include <vector> | |
+ #include <tuple> | |
+-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/internal/isadetection.h | |
++// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf/internal/isadetection.h | |
+ /* begin file include/simdutf/internal/isadetection.h */ | |
+ /* From | |
+ https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h | |
+@@ -706,6 +708,7 @@ namespace cpuid_bit { | |
+ // EAX = 0x01 | |
+ constexpr uint32_t pclmulqdq = uint32_t(1) << 1; ///< @private bit 1 of ECX for EAX=0x1 | |
+ constexpr uint32_t sse42 = uint32_t(1) << 20; ///< @private bit 20 of ECX for EAX=0x1 | |
++ constexpr uint32_t osxsave = (uint32_t(1) << 26) | (uint32_t(1) << 27); ///< @private bits 26+27 of ECX for EAX=0x1 | |
+ | |
+ // EAX = 0x7f (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf) | |
+ // See: "Table 3-8. Information Returned by CPUID Instruction" | |
+@@ -731,6 +734,10 @@ namespace cpuid_bit { | |
+ namespace edx { | |
+ constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8; | |
+ } | |
++ namespace xcr0_bit { | |
++ constexpr uint64_t avx256_saved = uint64_t(1) << 2; ///< @private bit 2 = AVX | |
++ constexpr uint64_t avx512_saved = uint64_t(7) << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM | |
++ } | |
+ } | |
+ } | |
+ | |
+@@ -740,7 +747,7 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, | |
+ uint32_t *edx) { | |
+ #if defined(_MSC_VER) | |
+ int cpu_info[4]; | |
+- __cpuid(cpu_info, *eax); | |
++ __cpuidex(cpu_info, *eax, *ecx); | |
+ *eax = cpu_info[0]; | |
+ *ebx = cpu_info[1]; | |
+ *ecx = cpu_info[2]; | |
+@@ -758,6 +765,16 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, | |
+ #endif | |
+ } | |
+ | |
++static inline uint64_t xgetbv() { | |
++ #if defined(_MSC_VER) | |
++ return _xgetbv(0); | |
++ #else | |
++ uint32_t xcr0_lo, xcr0_hi; | |
++ asm volatile("xgetbv\n\t" : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0)); | |
++ return xcr0_lo | ((uint64_t)xcr0_hi << 32); | |
++ #endif | |
++ } | |
++ | |
+ static inline uint32_t detect_supported_architectures() { | |
+ uint32_t eax; | |
+ uint32_t ebx = 0; | |
+@@ -777,6 +794,16 @@ static inline uint32_t detect_supported_architectures() { | |
+ host_isa |= instruction_set::PCLMULQDQ; | |
+ } | |
+ | |
++ if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) { | |
++ return host_isa; | |
++ } | |
++ | |
++ // xgetbv for checking if the OS saves registers | |
++ uint64_t xcr0 = xgetbv(); | |
++ | |
++ if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) { | |
++ return host_isa; | |
++ } | |
+ // ECX for EAX=0x7 | |
+ eax = 0x7; | |
+ ecx = 0x0; // Sub-leaf = 0 | |
+@@ -790,6 +817,9 @@ static inline uint32_t detect_supported_architectures() { | |
+ if (ebx & cpuid_bit::ebx::bmi2) { | |
+ host_isa |= instruction_set::BMI2; | |
+ } | |
++ if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) == cpuid_bit::xcr0_bit::avx512_saved)) { | |
++ return host_isa; | |
++ } | |
+ if (ebx & cpuid_bit::ebx::avx512f) { | |
+ host_isa |= instruction_set::AVX512F; | |
+ } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment