Created
January 15, 2018 07:08
-
-
Save laruence/c8de7921ef8f1bf346de746285fd78ca to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c | |
index a8af4bf..c761db2 100644 | |
--- a/Zend/zend_operators.c | |
+++ b/Zend/zend_operators.c | |
@@ -32,6 +32,10 @@ | |
#include "zend_exceptions.h" | |
#include "zend_closures.h" | |
+#ifdef __SSE4_2__ | |
+#include <nmmintrin.h> | |
+#endif | |
+ | |
#if ZEND_USE_TOLOWER_L | |
#include <locale.h> | |
static _locale_t current_locale = NULL; | |
@@ -2625,11 +2629,62 @@ ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, int | |
{ | |
register unsigned char *p = (unsigned char*)ZSTR_VAL(str); | |
register unsigned char *end = p + ZSTR_LEN(str); | |
+ register unsigned char *r; | |
+ zend_string *res = NULL; | |
+ | |
+#if __SSE4_2__ | |
+ if (UNEXPECTED(ZSTR_LEN(str) > 15)) { | |
+ __m128i range = _mm_setr_epi8('A', 'Z', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
+ __m128i distance = _mm_set1_epi8(0x20); | |
+ uint8_t mode = _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_UNIT_MASK; | |
+ | |
+ for (; end - p > 15; p += 16) { | |
+ __m128i s128 = _mm_loadu_si128((__m128i*)p); | |
+ __m128i r128 = _mm_cmpistrm(range, s128, mode); | |
+ | |
+ if (_mm_movemask_epi8(r128)) { | |
+ __m128i mask = _mm_and_si128(r128, distance); | |
+ | |
+ res = zend_string_alloc(ZSTR_LEN(str), persistent); | |
+ if (p != (unsigned char*)ZSTR_VAL(str)) { | |
+ memcpy(ZSTR_VAL(res), ZSTR_VAL(str), p - (unsigned char*)ZSTR_VAL(str)); | |
+ } | |
+ r = ZSTR_VAL(res) + (p - (unsigned char*)ZSTR_VAL(str)); | |
+ _mm_storeu_si128((__m128i*)r, _mm_xor_si128(s128, mask)); | |
+ | |
+ p += 16; | |
+ r += 16; | |
+ for (; end - p > 15; p += 16) { | |
+ s128 = _mm_loadu_si128((__m128i*)p); | |
+ r128 = _mm_cmpistrm(range, s128, mode); | |
+ | |
+ if (_mm_movemask_epi8(r128)) { | |
+ __m128i mask = _mm_and_si128(r128, distance); | |
+ _mm_storeu_si128((__m128i*)r, _mm_xor_si128(s128, mask)); | |
+ } else { | |
+ _mm_storeu_si128((__m128i*)r, s128); | |
+ } | |
+ r += 16; | |
+ } | |
+ break; | |
+ } | |
+ } | |
+ | |
+ if (res) { | |
+ while (p < end) { | |
+ *r = zend_tolower_ascii(*p); | |
+ r++; | |
+ p++; | |
+ } | |
+ *r = '\0'; | |
+ return res; | |
+ } | |
+ } | |
+#endif | |
while (p < end) { | |
if (*p != zend_tolower_ascii(*p)) { | |
- zend_string *res = zend_string_alloc(ZSTR_LEN(str), persistent); | |
- register unsigned char *r; | |
+ res = zend_string_alloc(ZSTR_LEN(str), persistent); | |
if (p != (unsigned char*)ZSTR_VAL(str)) { | |
memcpy(ZSTR_VAL(res), ZSTR_VAL(str), p - (unsigned char*)ZSTR_VAL(str)); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment