Skip to content

Instantly share code, notes, and snippets.

@laruence
Last active February 12, 2018 01:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save laruence/f293d9e39ab932e20cbf62c2ec072356 to your computer and use it in GitHub Desktop.
Save laruence/f293d9e39ab932e20cbf62c2ec072356 to your computer and use it in GitHub Desktop.
diff --git a/Zend/zend_cpuinfo.h b/Zend/zend_cpuinfo.h
index d0d3a93..f0c298d 100644
--- a/Zend/zend_cpuinfo.h
+++ b/Zend/zend_cpuinfo.h
@@ -106,42 +106,42 @@ ZEND_API int zend_cpu_supports(zend_cpu_feature feature);
* before all PLT symbols are resloved. in other words,
* resolver functions should not depends any external
* functions */
-static zend_always_inline int zend_cpu_support_sse2() {
+static zend_always_inline int zend_cpu_supports_sse2() {
#if PHP_HAVE_BUILTIN_CPU_INIT
__builtin_cpu_init();
#endif
return __builtin_cpu_supports("sse2");
}
-static zend_always_inline int zend_cpu_support_sse3() {
+static zend_always_inline int zend_cpu_supports_ssse3() {
#if PHP_HAVE_BUILTIN_CPU_INIT
__builtin_cpu_init();
#endif
- return __builtin_cpu_supports("sse3");
+ return __builtin_cpu_supports("ssse3");
}
-static zend_always_inline int zend_cpu_support_sse41() {
+static zend_always_inline int zend_cpu_supports_sse41() {
#if PHP_HAVE_BUILTIN_CPU_INIT
__builtin_cpu_init();
#endif
return __builtin_cpu_supports("sse4.1");
}
-static zend_always_inline int zend_cpu_support_sse42() {
+static zend_always_inline int zend_cpu_supports_sse42() {
#if PHP_HAVE_BUILTIN_CPU_INIT
__builtin_cpu_init();
#endif
return __builtin_cpu_supports("sse4.2");
}
-static zend_always_inline int zend_cpu_support_avx() {
+static zend_always_inline int zend_cpu_supports_avx() {
#if PHP_HAVE_BUILTIN_CPU_INIT
__builtin_cpu_init();
#endif
return __builtin_cpu_supports("avx");
}
-static zend_always_inline int zend_cpu_support_avx2() {
+static zend_always_inline int zend_cpu_supports_avx2() {
#if PHP_HAVE_BUILTIN_CPU_INIT
__builtin_cpu_init();
#endif
@@ -149,29 +149,28 @@ static zend_always_inline int zend_cpu_support_avx2() {
}
#else
-static zend_always_inline int zend_cpu_support_sse2() {
+static zend_always_inline int zend_cpu_supports_sse2() {
return zend_cpu_supports(ZEND_CPU_FEATURE_SSE2);
}
-static zend_always_inline int zend_cpu_support_sse3() {
- return zend_cpu_supports(ZEND_CPU_FEATURE_SSE3);
+static zend_always_inline int zend_cpu_supports_ssse3() {
+ return zend_cpu_supports(ZEND_CPU_FEATURE_SSSE3);
}
-static zend_always_inline int zend_cpu_support_sse41() {
+static zend_always_inline int zend_cpu_supports_sse41() {
return zend_cpu_supports(ZEND_CPU_FEATURE_SSE41);
}
-static zend_always_inline int zend_cpu_support_sse42() {
+static zend_always_inline int zend_cpu_supports_sse42() {
return zend_cpu_supports(ZEND_CPU_FEATURE_SSE42);
}
-static zend_always_inline int zend_cpu_support_avx() {
+static zend_always_inline int zend_cpu_supports_avx() {
return zend_cpu_supports(ZEND_CPU_FEATURE_AVX);
}
-static zend_always_inline int zend_cpu_support_avx2() {
- /* TODO */
- return 0;
+static zend_always_inline int zend_cpu_supports_avx2() {
+ return zend_cpu_supports(ZEND_CPU_FEATURE_AVX2);
}
#endif
diff --git a/Zend/zend_portability.h b/Zend/zend_portability.h
index bd3e23e..abf7dcc 100644
--- a/Zend/zend_portability.h
+++ b/Zend/zend_portability.h
@@ -520,10 +520,48 @@ static zend_always_inline double _zend_get_nan(void) /* {{{ */
# define ZEND_INTRIN_HAVE_IFUNC_TARGET 1
#endif
+#if (defined(__i386__) || defined(__x86_64__))
+# if PHP_HAVE_SSSE3_INSTRUCTIONS && defined(HAVE_TMMINTRIN_H)
+# define PHP_HAVE_SSSE3
+# endif
+
+# if PHP_HAVE_SSE4_2_INSTRUCTIONS && defined(HAVE_NMMINTRIN_H)
+# define PHP_HAVE_SSE4_2
+# endif
+
+# if PHP_HAVE_AVX2_INSTRUCTIONS && defined(HAVE_IMMINTRIN_H)
+# define PHP_HAVE_AVX2
+# endif
+#endif
+
+#ifdef __SSSE3__
+/* Instructions compiled directly. */
+# define ZEND_INTRIN_SSSE3_NATIVE 1
+#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_SSSE3)) || defined(ZEND_WIN32)
+/* Function resolved by ifunc or MINIT. */
+# define ZEND_INTRIN_SSSE3_RESOLVER 1
+#endif
+
+#if ZEND_INTRIN_HAVE_IFUNC_TARGET && (ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER)
+# define ZEND_INTRIN_SSSE3_FUNC_PROTO 1
+#elif ZEND_INTRIN_SSSE3_RESOLVER
+# define ZEND_INTRIN_SSSE3_FUNC_PTR 1
+#endif
+
+#if ZEND_INTRIN_SSSE3_RESOLVER
+# if defined(HAVE_FUNC_ATTRIBUTE_TARGET)
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
+# else
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) func
+# endif
+#else
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func)
+#endif
+
#ifdef __SSE4_2__
/* Instructions compiled directly. */
# define ZEND_INTRIN_SSE4_2_NATIVE 1
-#elif (defined(__i386__) || defined(__x86_64__)) && defined(HAVE_NMMINTRIN_H) || defined(ZEND_WIN32)
+#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_SSE4_2)) || defined(ZEND_WIN32)
/* Function resolved by ifunc or MINIT. */
# define ZEND_INTRIN_SSE4_2_RESOLVER 1
#endif
@@ -544,6 +582,30 @@ static zend_always_inline double _zend_get_nan(void) /* {{{ */
# define ZEND_INTRIN_SSE4_2_FUNC_DECL(func)
#endif
+#ifdef __AVX2__
+/* Instructions compiled directly. */
+# define ZEND_INTRIN_AVX2_NATIVE 1
+#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_AVX2)) || defined(ZEND_WIN32)
+/* Function resolved by ifunc or MINIT. */
+# define ZEND_INTRIN_AVX2_RESOLVER 1
+#endif
+
+#if ZEND_INTRIN_HAVE_IFUNC_TARGET && (ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER)
+# define ZEND_INTRIN_AVX2_FUNC_PROTO 1
+#elif ZEND_INTRIN_AVX2_RESOLVER
+# define ZEND_INTRIN_AVX2_FUNC_PTR 1
+#endif
+
+#if ZEND_INTRIN_AVX2_RESOLVER
+# if defined(HAVE_FUNC_ATTRIBUTE_TARGET)
+# define ZEND_INTRIN_AVX2_FUNC_DECL(func) ZEND_API func __attribute__((target("avx2")))
+# else
+# define ZEND_INTRIN_AVX2_FUNC_DECL(func) func
+# endif
+#else
+# define ZEND_INTRIN_AVX2_FUNC_DECL(func)
+#endif
+
/* Intrinsics macros end. */
#ifdef ZEND_WIN32
diff --git a/acinclude.m4 b/acinclude.m4
index 5c6a5c5..b8902f0 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -3271,7 +3271,7 @@ AC_DEFUN([PHP_CHECK_BUILTIN_CPU_SUPPORTS], [
AC_MSG_CHECKING([for __builtin_cpu_supports])
AC_TRY_LINK(, [
- return __builtin_cpu_supports("sse2")? 1 : 0;
+ return __builtin_cpu_supports("sse")? 1 : 0;
], [
have_builtin_cpu_supports=1
AC_MSG_RESULT([yes])
@@ -3282,7 +3282,28 @@ AC_DEFUN([PHP_CHECK_BUILTIN_CPU_SUPPORTS], [
AC_DEFINE_UNQUOTED([PHP_HAVE_BUILTIN_CPU_SUPPORTS],
[$have_builtin_cpu_supports], [Whether the compiler supports __builtin_cpu_supports])
+])
+dnl
+dnl PHP_CHECK_CPU_SUPPORTS(feature)
+dnl
+dnl Runs a small program that queries __builtin_cpu_supports("feature") and
+dnl defines PHP_HAVE_<FEATURE>_INSTRUCTIONS to 1 or 0 accordingly. The probe
+dnl is only attempted when __builtin_cpu_supports is available; the result is
+dnl 0 otherwise, and also when cross-compiling, where the probe cannot be run.
+dnl
+AC_DEFUN([PHP_CHECK_CPU_SUPPORTS], [
+  AC_REQUIRE([PHP_CHECK_BUILTIN_CPU_INIT])
+  AC_REQUIRE([PHP_CHECK_BUILTIN_CPU_SUPPORTS])
+  have_ext_instructions=0
+  if test "$have_builtin_cpu_supports" = "1"; then
+    AC_MSG_CHECKING([for $1 instructions supports])
+    AC_TRY_RUN([
+int main() {
+	return __builtin_cpu_supports("$1")? 0 : 1;
+}
+    ], [
+      have_ext_instructions=1
+      AC_MSG_RESULT([yes])
+    ], [
+      AC_MSG_RESULT([no])
+    ], [
+      dnl Cross-compiling: the probe cannot be executed, assume unsupported.
+      AC_MSG_RESULT([no])
+    ])
+  fi
+  AC_DEFINE_UNQUOTED(AS_TR_CPP([PHP_HAVE_$1_INSTRUCTIONS]),
+    [$have_ext_instructions], [Whether the compiler supports $1 instructions])
])
dnl Load the AX_CHECK_COMPILE_FLAG macro from the autoconf archive.
diff --git a/configure.ac b/configure.ac
index 811faa9..531e6b4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -496,7 +496,9 @@ sys/utsname.h \
sys/ipc.h \
dlfcn.h \
assert.h \
-nmmintrin.h
+tmmintrin.h \
+nmmintrin.h \
+immintrin.h
],[],[],[
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
@@ -571,6 +573,12 @@ PHP_CHECK_BUILTIN_CPU_INIT
dnl Check __builtin_cpu_supports
PHP_CHECK_BUILTIN_CPU_SUPPORTS
+dnl Check instructions
+PHP_CHECK_CPU_SUPPORTS([ssse3])
+PHP_CHECK_CPU_SUPPORTS([sse4.2])
+PHP_CHECK_CPU_SUPPORTS([avx])
+PHP_CHECK_CPU_SUPPORTS([avx2])
+
dnl Check for members of the stat structure
AC_STRUCT_ST_BLKSIZE
dnl AC_STRUCT_ST_BLOCKS will screw QNX because fileblocks.o does not exists
@@ -590,7 +598,6 @@ AC_TYPE_UID_T
dnl Checks for sockaddr_storage and sockaddr.sa_len
PHP_SOCKADDR_CHECKS
-AC_MSG_CHECKING([checking building environment])
AX_GCC_FUNC_ATTRIBUTE([ifunc])
AX_GCC_FUNC_ATTRIBUTE([target])
diff --git a/ext/standard/base64.c b/ext/standard/base64.c
index 06856b8..bbe21d8 100644
--- a/ext/standard/base64.c
+++ b/ext/standard/base64.c
@@ -53,47 +53,439 @@ static const short base64_reverse_table[256] = {
};
/* }}} */
-PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) /* {{{ */
+static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out) /* {{{ */
{
- const unsigned char *current = str;
- unsigned char *p;
- zend_string *result;
-
- result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
- p = (unsigned char *)ZSTR_VAL(result);
- while (length > 2) { /* keep going until we have less than 24 bits */
- *p++ = base64_table[current[0] >> 2];
- *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
- *p++ = base64_table[((current[1] & 0x0f) << 2) + (current[2] >> 6)];
- *p++ = base64_table[current[2] & 0x3f];
+ while (inl > 2) { /* keep going until we have less than 24 bits */
+ *out++ = base64_table[in[0] >> 2];
+ *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
+ *out++ = base64_table[((in[1] & 0x0f) << 2) + (in[2] >> 6)];
+ *out++ = base64_table[in[2] & 0x3f];
- current += 3;
- length -= 3; /* we just handle 3 octets of data */
+ in += 3;
+ inl -= 3; /* we just handle 3 octets of data */
}
/* now deal with the tail end of things */
- if (length != 0) {
- *p++ = base64_table[current[0] >> 2];
- if (length > 1) {
- *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
- *p++ = base64_table[(current[1] & 0x0f) << 2];
- *p++ = base64_pad;
+ if (inl != 0) {
+ *out++ = base64_table[in[0] >> 2];
+ if (inl > 1) {
+ *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
+ *out++ = base64_table[(in[1] & 0x0f) << 2];
+ *out++ = base64_pad;
} else {
- *p++ = base64_table[(current[0] & 0x03) << 4];
- *p++ = base64_pad;
- *p++ = base64_pad;
+ *out++ = base64_table[(in[0] & 0x03) << 4];
+ *out++ = base64_pad;
+ *out++ = base64_pad;
}
}
- *p = '\0';
- ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result));
+ *out = '\0';
+
+ return out;
+}
+/* }}} */
+
+/* Scalar base64 decoder used to finish (or entirely perform) a decode.
+ *
+ * Consumes `inl` bytes from `in` and appends the decoded bytes to `out`,
+ * starting at offset `*outl`, so a caller that has already produced a decoded
+ * prefix passes the number of bytes written so far. On success the new total
+ * is stored back into `*outl`, out[*outl] is set to '\0' and 1 is returned.
+ * When `strict` is set, malformed input makes the function return 0; `*outl`
+ * is then left untouched, although `out` may have been partially written.
+ *
+ * The counters are size_t so that offsets and lengths larger than INT_MAX are
+ * neither truncated nor exposed to signed overflow; `ch` stays an int because
+ * base64_reverse_table yields negative sentinels (-1 and -2). */
+static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, zend_bool strict) /* {{{ */
+{
+	int ch;
+	size_t i = 0, padding = 0, j = *outl;
+
+	/* run through the whole string, converting as we go */
+	while (inl-- > 0) {
+		ch = *in++;
+		if (ch == base64_pad) {
+			/* padding is only counted here; it is validated after the loop */
+			padding++;
+			continue;
+		}
+
+		ch = base64_reverse_table[ch];
+		if (!strict) {
+			/* skip unknown characters and whitespace */
+			if (ch < 0) {
+				continue;
+			}
+		} else {
+			/* skip whitespace */
+			if (ch == -1) {
+				continue;
+			}
+			/* fail on bad characters or if any data follows padding */
+			if (ch == -2 || padding) {
+				goto fail;
+			}
+		}
+
+		/* i counts accepted 6-bit symbols; four of them form three output bytes */
+		switch (i % 4) {
+			case 0:
+				out[j] = ch << 2;
+				break;
+			case 1:
+				out[j++] |= ch >> 4;
+				out[j] = (ch & 0x0f) << 4;
+				break;
+			case 2:
+				out[j++] |= ch >> 2;
+				out[j] = (ch & 0x03) << 6;
+				break;
+			case 3:
+				out[j++] |= ch;
+				break;
+		}
+		i++;
+	}
+
+	/* fail if the input is truncated (only one char in last group) */
+	if (strict && i % 4 == 1) {
+		goto fail;
+	}
+
+	/* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding
+	 * RFC 4648: "In some circumstances, the use of padding [--] is not required" */
+	if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) {
+		goto fail;
+	}
+
+	*outl = j;
+	out[j] = '\0';
+
+	return 1;
+
+fail:
+	return 0;
+}
+/* }}} */
+
+/* {{{ php_base64_encode */
+
+#if ZEND_INTRIN_AVX2_NATIVE
+# undef ZEND_INTRIN_SSSE3_NATIVE
+# undef ZEND_INTRIN_SSSE3_RESOLVER
+# undef ZEND_INTRIN_SSSE3_FUNC_PROTO
+# undef ZEND_INTRIN_SSSE3_FUNC_PTR
+#elif ZEND_INTRIN_AVX2_FUNC_PROTO && ZEND_INTRIN_SSSE3_NATIVE
+# undef ZEND_INTRIN_SSSE3_NATIVE
+# define ZEND_INTRIN_SSSE3_RESOLVER 1
+# define ZEND_INTRIN_SSSE3_FUNC_PROTO 1
+# undef ZEND_INTRIN_SSSE3_FUNC_DECL
+# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
+# else
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
+# endif
+#elif ZEND_INTRIN_AVX2_FUNC_PTR && ZEND_INTRIN_SSSE3_NATIVE
+# undef ZEND_INTRIN_SSSE3_NATIVE
+# undef ZEND_INTRIN_SSSE3_RESOLVER
+# define ZEND_INTRIN_SSSE3_RESOLVER 1
+# define ZEND_INTRIN_SSSE3_FUNC_PTR 1
+# undef ZEND_INTRIN_SSSE3_FUNC_DECL
+# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
+# else
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
+# endif
+#endif
+
+#if ZEND_INTRIN_AVX2_NATIVE
+# include <immintrin.h>
+#elif ZEND_INTRIN_SSSE3_NATIVE
+# include <tmmintrin.h>
+#elif (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER)
+# if ZEND_INTRIN_AVX2_RESOLVER
+# include <immintrin.h>
+# else
+# include <tmmintrin.h>
+# endif /* ZEND_INTRIN_AVX2_RESOLVER */
+# include "Zend/zend_cpuinfo.h"
+
+# if ZEND_INTRIN_AVX2_RESOLVER
+ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length));
+ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict));
+# endif
+
+# if ZEND_INTRIN_SSSE3_RESOLVER
+ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length));
+ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict));
+# endif
+
+zend_string *php_base64_encode_default(const unsigned char *str, size_t length);
+zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict);
+
+# if (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO)
+PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) __attribute__((ifunc("resolve_base64_encode")));
+PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) __attribute__((ifunc("resolve_base64_decode")));
+
+static void *resolve_base64_encode() {
+# if ZEND_INTRIN_AVX2_FUNC_PROTO
+ if (zend_cpu_supports_avx2()) {
+ return php_base64_encode_avx2;
+ } else
+# endif
+ if (zend_cpu_supports_ssse3()) {
+ return php_base64_encode_ssse3;
+ }
+ return php_base64_encode_default;
+}
+
+static void *resolve_base64_decode() {
+# if ZEND_INTRIN_AVX2_FUNC_PROTO
+ if (zend_cpu_supports_avx2()) {
+ return php_base64_decode_ex_avx2;
+ } else
+# endif
+ if (zend_cpu_supports_ssse3()) {
+ return php_base64_decode_ex_ssse3;
+ }
+ return php_base64_decode_ex_default;
+}
+# else /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
+
+PHPAPI zend_string *(*php_base64_encode)(const unsigned char *str, size_t length) = NULL;
+PHPAPI zend_string *(*php_base64_decode_ex)(const unsigned char *str, size_t length, zend_bool strict) = NULL;
+
+PHP_MINIT_FUNCTION(base64_intrin)
+{
+# if ZEND_INTRIN_AVX2_FUNC_PTR
+ if (zend_cpu_supports_avx2()) {
+ php_base64_encode = php_base64_encode_avx2;
+ php_base64_decode_ex = php_base64_decode_ex_avx2;
+ } else
+# endif
+ if (zend_cpu_supports_ssse3()) {
+ php_base64_encode = php_base64_encode_ssse3;
+ php_base64_decode_ex = php_base64_decode_ex_ssse3;
+ } else {
+ php_base64_encode = php_base64_encode_default;
+ php_base64_decode_ex = php_base64_decode_ex_default;
+ }
+ return SUCCESS;
+}
+# endif /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
+#endif /* ZEND_INTRIN_AVX2_NATIVE */
+
+#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
+# if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
+static __m256i php_base64_encode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
+static __m256i php_base64_encode_avx2_translate(__m256i in) __attribute__((target("avx2")));
+# endif
+static __m256i php_base64_encode_avx2_reshuffle(__m256i in)
+{
+ /* This one works with shifted (4 bytes) input in order to
+ * be able to work efficiently in the 2 128-bit lanes */
+ __m256i t0, t1, t2, t3;
+
+ /* input, bytes MSB to LSB:
+ * 0 0 0 0 x w v u t s r q p o n m
+ * l k j i h g f e d c b a 0 0 0 0 */
+ in = _mm256_shuffle_epi8(in, _mm256_set_epi8(
+ 10, 11, 9, 10,
+ 7, 8, 6, 7,
+ 4, 5, 3, 4,
+ 1, 2, 0, 1,
+
+ 14, 15, 13, 14,
+ 11, 12, 10, 11,
+ 8, 9, 7, 8,
+ 5, 6, 4, 5));
+
+ t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00));
+
+ t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040));
+
+ t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0));
+
+ t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010));
+
+ return _mm256_or_si256(t1, t3);
+ /* 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
+ * 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
+ * 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
+ * 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
+ * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
+ * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
+ * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
+ * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
+}
+
+/* Adds a per-byte offset, looked up from `lut`, to every byte of `in` and
+ * returns the sums. With input bytes in 0..63 the offsets turn each value
+ * into its base64 alphabet character.
+ * NOTE(review): assumes the caller passes 6-bit values only - confirm. */
+static __m256i php_base64_encode_avx2_translate(__m256i in)
+{
+ __m256i lut, indices, mask;
+
+ /* Offset table, repeated for each 128-bit half of the vector:
+ * index 0 -> +65 (0..25 become 'A'..'Z')
+ * index 1 -> +71 (26..51 become 'a'..'z')
+ * index 2..11 -> -4 (52..61 become '0'..'9')
+ * index 12 -> -19 (62 becomes '+')
+ * index 13 -> -16 (63 becomes '/') */
+ lut = _mm256_setr_epi8(
+ 65, 71, -4, -4, -4, -4, -4, -4,
+ -4, -4, -4, -4, -19, -16, 0, 0,
+ 65, 71, -4, -4, -4, -4, -4, -4,
+ -4, -4, -4, -4, -19, -16, 0, 0);
+
+ /* Saturating subtract: 0 for values up to 51, value - 51 above that. */
+ indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51));
+
+ /* mask is 0xFF (-1) for values above 25 and 0x00 otherwise. */
+ mask = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));
+
+ /* Subtracting -1 adds 1 to the index of every value above 25. */
+ indices = _mm256_sub_epi8(indices, mask);
+
+ /* Add the selected offset to each input byte. */
+ return _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices));
+
+}
+#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER */
+
+#if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
+# if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
+static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
+static __m128i php_base64_encode_ssse3_translate(__m128i in) __attribute__((target("ssse3")));
+# endif
+
+static __m128i php_base64_encode_ssse3_reshuffle(__m128i in)
+{
+ __m128i t0, t1, t2, t3;
+
+ /* input, bytes MSB to LSB:
+ * 0 0 0 0 l k j i h g f e d c b a */
+ in = _mm_shuffle_epi8(in, _mm_set_epi8(
+ 10, 11, 9, 10,
+ 7, 8, 6, 7,
+ 4, 5, 3, 4,
+ 1, 2, 0, 1));
+
+ t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00));
+
+ t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040));
+
+ t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0));
+
+ t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010));
+
+ /* output (upper case are MSB, lower case are LSB):
+ * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
+ * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
+ * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
+ * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
+ return _mm_or_si128(t1, t3);
+}
+
+static __m128i php_base64_encode_ssse3_translate(__m128i in)
+{
+ __m128i mask, indices;
+ __m128i lut = _mm_setr_epi8(
+ 65, 71, -4, -4,
+ -4, -4, -4, -4,
+ -4, -4, -4, -4,
+ -19, -16, 0, 0
+ );
+
+ /* Translate values 0..63 to the Base64 alphabet. There are five sets:
+ * # From To Abs Index Characters
+ * 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ
+ * 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz
+ * 2 [52..61] [48..57] -4 [2..11] 0123456789
+ * 3 [62] [43] -19 12 +
+ * 4 [63] [47] -16 13 / */
+
+ /* Create LUT indices from input:
+ * the index for range #0 is right, others are 1 less than expected: */
+ indices = _mm_subs_epu8(in, _mm_set1_epi8(51));
+
+ /* mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0: */
+ mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));
+
+ /* subtract -1, i.e. add 1 to the indices for ranges #[1..4]; all indices are now correct: */
+ indices = _mm_sub_epi8(indices, mask);
+
+ /* Add offsets to input values: */
+ return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices));
+}
+#endif /* ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
+
+#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
+# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE
+PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length)
+# elif ZEND_INTRIN_AVX2_RESOLVER
+zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length)
+# elif ZEND_INTRIN_SSSE3_RESOLVER
+zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)
+# endif
+{
+ const unsigned char *c = str;
+ unsigned char *o;
+ zend_string *result;
+
+ result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
+ o = (unsigned char *)ZSTR_VAL(result);
+# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
+ if (length > 31) {
+ __m256i s = _mm256_loadu_si256((__m256i *)c);
+
+ s = _mm256_permutevar8x32_epi32(s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
+
+ for (;;) {
+ s = php_base64_encode_avx2_reshuffle(s);
+
+ s = php_base64_encode_avx2_translate(s);
+
+ _mm256_storeu_si256((__m256i *)o, s);
+ c += 24;
+ o += 32;
+ length -= 24;
+ if (length < 28) {
+ break;
+ }
+ s = _mm256_loadu_si256((__m256i *)(c - 4));
+ }
+ }
+# else
+ while (length > 15) {
+ __m128i s = _mm_loadu_si128((__m128i *)c);
+
+ s = php_base64_encode_ssse3_reshuffle(s);
+
+ s = php_base64_encode_ssse3_translate(s);
+
+ _mm_storeu_si128((__m128i *)o, s);
+ c += 12;
+ o += 16;
+ length -= 12;
+ }
+# endif
+
+ o = php_base64_encode_impl(c, length, o);
+
+ ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
return result;
}
+
+# if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER
+zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)
+{
+ const unsigned char *c = str;
+ unsigned char *o;
+ zend_string *result;
+
+ result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
+ o = (unsigned char *)ZSTR_VAL(result);
+ while (length > 15) {
+ __m128i s = _mm_loadu_si128((__m128i *)c);
+
+ s = php_base64_encode_ssse3_reshuffle(s);
+
+ s = php_base64_encode_ssse3_translate(s);
+
+ _mm_storeu_si128((__m128i *)o, s);
+ c += 12;
+ o += 16;
+ length -= 12;
+ }
+
+ o = php_base64_encode_impl(c, length, o);
+
+ ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
+
+ return result;
+}
+# endif
+#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
+
/* }}} */
-/* {{{ */
+/* {{{ php_base64_decode_ex */
/* generate reverse table (do not set index 0 to 64)
static unsigned short base64_reverse_table[256];
#define rt base64_reverse_table
@@ -125,78 +517,300 @@ void php_base64_init(void)
efree(s);
}
*/
-/* }}} */
-PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) /* {{{ */
+#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
+# if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
+static __m256i php_base64_decode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
+# endif
+
+static __m256i php_base64_decode_avx2_reshuffle(__m256i in)
{
- const unsigned char *current = str;
- int ch, i = 0, j = 0, padding = 0;
+ __m256i merge_ab_and_bc, out;
+
+ merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));
+
+ out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000));
+
+ out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
+ 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
+ 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
+
+ return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
+}
+#endif
+
+#if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
+# if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
+static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
+# endif
+
+static __m128i php_base64_decode_ssse3_reshuffle(__m128i in)
+{
+ __m128i merge_ab_and_bc, out;
+
+ merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));
+ /* 0000kkkk LLllllll 0000JJJJ JJjjKKKK
+ * 0000hhhh IIiiiiii 0000GGGG GGggHHHH
+ * 0000eeee FFffffff 0000DDDD DDddEEEE
+ * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */
+
+ out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000));
+ /* 00000000 JJJJJJjj KKKKkkkk LLllllll
+ * 00000000 GGGGGGgg HHHHhhhh IIiiiiii
+ * 00000000 DDDDDDdd EEEEeeee FFffffff
+ * 00000000 AAAAAAaa BBBBbbbb CCcccccc */
+
+ return _mm_shuffle_epi8(out, _mm_setr_epi8(
+ 2, 1, 0,
+ 6, 5, 4,
+ 10, 9, 8,
+ 14, 13, 12,
+ -1, -1, -1, -1));
+ /* 00000000 00000000 00000000 00000000
+ * LLllllll KKKKkkkk JJJJJJjj IIiiiiii
+ * HHHHhhhh GGGGGGgg FFffffff EEEEeeee
+ * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */
+}
+#endif
+
+#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
+# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE
+PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict)
+# elif ZEND_INTRIN_AVX2_RESOLVER
+zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict)
+# else
+zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict)
+# endif
+{
+ const unsigned char *c = str;
+ unsigned char *o;
+ size_t outl = 0;
zend_string *result;
result = zend_string_alloc(length, 0);
+ o = (unsigned char *)ZSTR_VAL(result);
- /* run through the whole string, converting as we go */
- while (length-- > 0) {
- ch = *current++;
- if (ch == base64_pad) {
- padding++;
- continue;
- }
+ /* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions"
+ * https://arxiv.org/pdf/1704.00605.pdf */
+# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
+ while (length > 31 + 2) {
+ __m256i lut_lo, lut_hi, lut_roll;
+ __m256i hi_nibbles, lo_nibbles, hi, lo;
+ __m256i str = _mm256_loadu_si256((__m256i *)c);
- ch = base64_reverse_table[ch];
- if (!strict) {
- /* skip unknown characters and whitespace */
- if (ch < 0) {
- continue;
- }
+ lut_lo = _mm256_setr_epi8(
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
+
+ lut_hi = _mm256_setr_epi8(
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
+
+ lut_roll = _mm256_setr_epi8(
+ 0, 16, 19, 4, -65, -65, -71, -71,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 16, 19, 4, -65, -65, -71, -71,
+ 0, 0, 0, 0, 0, 0, 0, 0);
+
+ hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f));
+ lo_nibbles = _mm256_and_si256(str, _mm256_set1_epi8(0x2f));
+ hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
+ lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
+
+ if (!_mm256_testz_si256(lo, hi)) {
+ break;
} else {
- /* skip whitespace */
- if (ch == -1) {
- continue;
- }
- /* fail on bad characters or if any data follows padding */
- if (ch == -2 || padding) {
- goto fail;
- }
+ __m256i eq_2f, roll;
+ eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f));
+ roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles));
+
+
+ str = _mm256_add_epi8(str, roll);
+
+ str = php_base64_decode_avx2_reshuffle(str);
+
+ _mm256_storeu_si256((__m256i *)o, str);
+
+ c += 32;
+ o += 24;
+ outl += 24;
+ length -= 32;
}
+ }
+# else
+ while (length > 15 + 2) {
+ __m128i lut_lo, lut_hi, lut_roll;
+ __m128i hi_nibbles, lo_nibbles, hi, lo;
- switch(i % 4) {
- case 0:
- ZSTR_VAL(result)[j] = ch << 2;
- break;
- case 1:
- ZSTR_VAL(result)[j++] |= ch >> 4;
- ZSTR_VAL(result)[j] = (ch & 0x0f) << 4;
- break;
- case 2:
- ZSTR_VAL(result)[j++] |= ch >>2;
- ZSTR_VAL(result)[j] = (ch & 0x03) << 6;
- break;
- case 3:
- ZSTR_VAL(result)[j++] |= ch;
+ __m128i s = _mm_loadu_si128((__m128i *)c);
+
+ lut_lo = _mm_setr_epi8(
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
+
+ lut_hi = _mm_setr_epi8(
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
+
+ lut_roll = _mm_setr_epi8(
+ 0, 16, 19, 4, -65, -65, -71, -71,
+ 0, 0, 0, 0, 0, 0, 0, 0);
+
+ hi_nibbles = _mm_and_si128(_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f));
+ lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f));
+ hi = _mm_shuffle_epi8(lut_hi, hi_nibbles);
+ lo = _mm_shuffle_epi8(lut_lo, lo_nibbles);
+
+ /* Check for invalid input: if any "and" values from lo and hi are not zero,
+ fall back on bytewise code to do error checking and reporting: */
+ if (UNEXPECTED(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_set1_epi8(0))) != 0)) {
break;
+ } else {
+ __m128i eq_2f, roll;
+
+ eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f));
+ roll = _mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2f, hi_nibbles));
+
+ s = _mm_add_epi8(s, roll);
+
+ s = php_base64_decode_ssse3_reshuffle(s);
+
+ _mm_storeu_si128((__m128i *)o, s);
+
+ c += 16;
+ o += 12;
+ outl += 12;
+ length -= 16;
}
- i++;
}
- /* fail if the input is truncated (only one char in last group) */
- if (strict && i % 4 == 1) {
- goto fail;
+# endif
+
+ if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
+ zend_string_free(result);
+ return NULL;
}
- /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding
- * RFC 4648: "In some circumstances, the use of padding [--] is not required" */
- if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) {
- goto fail;
+
+ ZSTR_LEN(result) = outl;
+
+ return result;
+}
+
+# if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER
+zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict)
+{
+ const unsigned char *c = str;
+ unsigned char *o;
+ size_t outl = 0;
+ zend_string *result;
+
+ result = zend_string_alloc(length, 0);
+ o = (unsigned char *)ZSTR_VAL(result);
+
+ while (length > 15 + 2) {
+ __m128i lut_lo, lut_hi, lut_roll;
+ __m128i hi_nibbles, lo_nibbles, hi, lo;
+
+ __m128i s = _mm_loadu_si128((__m128i *)c);
+
+ lut_lo = _mm_setr_epi8(
+ 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+ 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
+
+ lut_hi = _mm_setr_epi8(
+ 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
+
+ lut_roll = _mm_setr_epi8(
+ 0, 16, 19, 4, -65, -65, -71, -71,
+ 0, 0, 0, 0, 0, 0, 0, 0);
+
+ hi_nibbles = _mm_and_si128(_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f));
+ lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f));
+ hi = _mm_shuffle_epi8(lut_hi, hi_nibbles);
+ lo = _mm_shuffle_epi8(lut_lo, lo_nibbles);
+
+ /* Check for invalid input: if any "and" values from lo and hi are not zero,
+ fall back on bytewise code to do error checking and reporting: */
+ if (UNEXPECTED(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_set1_epi8(0))) != 0)) {
+ break;
+ } else {
+ __m128i eq_2f, roll;
+
+ eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f));
+ roll = _mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2f, hi_nibbles));
+
+ s = _mm_add_epi8(s, roll);
+
+ s = php_base64_decode_ssse3_reshuffle(s);
+
+ _mm_storeu_si128((__m128i *)o, s);
+
+ c += 16;
+ o += 12;
+ outl += 12;
+ length -= 16;
+ }
+ }
+
+ if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
+ zend_string_free(result);
+ return NULL;
}
- ZSTR_LEN(result) = j;
- ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
+ ZSTR_LEN(result) = outl;
return result;
+}
+# endif
+#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
-fail:
- zend_string_free(result);
- return NULL;
+#if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE
+/* Plain C encoder: the fallback target when a resolver is in use, and the
+ * public php_base64_encode itself when no SIMD variant is built at all. */
+#if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER
+zend_string *php_base64_encode_default(const unsigned char *str, size_t length)
+#else
+PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length)
+#endif
+{
+	/* four output characters for every (possibly partial) group of three input bytes */
+	zend_string *encoded = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
+	unsigned char *start = (unsigned char *)ZSTR_VAL(encoded);
+	/* the helper returns the position just past the last character it wrote */
+	unsigned char *finish = php_base64_encode_impl(str, length, start);
+
+	ZSTR_LEN(encoded) = (finish - start);
+
+	return encoded;
+}
+#endif
+
+#if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE
+#if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER
+zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict)
+#else
+PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict)
+#endif
+{
+ zend_string *result;
+ size_t outl = 0;
+
+ result = zend_string_alloc(length, 0);
+
+ if (!php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
+ zend_string_free(result);
+ return NULL;
+ }
+
+ ZSTR_LEN(result) = outl;
+
+ return result;
}
+#endif
/* }}} */
/* {{{ proto string base64_encode(string str)
diff --git a/ext/standard/base64.h b/ext/standard/base64.h
index f380d3c..c9fe26e 100644
--- a/ext/standard/base64.h
+++ b/ext/standard/base64.h
@@ -21,16 +21,56 @@
#ifndef BASE64_H
#define BASE64_H
+/*
+ * The SSSE3 and AVX2 implementations are based on https://github.com/aklomp/base64,
+ * which is copyrighted by:
+ *
+ * Copyright (c) 2005-2007, Nick Galbreath
+ * Copyright (c) 2013-2017, Alfred Klomp
+ * Copyright (c) 2015-2017, Wojciech Mula
+ * Copyright (c) 2016-2017, Matthieu Darbois
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
PHP_FUNCTION(base64_decode);
PHP_FUNCTION(base64_encode);
+#if (ZEND_INTRIN_AVX2_FUNC_PTR || ZEND_INTRIN_SSSE3_FUNC_PTR) && !ZEND_INTRIN_AVX2_NATIVE
+PHP_MINIT_FUNCTION(base64_intrin);
+PHPAPI extern zend_string *(*php_base64_encode)(const unsigned char *, size_t);
+PHPAPI extern zend_string *(*php_base64_decode_ex)(const unsigned char *, size_t, zend_bool);
+#else
PHPAPI extern zend_string *php_base64_encode(const unsigned char *, size_t);
+PHPAPI extern zend_string *php_base64_decode_ex(const unsigned char *, size_t, zend_bool);
+#endif
+
static inline zend_string *php_base64_encode_str(const zend_string *str) {
return php_base64_encode((const unsigned char*)(ZSTR_VAL(str)), ZSTR_LEN(str));
}
-PHPAPI extern zend_string *php_base64_decode_ex(const unsigned char *, size_t, zend_bool);
-
static inline zend_string *php_base64_decode(const unsigned char *str, size_t len) {
return php_base64_decode_ex(str, len, 0);
}
diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c
index b322caa..4404ad1 100644
--- a/ext/standard/basic_functions.c
+++ b/ext/standard/basic_functions.c
@@ -3692,6 +3692,10 @@ PHP_MINIT_FUNCTION(basic) /* {{{ */
BASIC_MINIT_SUBMODULE(string_intrin)
#endif
+#if ZEND_INTRIN_AVX2_FUNC_PTR || ZEND_INTRIN_SSSE3_FUNC_PTR
+ BASIC_MINIT_SUBMODULE(base64_intrin)
+#endif
+
BASIC_MINIT_SUBMODULE(crypt)
BASIC_MINIT_SUBMODULE(lcg)
diff --git a/ext/standard/string.c b/ext/standard/string.c
index 73bbb24..0023c4b 100644
--- a/ext/standard/string.c
+++ b/ext/standard/string.c
@@ -3873,7 +3873,7 @@ zend_string *php_addslashes_default(zend_string *str, int should_free);
PHPAPI zend_string *php_addslashes(zend_string *str, int should_free) __attribute__((ifunc("resolve_addslashes")));
static void *resolve_addslashes() {
- if (zend_cpu_support_sse42()) {
+ if (zend_cpu_supports_sse42()) {
return php_addslashes_sse42;
}
return php_addslashes_default;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment