Last active
February 14, 2018 01:04
-
-
Save laruence/a4517072f30b6ca6e93a80cd6d9b3e7b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/ext/opcache/ZendAccelerator.c b/ext/opcache/ZendAccelerator.c | |
index 1c870e9..1c2c41f 100644 | |
--- a/ext/opcache/ZendAccelerator.c | |
+++ b/ext/opcache/ZendAccelerator.c | |
@@ -1358,10 +1358,10 @@ static zend_persistent_script *store_script_in_file_cache(zend_persistent_script | |
memory_used = zend_accel_script_persist_calc(new_persistent_script, NULL, 0, 0); | |
/* Allocate memory block */ | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
- /* Align to 64-byte boundary */ | |
+ /* Align to FAST_COPY_PAD-byte boundary (64 with SSE2, 128 with AVX) */ | |
- ZCG(mem) = zend_arena_alloc(&CG(arena), memory_used + 64); | |
- ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + 63L) & ~63L); | |
+ ZCG(mem) = zend_arena_alloc(&CG(arena), memory_used + FAST_COPY_PAD); | |
+ ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + FAST_COPY_MASK) & ~FAST_COPY_MASK); | |
#else | |
ZCG(mem) = zend_arena_alloc(&CG(arena), memory_used); | |
#endif | |
@@ -1465,12 +1465,12 @@ static zend_persistent_script *cache_script_in_shared_memory(zend_persistent_scr | |
memory_used = zend_accel_script_persist_calc(new_persistent_script, key, key_length, 1); | |
/* Allocate shared memory */ | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
- /* Align to 64-byte boundary */ | |
+ /* Align to FAST_COPY_PAD-byte boundary (64 with SSE2, 128 with AVX) */ | |
- ZCG(mem) = zend_shared_alloc(memory_used + 64); | |
+ ZCG(mem) = zend_shared_alloc(memory_used + FAST_COPY_PAD); | |
if (ZCG(mem)) { | |
- memset(ZCG(mem), 0, memory_used + 64); | |
+ memset(ZCG(mem), 0, memory_used + FAST_COPY_PAD); /* clear the whole allocation: with a 128-byte pad the aligned start may move up by more than 64 bytes */ | |
- ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + 63L) & ~63L); | |
+ ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + FAST_COPY_MASK) & ~FAST_COPY_MASK); | |
} | |
#else | |
ZCG(mem) = zend_shared_alloc(memory_used); | |
diff --git a/ext/opcache/zend_accelerator_util_funcs.c b/ext/opcache/zend_accelerator_util_funcs.c | |
index bd46a86..5acbc88 100644 | |
--- a/ext/opcache/zend_accelerator_util_funcs.c | |
+++ b/ext/opcache/zend_accelerator_util_funcs.c | |
@@ -596,8 +596,86 @@ static void zend_accel_class_hash_copy(HashTable *target, HashTable *source, uni | |
return; | |
} | |
-#ifdef __SSE2__ | |
-# include <mmintrin.h> | |
+#if defined(__AVX__) | |
+# include <nmmintrin.h> | |
+# include <immintrin.h> | |
+# if defined(__GNUC__) && defined(__i386__) | |
+static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size) | |
+{ /* AVX copy for 32-bit x86 (GNU inline asm): moves 128 bytes per loop iteration through ymm0-ymm3 */ | |
+ size_t delta = (char*)dest - (char*)src; /* dest is addressed as src + delta, so only src has to be advanced */ | |
+ /* The loop body runs at least once and repeats while size is still above zero after subtracting 0x80, so size is effectively rounded up to a multiple of 128 */ | |
+ __asm__ volatile ( | |
+ ".align 16\n\t" | |
+ ".LL0%=:\n\t" /* %= makes the label unique per asm instance */ | |
+ "prefetchnta 0x40(%1)\n\t" /* NOTE(review): src+0x40 is inside the block loaded just below; the intrinsics variant prefetches src+0x80 - confirm intended distance */ | |
+ "vmovaps (%1), %%ymm0\n\t" /* four aligned 32-byte loads from src */ | |
+ "vmovaps 0x20(%1), %%ymm1\n\t" | |
+ "vmovaps 0x40(%1), %%ymm2\n\t" | |
+ "vmovaps 0x60(%1), %%ymm3\n\t" | |
+ "vmovaps %%ymm0, (%1,%2)\n\t" /* four aligned 32-byte stores to src + delta, i.e. dest */ | |
+ "vmovaps %%ymm1, 0x20(%1,%2)\n\t" | |
+ "vmovaps %%ymm2, 0x40(%1,%2)\n\t" | |
+ "vmovaps %%ymm3, 0x60(%1,%2)\n\t" | |
+ "addl $0x80, %1\n\t" /* advance src by one 128-byte block */ | |
+ "subl $0x80, %0\n\t" /* size -= 128 */ | |
+ "ja .LL0%=" /* unsigned "above": loop while the subtraction neither borrowed nor reached zero */ | |
+ : "+r"(size), /* %0, read and written */ | |
+ "+r"(src) /* %1, read and written */ | |
+ : "r"(delta) /* %2, input only */ | |
+ : "cc", "memory", "%ymm0", "%ymm1", "%ymm2", "%ymm3"); /* flags, memory and the four scratch vector registers are clobbered */ | |
+} | |
+# elif defined(__GNUC__) && defined(__x86_64__) | |
+static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size) | |
+{ /* AVX copy for x86_64 (GNU inline asm): moves 128 bytes per loop iteration through ymm0-ymm3 */ | |
+ size_t delta = (char*)dest - (char*)src; /* dest is addressed as src + delta, so only src has to be advanced */ | |
+ /* The loop body runs at least once and repeats while size is still above zero after subtracting 0x80, so size is effectively rounded up to a multiple of 128 */ | |
+ __asm__ volatile ( | |
+ ".align 16\n\t" | |
+ ".LL0%=:\n\t" /* %= makes the label unique per asm instance */ | |
+ "prefetchnta 0x40(%1)\n\t" /* NOTE(review): src+0x40 is inside the block loaded just below; the intrinsics variant prefetches src+0x80 - confirm intended distance */ | |
+ "vmovaps (%1), %%ymm0\n\t" /* four aligned 32-byte loads from src */ | |
+ "vmovaps 0x20(%1), %%ymm1\n\t" | |
+ "vmovaps 0x40(%1), %%ymm2\n\t" | |
+ "vmovaps 0x60(%1), %%ymm3\n\t" | |
+ "vmovaps %%ymm0, (%1,%2)\n\t" /* four aligned 32-byte stores to src + delta, i.e. dest */ | |
+ "vmovaps %%ymm1, 0x20(%1,%2)\n\t" | |
+ "vmovaps %%ymm2, 0x40(%1,%2)\n\t" | |
+ "vmovaps %%ymm3, 0x60(%1,%2)\n\t" | |
+ "addq $0x80, %1\n\t" /* advance src by one 128-byte block */ | |
+ "subq $0x80, %0\n\t" /* size -= 128 */ | |
+ "ja .LL0%=" /* unsigned "above": loop while the subtraction neither borrowed nor reached zero */ | |
+ : "+r"(size), /* %0, read and written */ | |
+ "+r"(src) /* %1, read and written */ | |
+ : "r"(delta) /* %2, input only */ | |
+ : "cc", "memory", "%ymm0", "%ymm1", "%ymm2", "%ymm3"); /* flags, memory and the four scratch vector registers are clobbered */ | |
+} | |
+# else | |
+static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size) | |
+{ /* AVX intrinsics fallback: copies 128 bytes per iteration; dest and src must be 32-byte aligned (aligned load/store intrinsics are used) */ | |
+ __m256i *dqdest = (__m256i*)dest; | |
+ const __m256i *dqsrc = (const __m256i*)src; | |
+ const __m256i *end = (const __m256i*)((const char*)src + size); | |
+ | |
+ do { /* runs at least once, like the inline-asm variants */ | |
+#ifdef PHP_WIN32 | |
+ _mm_prefetch((const char *)(dqsrc + 4), _MM_HINT_NTA); | |
+#else | |
+ _mm_prefetch(dqsrc + 4, _MM_HINT_NTA); | |
+#endif | |
+ | |
+ __m256i ymm0 = _mm256_load_si256(dqsrc + 0); /* integer-vector load: the _ps form takes float* and yields __m256, not __m256i */ | |
+ __m256i ymm1 = _mm256_load_si256(dqsrc + 1); | |
+ __m256i ymm2 = _mm256_load_si256(dqsrc + 2); | |
+ __m256i ymm3 = _mm256_load_si256(dqsrc + 3); | |
+ dqsrc += 4; | |
+ _mm256_store_si256(dqdest + 0, ymm0); | |
+ _mm256_store_si256(dqdest + 1, ymm1); | |
+ _mm256_store_si256(dqdest + 2, ymm2); | |
+ _mm256_store_si256(dqdest + 3, ymm3); | |
+ dqdest += 4; | |
+ } while (dqsrc < end); /* '<' rounds a non-multiple-of-128 size up to the next block instead of stepping past end forever */ | |
+} | |
+# endif | |
+#elif defined(__SSE2__) | |
# include <emmintrin.h> | |
# if defined(__GNUC__) && defined(__i386__) | |
static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size) | |
@@ -691,11 +769,11 @@ zend_op_array* zend_accel_load_script(zend_persistent_script *persistent_script, | |
ZCG(current_persistent_script) = persistent_script; | |
ZCG(arena_mem) = NULL; | |
if (EXPECTED(persistent_script->arena_size)) { | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
- /* Target address must be aligned to 64-byte boundary */ | |
+ /* Align target address to FAST_COPY_PAD-byte boundary (64 with SSE2, 128 with AVX) */ | |
_mm_prefetch(persistent_script->arena_mem, _MM_HINT_NTA); | |
- ZCG(arena_mem) = zend_arena_alloc(&CG(arena), persistent_script->arena_size + 64); | |
- ZCG(arena_mem) = (void*)(((zend_uintptr_t)ZCG(arena_mem) + 63L) & ~63L); | |
+ ZCG(arena_mem) = zend_arena_alloc(&CG(arena), persistent_script->arena_size + FAST_COPY_PAD); | |
+ ZCG(arena_mem) = (void*)(((zend_uintptr_t)ZCG(arena_mem) + FAST_COPY_MASK) & ~FAST_COPY_MASK); | |
fast_memcpy(ZCG(arena_mem), persistent_script->arena_mem, persistent_script->arena_size); | |
#else | |
ZCG(arena_mem) = zend_arena_alloc(&CG(arena), persistent_script->arena_size); | |
diff --git a/ext/opcache/zend_accelerator_util_funcs.h b/ext/opcache/zend_accelerator_util_funcs.h | |
index 5d7834a..8fb7acd 100644 | |
--- a/ext/opcache/zend_accelerator_util_funcs.h | |
+++ b/ext/opcache/zend_accelerator_util_funcs.h | |
@@ -41,6 +41,14 @@ unsigned int zend_adler32(unsigned int checksum, signed char *buf, uint32_t len) | |
unsigned int zend_accel_script_checksum(zend_persistent_script *persistent_script); | |
+#if defined(__AVX__) /* AVX build: fast_memcpy moves 0x80 (128) bytes per loop iteration */ | |
+#define FAST_COPY_PAD 128L /* extra bytes added to allocations so the start address can be rounded up */ | |
+#define FAST_COPY_MASK 127L /* FAST_COPY_PAD - 1; applied as (p + MASK) & ~MASK */ | |
+#elif defined(__SSE2__) /* SSE2 build keeps the 64/63 values that were previously hard-coded at the call sites */ | |
+#define FAST_COPY_PAD 64L | |
+#define FAST_COPY_MASK 63L | |
+#endif /* NOTE(review): neither macro exists without AVX/SSE2; the users visible in this patch all sit inside matching #if blocks */ | |
+ | |
#endif /* ZEND_ACCELERATOR_UTIL_FUNCS_H */ | |
/* | |
diff --git a/ext/opcache/zend_file_cache.c b/ext/opcache/zend_file_cache.c | |
index de54949..a2c985c 100644 | |
--- a/ext/opcache/zend_file_cache.c | |
+++ b/ext/opcache/zend_file_cache.c | |
@@ -809,10 +809,9 @@ int zend_file_cache_script_store(zend_persistent_script *script, int in_shm) | |
return FAILURE; | |
} | |
-#ifdef __SSE2__ | |
- /* Align to 64-byte boundary */ | |
- mem = emalloc(script->size + 64); | |
- buf = (void*)(((zend_uintptr_t)mem + 63L) & ~63L); | |
+#if defined(__AVX__) || defined(__SSE2__) | |
+ mem = emalloc(script->size + FAST_COPY_PAD); | |
+ buf = (void*)(((zend_uintptr_t)mem + FAST_COPY_MASK) & ~FAST_COPY_MASK); | |
#else | |
mem = buf = emalloc(script->size); | |
#endif | |
@@ -1391,10 +1390,9 @@ zend_persistent_script *zend_file_cache_script_load(zend_file_handle *file_handl | |
} | |
checkpoint = zend_arena_checkpoint(CG(arena)); | |
-#ifdef __SSE2__ | |
- /* Align to 64-byte boundary */ | |
- mem = zend_arena_alloc(&CG(arena), info.mem_size + info.str_size + 64); | |
- mem = (void*)(((zend_uintptr_t)mem + 63L) & ~63L); | |
+#if defined(__AVX__) || defined(__SSE2__) | |
+ mem = zend_arena_alloc(&CG(arena), info.mem_size + info.str_size + FAST_COPY_PAD); | |
+ mem = (void*)(((zend_uintptr_t)mem + FAST_COPY_MASK) & ~FAST_COPY_MASK); | |
#else | |
mem = zend_arena_alloc(&CG(arena), info.mem_size + info.str_size); | |
#endif | |
@@ -1452,10 +1450,9 @@ zend_persistent_script *zend_file_cache_script_load(zend_file_handle *file_handl | |
goto use_process_mem; | |
} | |
-#ifdef __SSE2__ | |
- /* Align to 64-byte boundary */ | |
- buf = zend_shared_alloc(info.mem_size + 64); | |
- buf = (void*)(((zend_uintptr_t)buf + 63L) & ~63L); | |
+#if defined(__AVX__) || defined(__SSE2__) | |
+ buf = zend_shared_alloc(info.mem_size + FAST_COPY_PAD); | |
+ buf = (void*)(((zend_uintptr_t)buf + FAST_COPY_MASK) & ~FAST_COPY_MASK); | |
#else | |
buf = zend_shared_alloc(info.mem_size); | |
#endif | |
diff --git a/ext/opcache/zend_persist.c b/ext/opcache/zend_persist.c | |
index 8c16538..7476a7c 100644 | |
--- a/ext/opcache/zend_persist.c | |
+++ b/ext/opcache/zend_persist.c | |
@@ -27,6 +27,7 @@ | |
#include "zend_vm.h" | |
#include "zend_constants.h" | |
#include "zend_operators.h" | |
+#include "zend_accelerator_util_funcs.h" | |
#define zend_accel_store(p, size) \ | |
(p = _zend_shared_memdup((void*)p, size, 1)) | |
@@ -870,9 +871,9 @@ zend_persistent_script *zend_accel_script_persist(zend_persistent_script *script | |
} | |
zend_accel_store_interned_string(script->script.filename); | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
- /* Align to 64-byte boundary */ | |
+ /* Align to FAST_COPY_PAD-byte boundary (64 with SSE2, 128 with AVX) */ | |
- ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + 63L) & ~63L); | |
+ ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + FAST_COPY_MASK) & ~FAST_COPY_MASK); | |
#else | |
ZEND_ASSERT(((zend_uintptr_t)ZCG(mem) & 0x7) == 0); /* should be 8 byte aligned */ | |
#endif | |
diff --git a/ext/opcache/zend_persist_calc.c b/ext/opcache/zend_persist_calc.c | |
index eb80232..c925d43 100644 | |
--- a/ext/opcache/zend_persist_calc.c | |
+++ b/ext/opcache/zend_persist_calc.c | |
@@ -25,6 +25,7 @@ | |
#include "zend_extensions.h" | |
#include "zend_shared_alloc.h" | |
#include "zend_operators.h" | |
+#include "zend_accelerator_util_funcs.h" | |
#define ADD_DUP_SIZE(m,s) ZCG(current_persistent_script)->size += zend_shared_memdup_size((void*)m, s) | |
#define ADD_SIZE(m) ZCG(current_persistent_script)->size += ZEND_ALIGNED_SIZE(m) | |
@@ -415,9 +416,9 @@ uint32_t zend_accel_script_persist_calc(zend_persistent_script *new_persistent_s | |
} | |
ADD_STRING(new_persistent_script->script.filename); | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
- /* Align size to 64-byte boundary */ | |
+ /* Round size up to a multiple of FAST_COPY_PAD (64 with SSE2, 128 with AVX) */ | |
- new_persistent_script->size = (new_persistent_script->size + 63) & ~63; | |
+ new_persistent_script->size = (new_persistent_script->size + FAST_COPY_MASK) & ~FAST_COPY_MASK; | |
#endif | |
if (new_persistent_script->script.class_table.nNumUsed != new_persistent_script->script.class_table.nNumOfElements) { | |
@@ -430,9 +431,9 @@ uint32_t zend_accel_script_persist_calc(zend_persistent_script *new_persistent_s | |
zend_hash_persist_calc(&new_persistent_script->script.function_table, zend_persist_op_array_calc); | |
zend_persist_op_array_calc_ex(&new_persistent_script->script.main_op_array); | |
-#ifdef __SSE2__ | |
+#if defined(__AVX__) || defined(__SSE2__) | |
- /* Align size to 64-byte boundary */ | |
+ /* Round arena size up to a multiple of FAST_COPY_PAD (64 with SSE2, 128 with AVX) */ | |
- new_persistent_script->arena_size = (new_persistent_script->arena_size + 63) & ~63; | |
+ new_persistent_script->arena_size = (new_persistent_script->arena_size + FAST_COPY_MASK) & ~FAST_COPY_MASK; | |
#endif | |
new_persistent_script->size += new_persistent_script->arena_size; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment