Skip to content

Instantly share code, notes, and snippets.

@laruence
Created February 14, 2018 01:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save laruence/ab7245d3160bebe1385d7bc78055c8ca to your computer and use it in GitHub Desktop.
Save laruence/ab7245d3160bebe1385d7bc78055c8ca to your computer and use it in GitHub Desktop.
diff --git a/ext/opcache/ZendAccelerator.c b/ext/opcache/ZendAccelerator.c
index 1c870e9..d9d2cdb 100644
--- a/ext/opcache/ZendAccelerator.c
+++ b/ext/opcache/ZendAccelerator.c
@@ -1358,7 +1358,7 @@ static zend_persistent_script *store_script_in_file_cache(zend_persistent_script
memory_used = zend_accel_script_persist_calc(new_persistent_script, NULL, 0, 0);
/* Allocate memory block */
-#ifdef __SSE2__
+#if defined(__AVX__) || defined(__SSE2__)
/* Align to 64-byte boundary */
ZCG(mem) = zend_arena_alloc(&CG(arena), memory_used + 64);
ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + 63L) & ~63L);
@@ -1465,7 +1465,7 @@ static zend_persistent_script *cache_script_in_shared_memory(zend_persistent_scr
memory_used = zend_accel_script_persist_calc(new_persistent_script, key, key_length, 1);
/* Allocate shared memory */
-#ifdef __SSE2__
+#if defined(__AVX__) || defined(__SSE2__)
/* Align to 64-byte boundary */
ZCG(mem) = zend_shared_alloc(memory_used + 64);
if (ZCG(mem)) {
diff --git a/ext/opcache/zend_accelerator_util_funcs.c b/ext/opcache/zend_accelerator_util_funcs.c
index bd46a86..bed05e7 100644
--- a/ext/opcache/zend_accelerator_util_funcs.c
+++ b/ext/opcache/zend_accelerator_util_funcs.c
@@ -596,8 +596,74 @@ static void zend_accel_class_hash_copy(HashTable *target, HashTable *source, uni
return;
}
-#ifdef __SSE2__
-# include <mmintrin.h>
+#if defined(__AVX__)
+# include <nmmintrin.h>
+# include <immintrin.h>
+# if defined(__GNUC__) && defined(__i386__)
+/*
+ * AVX copy loop for 32-bit x86, written as GCC-style inline assembly.
+ *
+ * Copies from src to dest in 64-byte steps. Each iteration issues a
+ * prefetchnta for src + 0x40, loads two 32-byte YMM registers from src
+ * and stores them at the same offset in dest.
+ *
+ * dest  destination buffer
+ * src   source buffer
+ * size  number of bytes to copy
+ *
+ * The loop body runs before size is tested, so at least 64 bytes are
+ * always copied and the byte count is effectively rounded up to the next
+ * multiple of 64.
+ * NOTE(review): vmovaps is the aligned move form, so both pointers are
+ * presumably expected to be at least 32-byte aligned (the allocation sites
+ * touched by this patch align to 64 bytes) - confirm against callers.
+ */
+static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size)
+{
+ /* Byte distance from src to dest; src + delta addresses the matching
+  * destination byte, so only src has to be advanced inside the loop. */
+ size_t delta = (char*)dest - (char*)src;
+
+ /* Operands: %0 = size (read/write), %1 = src (read/write), %2 = delta. */
+ __asm__ volatile (
+ ".align 16\n\t"
+ ".LL0%=:\n\t"
+ "prefetchnta 0x40(%1)\n\t"
+ "vmovaps (%1), %%ymm0\n\t"
+ "vmovaps 0x20(%1), %%ymm1\n\t"
+ /* (%1,%2) is src + delta, i.e. the current destination address. */
+ "vmovaps %%ymm0, (%1,%2)\n\t"
+ "vmovaps %%ymm1, 0x20(%1,%2)\n\t"
+ "addl $0x40, %1\n\t"
+ "subl $0x40, %0\n\t"
+ /* Repeat while the remaining size is still above zero (unsigned). */
+ "ja .LL0%="
+ : "+r"(size),
+ "+r"(src)
+ : "r"(delta)
+ : "cc", "memory", "%ymm0", "%ymm1");
+}
+# elif defined(__GNUC__) && defined(__x86_64__)
+/*
+ * AVX copy loop for x86-64, written as GCC-style inline assembly.
+ *
+ * Copies from src to dest in 64-byte steps. Each iteration issues a
+ * prefetchnta for src + 0x40, loads two 32-byte YMM registers from src
+ * and stores them at the same offset in dest.
+ *
+ * dest  destination buffer
+ * src   source buffer
+ * size  number of bytes to copy
+ *
+ * The loop body runs before size is tested, so at least 64 bytes are
+ * always copied and the byte count is effectively rounded up to the next
+ * multiple of 64.
+ * NOTE(review): vmovaps is the aligned move form, so both pointers are
+ * presumably expected to be at least 32-byte aligned (the allocation sites
+ * touched by this patch align to 64 bytes) - confirm against callers.
+ */
+static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size)
+{
+ /* Byte distance from src to dest; src + delta addresses the matching
+  * destination byte, so only src has to be advanced inside the loop. */
+ size_t delta = (char*)dest - (char*)src;
+
+ /* Operands: %0 = size (read/write), %1 = src (read/write), %2 = delta. */
+ __asm__ volatile (
+ ".align 16\n\t"
+ ".LL0%=:\n\t"
+ "prefetchnta 0x40(%1)\n\t"
+ "vmovaps (%1), %%ymm0\n\t"
+ "vmovaps 0x20(%1), %%ymm1\n\t"
+ /* (%1,%2) is src + delta, i.e. the current destination address. */
+ "vmovaps %%ymm0, (%1,%2)\n\t"
+ "vmovaps %%ymm1, 0x20(%1,%2)\n\t"
+ "addq $0x40, %1\n\t"
+ "subq $0x40, %0\n\t"
+ /* Repeat while the remaining size is still above zero (unsigned). */
+ "ja .LL0%="
+ : "+r"(size),
+ "+r"(src)
+ : "r"(delta)
+ : "cc", "memory", "%ymm0", "%ymm1");
+}
+# else
+/*
+ * AVX copy loop written with compiler intrinsics, used when neither of the
+ * GCC inline-assembly variants is selected.
+ *
+ * Copies from src to dest in 64-byte steps: two 32-byte aligned loads
+ * followed by two 32-byte aligned stores per iteration, with a prefetch
+ * (_MM_HINT_NTA) issued for the next 64-byte chunk of the source.
+ *
+ * dest  destination buffer
+ * src   source buffer
+ * size  number of bytes to copy
+ *
+ * The loop body runs before the end test, so at least 64 bytes are always
+ * copied and the byte count is effectively rounded up to the next multiple
+ * of 64, matching the inline-assembly variants.
+ * NOTE(review): _mm256_load_ps/_mm256_store_ps are the aligned forms, so both
+ * pointers are presumably expected to be at least 32-byte aligned (the
+ * allocation sites touched by this patch align to 64 bytes) - confirm.
+ */
+static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size)
+{
+ /* __m256 (not __m256i) is the vector type produced and consumed by the
+  * _ps load/store intrinsics used below. */
+ __m256 *dqdest = (__m256*)dest;
+ const __m256 *dqsrc = (const __m256*)src;
+ const __m256 *end = (const __m256*)((const char*)src + size);
+
+ do {
+#ifdef PHP_WIN32
+ _mm_prefetch((const char *)(dqsrc + 2), _MM_HINT_NTA);
+#else
+ _mm_prefetch(dqsrc + 2, _MM_HINT_NTA);
+#endif
+
+ /* The _ps intrinsics take float pointers, hence the explicit casts. */
+ __m256 ymm0 = _mm256_load_ps((const float *)(dqsrc + 0));
+ __m256 ymm1 = _mm256_load_ps((const float *)(dqsrc + 1));
+ dqsrc += 2;
+ _mm256_store_ps((float *)(dqdest + 0), ymm0);
+ _mm256_store_ps((float *)(dqdest + 1), ymm1);
+ dqdest += 2;
+ /* '<' rather than '!=' so the loop still terminates when size is 0 or
+  * not a multiple of 64, as the assembly variants do. */
+ } while (dqsrc < end);
+}
+# endif
+#elif defined(__SSE2__)
# include <emmintrin.h>
# if defined(__GNUC__) && defined(__i386__)
static zend_always_inline void fast_memcpy(void *dest, const void *src, size_t size)
@@ -691,7 +757,7 @@ zend_op_array* zend_accel_load_script(zend_persistent_script *persistent_script,
ZCG(current_persistent_script) = persistent_script;
ZCG(arena_mem) = NULL;
if (EXPECTED(persistent_script->arena_size)) {
-#ifdef __SSE2__
+#if defined(__AVX__) || defined(__SSE2__)
/* Target address must be aligned to 64-byte boundary */
_mm_prefetch(persistent_script->arena_mem, _MM_HINT_NTA);
ZCG(arena_mem) = zend_arena_alloc(&CG(arena), persistent_script->arena_size + 64);
diff --git a/ext/opcache/zend_file_cache.c b/ext/opcache/zend_file_cache.c
index de54949..e48ccc2 100644
--- a/ext/opcache/zend_file_cache.c
+++ b/ext/opcache/zend_file_cache.c
@@ -809,7 +809,7 @@ int zend_file_cache_script_store(zend_persistent_script *script, int in_shm)
return FAILURE;
}
-#ifdef __SSE2__
+#if defined(__AVX__) || defined(__SSE2__)
/* Align to 64-byte boundary */
mem = emalloc(script->size + 64);
buf = (void*)(((zend_uintptr_t)mem + 63L) & ~63L);
@@ -1391,7 +1391,7 @@ zend_persistent_script *zend_file_cache_script_load(zend_file_handle *file_handl
}
checkpoint = zend_arena_checkpoint(CG(arena));
-#ifdef __SSE2__
+#if defined(__AVX__) || defined(__SSE2__)
/* Align to 64-byte boundary */
mem = zend_arena_alloc(&CG(arena), info.mem_size + info.str_size + 64);
mem = (void*)(((zend_uintptr_t)mem + 63L) & ~63L);
@@ -1452,7 +1452,7 @@ zend_persistent_script *zend_file_cache_script_load(zend_file_handle *file_handl
goto use_process_mem;
}
-#ifdef __SSE2__
+#if defined(__AVX__) || defined(__SSE2__)
/* Align to 64-byte boundary */
buf = zend_shared_alloc(info.mem_size + 64);
buf = (void*)(((zend_uintptr_t)buf + 63L) & ~63L);
diff --git a/ext/opcache/zend_persist.c b/ext/opcache/zend_persist.c
index 8c16538..9fc6da7 100644
--- a/ext/opcache/zend_persist.c
+++ b/ext/opcache/zend_persist.c
@@ -870,7 +870,7 @@ zend_persistent_script *zend_accel_script_persist(zend_persistent_script *script
}
zend_accel_store_interned_string(script->script.filename);
-#ifdef __SSE2__
+#if defined(__AVX__) || defined(__SSE2__)
/* Align to 64-byte boundary */
ZCG(mem) = (void*)(((zend_uintptr_t)ZCG(mem) + 63L) & ~63L);
#else
diff --git a/ext/opcache/zend_persist_calc.c b/ext/opcache/zend_persist_calc.c
index eb80232..27d9fef 100644
--- a/ext/opcache/zend_persist_calc.c
+++ b/ext/opcache/zend_persist_calc.c
@@ -415,7 +415,7 @@ uint32_t zend_accel_script_persist_calc(zend_persistent_script *new_persistent_s
}
ADD_STRING(new_persistent_script->script.filename);
-#ifdef __SSE2__
+#if defined(__AVX__) || defined(__SSE2__)
/* Align size to 64-byte boundary */
new_persistent_script->size = (new_persistent_script->size + 63) & ~63;
#endif
@@ -430,7 +430,7 @@ uint32_t zend_accel_script_persist_calc(zend_persistent_script *new_persistent_s
zend_hash_persist_calc(&new_persistent_script->script.function_table, zend_persist_op_array_calc);
zend_persist_op_array_calc_ex(&new_persistent_script->script.main_op_array);
-#ifdef __SSE2__
+#if defined(__AVX__) || defined(__SSE2__)
/* Align size to 64-byte boundary */
new_persistent_script->arena_size = (new_persistent_script->arena_size + 63) & ~63;
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment