Created November 8, 2016 15:35
DPDK headers diff amd64 <> arm64
From 9e8a966737201210ee784e48057d803e2cba3016 Mon Sep 17 00:00:00 2001 | |
From: Luca Boccassi <lboccass@brocade.com> | |
Date: Tue, 8 Nov 2016 15:34:05 +0000 | |
Subject: [PATCH] Header diff amd64 <> arm64 | |
--- | |
include/dpdk/rte_atomic.h | 194 +---------- | |
include/dpdk/rte_atomic_32.h | 220 ++---------- | |
include/dpdk/rte_atomic_64.h | 187 +++------- | |
include/dpdk/rte_byteorder.h | 92 ++--- | |
include/dpdk/rte_config.h | 14 +- | |
include/dpdk/rte_cpuflags.h | 131 +------ | |
include/dpdk/rte_cycles.h | 99 +----- | |
include/dpdk/rte_memcpy.h | 806 +------------------------------------------ | |
include/dpdk/rte_prefetch.h | 45 +-- | |
include/dpdk/rte_rwlock.h | 54 +-- | |
include/dpdk/rte_spinlock.h | 128 +------ | |
include/dpdk/rte_vect.h | 101 ++---- | |
12 files changed, 216 insertions(+), 1855 deletions(-) | |
diff --git a/include/dpdk/rte_atomic.h b/include/dpdk/rte_atomic.h | |
index b20056b..454a12b 100644 | |
--- a/include/dpdk/rte_atomic.h | |
+++ b/include/dpdk/rte_atomic.h | |
@@ -1,8 +1,7 @@ | |
/*- | |
* BSD LICENSE | |
* | |
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
- * All rights reserved. | |
+ * Copyright(c) 2015 RehiveTech. All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
@@ -14,7 +13,7 @@ | |
* notice, this list of conditions and the following disclaimer in | |
* the documentation and/or other materials provided with the | |
* distribution. | |
- * * Neither the name of Intel Corporation nor the names of its | |
+ * * Neither the name of RehiveTech nor the names of its | |
* contributors may be used to endorse or promote products derived | |
* from this software without specific prior written permission. | |
* | |
@@ -31,192 +30,19 @@ | |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
-#ifndef _RTE_ATOMIC_X86_H_ | |
-#define _RTE_ATOMIC_X86_H_ | |
+#ifndef _RTE_ATOMIC_ARM_H_ | |
+#define _RTE_ATOMIC_ARM_H_ | |
-#ifdef __cplusplus | |
-extern "C" { | |
-#endif | |
- | |
-#include <emmintrin.h> | |
-#include "generic/rte_atomic.h" | |
- | |
-#if RTE_MAX_LCORE == 1 | |
-#define MPLOCKED /**< No need to insert MP lock prefix. */ | |
+#ifdef RTE_ARCH_64 | |
+#include <rte_atomic_64.h> | |
#else | |
-#define MPLOCKED "lock ; " /**< Insert MP lock prefix. */ | |
+#include <rte_atomic_32.h> | |
#endif | |
-#define rte_mb() _mm_mfence() | |
- | |
-#define rte_wmb() _mm_sfence() | |
- | |
-#define rte_rmb() _mm_lfence() | |
- | |
#define rte_smp_mb() rte_mb() | |
-#define rte_smp_wmb() rte_compiler_barrier() | |
+#define rte_smp_wmb() rte_wmb() | |
-#define rte_smp_rmb() rte_compiler_barrier() | |
+#define rte_smp_rmb() rte_rmb() | |
-/*------------------------- 16 bit atomic operations -------------------------*/ | |
- | |
-#ifndef RTE_FORCE_INTRINSICS | |
-static inline int | |
-rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) | |
-{ | |
- uint8_t res; | |
- | |
- asm volatile( | |
- MPLOCKED | |
- "cmpxchgw %[src], %[dst];" | |
- "sete %[res];" | |
- : [res] "=a" (res), /* output */ | |
- [dst] "=m" (*dst) | |
- : [src] "r" (src), /* input */ | |
- "a" (exp), | |
- "m" (*dst) | |
- : "memory"); /* no-clobber list */ | |
- return res; | |
-} | |
- | |
-static inline int rte_atomic16_test_and_set(rte_atomic16_t *v) | |
-{ | |
- return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1); | |
-} | |
- | |
-static inline void | |
-rte_atomic16_inc(rte_atomic16_t *v) | |
-{ | |
- asm volatile( | |
- MPLOCKED | |
- "incw %[cnt]" | |
- : [cnt] "=m" (v->cnt) /* output */ | |
- : "m" (v->cnt) /* input */ | |
- ); | |
-} | |
- | |
-static inline void | |
-rte_atomic16_dec(rte_atomic16_t *v) | |
-{ | |
- asm volatile( | |
- MPLOCKED | |
- "decw %[cnt]" | |
- : [cnt] "=m" (v->cnt) /* output */ | |
- : "m" (v->cnt) /* input */ | |
- ); | |
-} | |
- | |
-static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) | |
-{ | |
- uint8_t ret; | |
- | |
- asm volatile( | |
- MPLOCKED | |
- "incw %[cnt] ; " | |
- "sete %[ret]" | |
- : [cnt] "+m" (v->cnt), /* output */ | |
- [ret] "=qm" (ret) | |
- ); | |
- return ret != 0; | |
-} | |
- | |
-static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) | |
-{ | |
- uint8_t ret; | |
- | |
- asm volatile(MPLOCKED | |
- "decw %[cnt] ; " | |
- "sete %[ret]" | |
- : [cnt] "+m" (v->cnt), /* output */ | |
- [ret] "=qm" (ret) | |
- ); | |
- return ret != 0; | |
-} | |
- | |
-/*------------------------- 32 bit atomic operations -------------------------*/ | |
- | |
-static inline int | |
-rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) | |
-{ | |
- uint8_t res; | |
- | |
- asm volatile( | |
- MPLOCKED | |
- "cmpxchgl %[src], %[dst];" | |
- "sete %[res];" | |
- : [res] "=a" (res), /* output */ | |
- [dst] "=m" (*dst) | |
- : [src] "r" (src), /* input */ | |
- "a" (exp), | |
- "m" (*dst) | |
- : "memory"); /* no-clobber list */ | |
- return res; | |
-} | |
- | |
-static inline int rte_atomic32_test_and_set(rte_atomic32_t *v) | |
-{ | |
- return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1); | |
-} | |
- | |
-static inline void | |
-rte_atomic32_inc(rte_atomic32_t *v) | |
-{ | |
- asm volatile( | |
- MPLOCKED | |
- "incl %[cnt]" | |
- : [cnt] "=m" (v->cnt) /* output */ | |
- : "m" (v->cnt) /* input */ | |
- ); | |
-} | |
- | |
-static inline void | |
-rte_atomic32_dec(rte_atomic32_t *v) | |
-{ | |
- asm volatile( | |
- MPLOCKED | |
- "decl %[cnt]" | |
- : [cnt] "=m" (v->cnt) /* output */ | |
- : "m" (v->cnt) /* input */ | |
- ); | |
-} | |
- | |
-static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) | |
-{ | |
- uint8_t ret; | |
- | |
- asm volatile( | |
- MPLOCKED | |
- "incl %[cnt] ; " | |
- "sete %[ret]" | |
- : [cnt] "+m" (v->cnt), /* output */ | |
- [ret] "=qm" (ret) | |
- ); | |
- return ret != 0; | |
-} | |
- | |
-static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) | |
-{ | |
- uint8_t ret; | |
- | |
- asm volatile(MPLOCKED | |
- "decl %[cnt] ; " | |
- "sete %[ret]" | |
- : [cnt] "+m" (v->cnt), /* output */ | |
- [ret] "=qm" (ret) | |
- ); | |
- return ret != 0; | |
-} | |
-#endif | |
- | |
-#ifdef RTE_ARCH_I686 | |
-#include "rte_atomic_32.h" | |
-#else | |
-#include "rte_atomic_64.h" | |
-#endif | |
- | |
-#ifdef __cplusplus | |
-} | |
-#endif | |
- | |
-#endif /* _RTE_ATOMIC_X86_H_ */ | |
+#endif /* _RTE_ATOMIC_ARM_H_ */ | |
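Aside (illustration, not part of the diff): on arm the x86 inline-asm atomics above are gone and the build requires RTE_FORCE_INTRINSICS, so the generic header supplies the operations from GCC builtins. A minimal sketch of what a compare-and-set then boils down to, assuming the __sync builtins:

    #include <stdint.h>

    /* Roughly the intrinsics path: returns non-zero if *dst contained exp
     * and was atomically replaced by src. */
    static inline int
    cmpset32_sketch(volatile uint32_t *dst, uint32_t exp, uint32_t src)
    {
            return __sync_bool_compare_and_swap(dst, exp, src);
    }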
diff --git a/include/dpdk/rte_atomic_32.h b/include/dpdk/rte_atomic_32.h | |
index 400d8a9..9ae1e78 100644 | |
--- a/include/dpdk/rte_atomic_32.h | |
+++ b/include/dpdk/rte_atomic_32.h | |
@@ -1,8 +1,7 @@ | |
/*- | |
* BSD LICENSE | |
* | |
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
- * All rights reserved. | |
+ * Copyright(c) 2015 RehiveTech. All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
@@ -14,7 +13,7 @@ | |
* notice, this list of conditions and the following disclaimer in | |
* the documentation and/or other materials provided with the | |
* distribution. | |
- * * Neither the name of Intel Corporation nor the names of its | |
+ * * Neither the name of RehiveTech nor the names of its | |
* contributors may be used to endorse or promote products derived | |
* from this software without specific prior written permission. | |
* | |
@@ -31,192 +30,45 @@ | |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
-/* | |
- * Inspired from FreeBSD src/sys/i386/include/atomic.h | |
- * Copyright (c) 1998 Doug Rabson | |
- * All rights reserved. | |
- */ | |
- | |
-#ifndef _RTE_ATOMIC_I686_H_ | |
-#define _RTE_ATOMIC_I686_H_ | |
- | |
-/*------------------------- 64 bit atomic operations -------------------------*/ | |
+#ifndef _RTE_ATOMIC_ARM32_H_ | |
+#define _RTE_ATOMIC_ARM32_H_ | |
#ifndef RTE_FORCE_INTRINSICS | |
-static inline int | |
-rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) | |
-{ | |
- uint8_t res; | |
- union { | |
- struct { | |
- uint32_t l32; | |
- uint32_t h32; | |
- }; | |
- uint64_t u64; | |
- } _exp, _src; | |
- | |
- _exp.u64 = exp; | |
- _src.u64 = src; | |
- | |
-#ifndef __PIC__ | |
- asm volatile ( | |
- MPLOCKED | |
- "cmpxchg8b (%[dst]);" | |
- "setz %[res];" | |
- : [res] "=a" (res) /* result in eax */ | |
- : [dst] "S" (dst), /* esi */ | |
- "b" (_src.l32), /* ebx */ | |
- "c" (_src.h32), /* ecx */ | |
- "a" (_exp.l32), /* eax */ | |
- "d" (_exp.h32) /* edx */ | |
- : "memory" ); /* no-clobber list */ | |
-#else | |
- asm volatile ( | |
- "mov %%ebx, %%edi\n" | |
- MPLOCKED | |
- "cmpxchg8b (%[dst]);" | |
- "setz %[res];" | |
- "xchgl %%ebx, %%edi;\n" | |
- : [res] "=a" (res) /* result in eax */ | |
- : [dst] "S" (dst), /* esi */ | |
- "D" (_src.l32), /* ebx */ | |
- "c" (_src.h32), /* ecx */ | |
- "a" (_exp.l32), /* eax */ | |
- "d" (_exp.h32) /* edx */ | |
- : "memory" ); /* no-clobber list */ | |
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS | |
#endif | |
- return res; | |
-} | |
- | |
-static inline void | |
-rte_atomic64_init(rte_atomic64_t *v) | |
-{ | |
- int success = 0; | |
- uint64_t tmp; | |
- | |
- while (success == 0) { | |
- tmp = v->cnt; | |
- success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, | |
- tmp, 0); | |
- } | |
-} | |
- | |
-static inline int64_t | |
-rte_atomic64_read(rte_atomic64_t *v) | |
-{ | |
- int success = 0; | |
- uint64_t tmp; | |
- | |
- while (success == 0) { | |
- tmp = v->cnt; | |
- /* replace the value by itself */ | |
- success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, | |
- tmp, tmp); | |
- } | |
- return tmp; | |
-} | |
- | |
-static inline void | |
-rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) | |
-{ | |
- int success = 0; | |
- uint64_t tmp; | |
- | |
- while (success == 0) { | |
- tmp = v->cnt; | |
- success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, | |
- tmp, new_value); | |
- } | |
-} | |
- | |
-static inline void | |
-rte_atomic64_add(rte_atomic64_t *v, int64_t inc) | |
-{ | |
- int success = 0; | |
- uint64_t tmp; | |
- | |
- while (success == 0) { | |
- tmp = v->cnt; | |
- success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, | |
- tmp, tmp + inc); | |
- } | |
-} | |
- | |
-static inline void | |
-rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) | |
-{ | |
- int success = 0; | |
- uint64_t tmp; | |
- | |
- while (success == 0) { | |
- tmp = v->cnt; | |
- success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, | |
- tmp, tmp - dec); | |
- } | |
-} | |
- | |
-static inline void | |
-rte_atomic64_inc(rte_atomic64_t *v) | |
-{ | |
- rte_atomic64_add(v, 1); | |
-} | |
- | |
-static inline void | |
-rte_atomic64_dec(rte_atomic64_t *v) | |
-{ | |
- rte_atomic64_sub(v, 1); | |
-} | |
- | |
-static inline int64_t | |
-rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) | |
-{ | |
- int success = 0; | |
- uint64_t tmp; | |
- | |
- while (success == 0) { | |
- tmp = v->cnt; | |
- success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, | |
- tmp, tmp + inc); | |
- } | |
- | |
- return tmp + inc; | |
-} | |
- | |
-static inline int64_t | |
-rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) | |
-{ | |
- int success = 0; | |
- uint64_t tmp; | |
- | |
- while (success == 0) { | |
- tmp = v->cnt; | |
- success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, | |
- tmp, tmp - dec); | |
- } | |
- | |
- return tmp - dec; | |
-} | |
- | |
-static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) | |
-{ | |
- return rte_atomic64_add_return(v, 1) == 0; | |
-} | |
- | |
-static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) | |
-{ | |
- return rte_atomic64_sub_return(v, 1) == 0; | |
-} | |
- | |
-static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) | |
-{ | |
- return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); | |
-} | |
+#ifdef __cplusplus | |
+extern "C" { | |
+#endif | |
-static inline void rte_atomic64_clear(rte_atomic64_t *v) | |
-{ | |
- rte_atomic64_set(v, 0); | |
+#include "generic/rte_atomic.h" | |
+ | |
+/** | |
+ * General memory barrier. | |
+ * | |
+ * Guarantees that the LOAD and STORE operations generated before the | |
+ * barrier occur before the LOAD and STORE operations generated after. | |
+ */ | |
+#define rte_mb() __sync_synchronize() | |
+ | |
+/** | |
+ * Write memory barrier. | |
+ * | |
+ * Guarantees that the STORE operations generated before the barrier | |
+ * occur before the STORE operations generated after. | |
+ */ | |
+#define rte_wmb() do { asm volatile ("dmb st" : : : "memory"); } while (0) | |
+ | |
+/** | |
+ * Read memory barrier. | |
+ * | |
+ * Guarantees that the LOAD operations generated before the barrier | |
+ * occur before the LOAD operations generated after. | |
+ */ | |
+#define rte_rmb() __sync_synchronize() | |
+ | |
+#ifdef __cplusplus | |
} | |
#endif | |
-#endif /* _RTE_ATOMIC_I686_H_ */ | |
+#endif /* _RTE_ATOMIC_ARM32_H_ */ | |
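Aside (usage sketch, not part of the diff): the ARMv7 header keeps only the barriers, with rte_wmb() mapped to "dmb st" and the full/read barriers to __sync_synchronize(). A typical producer-side use, assuming the header is included:

    /* Publish a payload, then the ready flag; the store barrier keeps the
     * two stores ordered from the consumer's point of view. */
    volatile int payload;
    volatile int ready;

    static void publish(int value)
    {
            payload = value;
            rte_wmb();      /* expands to "dmb st" on ARMv7 */
            ready = 1;
    }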
diff --git a/include/dpdk/rte_atomic_64.h b/include/dpdk/rte_atomic_64.h | |
index 4de6600..671caa7 100644 | |
--- a/include/dpdk/rte_atomic_64.h | |
+++ b/include/dpdk/rte_atomic_64.h | |
@@ -1,8 +1,7 @@ | |
-/*- | |
+/* | |
* BSD LICENSE | |
* | |
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
- * All rights reserved. | |
+ * Copyright (C) Cavium networks Ltd. 2015. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
@@ -14,7 +13,7 @@ | |
* notice, this list of conditions and the following disclaimer in | |
* the documentation and/or other materials provided with the | |
* distribution. | |
- * * Neither the name of Intel Corporation nor the names of its | |
+ * * Neither the name of Cavium networks nor the names of its | |
* contributors may be used to endorse or promote products derived | |
* from this software without specific prior written permission. | |
* | |
@@ -29,163 +28,61 @@ | |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
- */ | |
+*/ | |
-/* | |
- * Inspired from FreeBSD src/sys/amd64/include/atomic.h | |
- * Copyright (c) 1998 Doug Rabson | |
- * All rights reserved. | |
- */ | |
- | |
-#ifndef _RTE_ATOMIC_X86_64_H_ | |
-#define _RTE_ATOMIC_X86_64_H_ | |
- | |
-/*------------------------- 64 bit atomic operations -------------------------*/ | |
+#ifndef _RTE_ATOMIC_ARM64_H_ | |
+#define _RTE_ATOMIC_ARM64_H_ | |
#ifndef RTE_FORCE_INTRINSICS | |
-static inline int | |
-rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) | |
-{ | |
- uint8_t res; | |
- | |
- | |
- asm volatile( | |
- MPLOCKED | |
- "cmpxchgq %[src], %[dst];" | |
- "sete %[res];" | |
- : [res] "=a" (res), /* output */ | |
- [dst] "=m" (*dst) | |
- : [src] "r" (src), /* input */ | |
- "a" (exp), | |
- "m" (*dst) | |
- : "memory"); /* no-clobber list */ | |
- | |
- return res; | |
-} | |
- | |
-static inline void | |
-rte_atomic64_init(rte_atomic64_t *v) | |
-{ | |
- v->cnt = 0; | |
-} | |
- | |
-static inline int64_t | |
-rte_atomic64_read(rte_atomic64_t *v) | |
-{ | |
- return v->cnt; | |
-} | |
- | |
-static inline void | |
-rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) | |
-{ | |
- v->cnt = new_value; | |
-} | |
- | |
-static inline void | |
-rte_atomic64_add(rte_atomic64_t *v, int64_t inc) | |
-{ | |
- asm volatile( | |
- MPLOCKED | |
- "addq %[inc], %[cnt]" | |
- : [cnt] "=m" (v->cnt) /* output */ | |
- : [inc] "ir" (inc), /* input */ | |
- "m" (v->cnt) | |
- ); | |
-} | |
- | |
-static inline void | |
-rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) | |
-{ | |
- asm volatile( | |
- MPLOCKED | |
- "subq %[dec], %[cnt]" | |
- : [cnt] "=m" (v->cnt) /* output */ | |
- : [dec] "ir" (dec), /* input */ | |
- "m" (v->cnt) | |
- ); | |
-} | |
- | |
-static inline void | |
-rte_atomic64_inc(rte_atomic64_t *v) | |
-{ | |
- asm volatile( | |
- MPLOCKED | |
- "incq %[cnt]" | |
- : [cnt] "=m" (v->cnt) /* output */ | |
- : "m" (v->cnt) /* input */ | |
- ); | |
-} | |
- | |
-static inline void | |
-rte_atomic64_dec(rte_atomic64_t *v) | |
-{ | |
- asm volatile( | |
- MPLOCKED | |
- "decq %[cnt]" | |
- : [cnt] "=m" (v->cnt) /* output */ | |
- : "m" (v->cnt) /* input */ | |
- ); | |
-} | |
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS | |
+#endif | |
-static inline int64_t | |
-rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) | |
-{ | |
- int64_t prev = inc; | |
+#ifdef __cplusplus | |
+extern "C" { | |
+#endif | |
- asm volatile( | |
- MPLOCKED | |
- "xaddq %[prev], %[cnt]" | |
- : [prev] "+r" (prev), /* output */ | |
- [cnt] "=m" (v->cnt) | |
- : "m" (v->cnt) /* input */ | |
- ); | |
- return prev + inc; | |
-} | |
+#include "generic/rte_atomic.h" | |
-static inline int64_t | |
-rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) | |
-{ | |
- return rte_atomic64_add_return(v, -dec); | |
-} | |
+#define dmb(opt) do { asm volatile("dmb " #opt : : : "memory"); } while (0) | |
-static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) | |
+/** | |
+ * General memory barrier. | |
+ * | |
+ * Guarantees that the LOAD and STORE operations generated before the | |
+ * barrier occur before the LOAD and STORE operations generated after. | |
+ * This function is architecture dependent. | |
+ */ | |
+static inline void rte_mb(void) | |
{ | |
- uint8_t ret; | |
- | |
- asm volatile( | |
- MPLOCKED | |
- "incq %[cnt] ; " | |
- "sete %[ret]" | |
- : [cnt] "+m" (v->cnt), /* output */ | |
- [ret] "=qm" (ret) | |
- ); | |
- | |
- return ret != 0; | |
+ dmb(ish); | |
} | |
-static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) | |
+/** | |
+ * Write memory barrier. | |
+ * | |
+ * Guarantees that the STORE operations generated before the barrier | |
+ * occur before the STORE operations generated after. | |
+ * This function is architecture dependent. | |
+ */ | |
+static inline void rte_wmb(void) | |
{ | |
- uint8_t ret; | |
- | |
- asm volatile( | |
- MPLOCKED | |
- "decq %[cnt] ; " | |
- "sete %[ret]" | |
- : [cnt] "+m" (v->cnt), /* output */ | |
- [ret] "=qm" (ret) | |
- ); | |
- return ret != 0; | |
+ dmb(ishst); | |
} | |
-static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) | |
+/** | |
+ * Read memory barrier. | |
+ * | |
+ * Guarantees that the LOAD operations generated before the barrier | |
+ * occur before the LOAD operations generated after. | |
+ * This function is architecture dependent. | |
+ */ | |
+static inline void rte_rmb(void) | |
{ | |
- return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); | |
+ dmb(ishld); | |
} | |
-static inline void rte_atomic64_clear(rte_atomic64_t *v) | |
-{ | |
- v->cnt = 0; | |
+#ifdef __cplusplus | |
} | |
#endif | |
-#endif /* _RTE_ATOMIC_X86_64_H_ */ | |
+#endif /* _RTE_ATOMIC_ARM64_H_ */ | |
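Aside (illustration, not part of the diff): the arm64 dmb(opt) helper stringizes its argument straight into the barrier instruction, so the three wrappers expand to plain inner-shareable barriers. A sketch of the expansion:

    /* rte_mb()  -> dmb(ish)   -> asm volatile("dmb ish"   : : : "memory");
     * rte_wmb() -> dmb(ishst) -> asm volatile("dmb ishst" : : : "memory");
     * rte_rmb() -> dmb(ishld) -> asm volatile("dmb ishld" : : : "memory");
     */
    static inline void wmb_expanded_sketch(void)
    {
            asm volatile("dmb ishst" : : : "memory");  /* store-store barrier */
    }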
diff --git a/include/dpdk/rte_byteorder.h b/include/dpdk/rte_byteorder.h | |
index ffdb6ef..3f2dd1f 100644 | |
--- a/include/dpdk/rte_byteorder.h | |
+++ b/include/dpdk/rte_byteorder.h | |
@@ -1,8 +1,7 @@ | |
-/*- | |
+/* | |
* BSD LICENSE | |
* | |
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
- * All rights reserved. | |
+ * Copyright(c) 2015 RehiveTech. All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
@@ -14,7 +13,7 @@ | |
* notice, this list of conditions and the following disclaimer in | |
* the documentation and/or other materials provided with the | |
* distribution. | |
- * * Neither the name of Intel Corporation nor the names of its | |
+ * * Neither the name of RehiveTech nor the names of its | |
* contributors may be used to endorse or promote products derived | |
* from this software without specific prior written permission. | |
* | |
@@ -31,8 +30,12 @@ | |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
-#ifndef _RTE_BYTEORDER_X86_H_ | |
-#define _RTE_BYTEORDER_X86_H_ | |
+#ifndef _RTE_BYTEORDER_ARM_H_ | |
+#define _RTE_BYTEORDER_ARM_H_ | |
+ | |
+#ifndef RTE_FORCE_INTRINSICS | |
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS | |
+#endif | |
#ifdef __cplusplus | |
extern "C" { | |
@@ -40,61 +43,27 @@ extern "C" { | |
#include "generic/rte_byteorder.h" | |
-#ifndef RTE_BYTE_ORDER | |
-#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN | |
-#endif | |
+/* fix missing __builtin_bswap16 for gcc older then 4.8 */ | |
+#if !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) | |
-/* | |
- * An architecture-optimized byte swap for a 16-bit value. | |
- * | |
- * Do not use this function directly. The preferred function is rte_bswap16(). | |
- */ | |
static inline uint16_t rte_arch_bswap16(uint16_t _x) | |
{ | |
register uint16_t x = _x; | |
- asm volatile ("xchgb %b[x1],%h[x2]" | |
- : [x1] "=Q" (x) | |
- : [x2] "0" (x) | |
- ); | |
- return x; | |
-} | |
-/* | |
- * An architecture-optimized byte swap for a 32-bit value. | |
- * | |
- * Do not use this function directly. The preferred function is rte_bswap32(). | |
- */ | |
-static inline uint32_t rte_arch_bswap32(uint32_t _x) | |
-{ | |
- register uint32_t x = _x; | |
- asm volatile ("bswap %[x]" | |
- : [x] "+r" (x) | |
+ asm volatile ("rev16 %0,%1" | |
+ : "=r" (x) | |
+ : "r" (x) | |
); | |
return x; | |
} | |
-#ifndef RTE_FORCE_INTRINSICS | |
-#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ | |
- rte_constant_bswap16(x) : \ | |
- rte_arch_bswap16(x))) | |
- | |
-#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ? \ | |
- rte_constant_bswap32(x) : \ | |
- rte_arch_bswap32(x))) | |
- | |
-#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ? \ | |
- rte_constant_bswap64(x) : \ | |
- rte_arch_bswap64(x))) | |
-#else | |
-/* | |
- * __builtin_bswap16 is only available gcc 4.8 and upwards | |
- */ | |
-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) | |
-#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ | |
- rte_constant_bswap16(x) : \ | |
+#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ | |
+ rte_constant_bswap16(x) : \ | |
rte_arch_bswap16(x))) | |
#endif | |
-#endif | |
+ | |
+/* ARM architecture is bi-endian (both big and little). */ | |
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN | |
#define rte_cpu_to_le_16(x) (x) | |
#define rte_cpu_to_le_32(x) (x) | |
@@ -112,14 +81,27 @@ static inline uint32_t rte_arch_bswap32(uint32_t _x) | |
#define rte_be_to_cpu_32(x) rte_bswap32(x) | |
#define rte_be_to_cpu_64(x) rte_bswap64(x) | |
-#ifdef RTE_ARCH_I686 | |
-#include "rte_byteorder_32.h" | |
-#else | |
-#include "rte_byteorder_64.h" | |
+#else /* RTE_BIG_ENDIAN */ | |
+ | |
+#define rte_cpu_to_le_16(x) rte_bswap16(x) | |
+#define rte_cpu_to_le_32(x) rte_bswap32(x) | |
+#define rte_cpu_to_le_64(x) rte_bswap64(x) | |
+ | |
+#define rte_cpu_to_be_16(x) (x) | |
+#define rte_cpu_to_be_32(x) (x) | |
+#define rte_cpu_to_be_64(x) (x) | |
+ | |
+#define rte_le_to_cpu_16(x) rte_bswap16(x) | |
+#define rte_le_to_cpu_32(x) rte_bswap32(x) | |
+#define rte_le_to_cpu_64(x) rte_bswap64(x) | |
+ | |
+#define rte_be_to_cpu_16(x) (x) | |
+#define rte_be_to_cpu_32(x) (x) | |
+#define rte_be_to_cpu_64(x) (x) | |
#endif | |
#ifdef __cplusplus | |
} | |
#endif | |
-#endif /* _RTE_BYTEORDER_X86_H_ */ | |
+#endif /* _RTE_BYTEORDER_ARM_H_ */ | |
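Aside (usage sketch, not part of the diff): on a little-endian ARM build the rte_cpu_to_be_*/rte_be_to_cpu_* macros resolve to byte swaps; constants are folded at compile time, gcc >= 4.8 uses __builtin_bswap16, and older compilers fall back to the rev16 path added above. Assuming the header is included:

    #include <stdint.h>

    static inline uint16_t port_to_wire(uint16_t port)
    {
            /* e.g. 8080 (0x1f90) becomes 0x901f on the wire */
            return rte_cpu_to_be_16(port);
    }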
diff --git a/include/dpdk/rte_config.h b/include/dpdk/rte_config.h | |
index eff3575..f9d5a60 100644 | |
--- a/include/dpdk/rte_config.h | |
+++ b/include/dpdk/rte_config.h | |
@@ -3,12 +3,13 @@ | |
#undef RTE_EXEC_ENV | |
#define RTE_EXEC_ENV "linuxapp" | |
#undef RTE_ARCH | |
-#define RTE_ARCH "x86_64" | |
+#define RTE_ARCH "arm64" | |
#undef RTE_MACHINE | |
-#define RTE_MACHINE "default" | |
+#define RTE_MACHINE "armv8a" | |
#undef RTE_TOOLCHAIN | |
#define RTE_TOOLCHAIN "gcc" | |
#undef RTE_FORCE_INTRINSICS | |
+#define RTE_FORCE_INTRINSICS 1 | |
#undef RTE_ARCH_STRICT_ALIGN | |
#undef RTE_BUILD_SHARED_LIB | |
#define RTE_BUILD_SHARED_LIB 1 | |
@@ -87,7 +88,6 @@ | |
#undef RTE_IXGBE_RX_OLFLAGS_ENABLE | |
#define RTE_IXGBE_RX_OLFLAGS_ENABLE 1 | |
#undef RTE_LIBRTE_I40E_PMD | |
-#define RTE_LIBRTE_I40E_PMD 1 | |
#undef RTE_LIBRTE_I40E_DEBUG_INIT | |
#undef RTE_LIBRTE_I40E_DEBUG_RX | |
#undef RTE_LIBRTE_I40E_DEBUG_TX | |
@@ -109,7 +109,6 @@ | |
#undef RTE_LIBRTE_I40E_ITR_INTERVAL | |
#define RTE_LIBRTE_I40E_ITR_INTERVAL -1 | |
#undef RTE_LIBRTE_FM10K_PMD | |
-#define RTE_LIBRTE_FM10K_PMD 1 | |
#undef RTE_LIBRTE_FM10K_DEBUG_INIT | |
#undef RTE_LIBRTE_FM10K_DEBUG_RX | |
#undef RTE_LIBRTE_FM10K_DEBUG_TX | |
@@ -341,12 +340,11 @@ | |
#undef RTE_TEST_PMD_RECORD_BURST_STATS | |
#undef RTE_EXEC_ENV_LINUXAPP | |
#define RTE_EXEC_ENV_LINUXAPP 1 | |
-#undef RTE_ARCH_X86_64 | |
-#define RTE_ARCH_X86_64 1 | |
-#undef RTE_ARCH_X86 | |
-#define RTE_ARCH_X86 1 | |
+#undef RTE_ARCH_ARM64 | |
+#define RTE_ARCH_ARM64 1 | |
#undef RTE_ARCH_64 | |
#define RTE_ARCH_64 1 | |
#undef RTE_TOOLCHAIN_GCC | |
#define RTE_TOOLCHAIN_GCC 1 | |
+#undef RTE_LIBRTE_IVSHMEM | |
#endif /* __RTE_CONFIG_H */ | |
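Aside (illustration, not part of the diff): the rte_config.h hunk is what flips the per-arch macros, so code that keys off them selects the arm64 paths. A minimal sketch of how such a check usually looks:

    #include <rte_config.h>

    static const char *dpdk_arch(void)
    {
    #if defined(RTE_ARCH_ARM64)
            return "arm64";         /* defined by this config */
    #elif defined(RTE_ARCH_X86_64)
            return "x86_64";        /* what the amd64 config defines instead */
    #else
            return "other";
    #endif
    }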
diff --git a/include/dpdk/rte_cpuflags.h b/include/dpdk/rte_cpuflags.h | |
index 26204fa..b8f6288 100644 | |
--- a/include/dpdk/rte_cpuflags.h | |
+++ b/include/dpdk/rte_cpuflags.h | |
@@ -1,8 +1,7 @@ | |
-/*- | |
+/* | |
* BSD LICENSE | |
* | |
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
- * All rights reserved. | |
+ * Copyright(c) 2015 RehiveTech. All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
@@ -14,7 +13,7 @@ | |
* notice, this list of conditions and the following disclaimer in | |
* the documentation and/or other materials provided with the | |
* distribution. | |
- * * Neither the name of Intel Corporation nor the names of its | |
+ * * Neither the name of RehiveTech nor the names of its | |
* contributors may be used to endorse or promote products derived | |
* from this software without specific prior written permission. | |
* | |
@@ -31,123 +30,13 @@ | |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
-#ifndef _RTE_CPUFLAGS_X86_64_H_ | |
-#define _RTE_CPUFLAGS_X86_64_H_ | |
+#ifndef _RTE_CPUFLAGS_ARM_H_ | |
+#define _RTE_CPUFLAGS_ARM_H_ | |
-#ifdef __cplusplus | |
-extern "C" { | |
+#ifdef RTE_ARCH_64 | |
+#include <rte_cpuflags_64.h> | |
+#else | |
+#include <rte_cpuflags_32.h> | |
#endif | |
-enum rte_cpu_flag_t { | |
- /* (EAX 01h) ECX features*/ | |
- RTE_CPUFLAG_SSE3 = 0, /**< SSE3 */ | |
- RTE_CPUFLAG_PCLMULQDQ, /**< PCLMULQDQ */ | |
- RTE_CPUFLAG_DTES64, /**< DTES64 */ | |
- RTE_CPUFLAG_MONITOR, /**< MONITOR */ | |
- RTE_CPUFLAG_DS_CPL, /**< DS_CPL */ | |
- RTE_CPUFLAG_VMX, /**< VMX */ | |
- RTE_CPUFLAG_SMX, /**< SMX */ | |
- RTE_CPUFLAG_EIST, /**< EIST */ | |
- RTE_CPUFLAG_TM2, /**< TM2 */ | |
- RTE_CPUFLAG_SSSE3, /**< SSSE3 */ | |
- RTE_CPUFLAG_CNXT_ID, /**< CNXT_ID */ | |
- RTE_CPUFLAG_FMA, /**< FMA */ | |
- RTE_CPUFLAG_CMPXCHG16B, /**< CMPXCHG16B */ | |
- RTE_CPUFLAG_XTPR, /**< XTPR */ | |
- RTE_CPUFLAG_PDCM, /**< PDCM */ | |
- RTE_CPUFLAG_PCID, /**< PCID */ | |
- RTE_CPUFLAG_DCA, /**< DCA */ | |
- RTE_CPUFLAG_SSE4_1, /**< SSE4_1 */ | |
- RTE_CPUFLAG_SSE4_2, /**< SSE4_2 */ | |
- RTE_CPUFLAG_X2APIC, /**< X2APIC */ | |
- RTE_CPUFLAG_MOVBE, /**< MOVBE */ | |
- RTE_CPUFLAG_POPCNT, /**< POPCNT */ | |
- RTE_CPUFLAG_TSC_DEADLINE, /**< TSC_DEADLINE */ | |
- RTE_CPUFLAG_AES, /**< AES */ | |
- RTE_CPUFLAG_XSAVE, /**< XSAVE */ | |
- RTE_CPUFLAG_OSXSAVE, /**< OSXSAVE */ | |
- RTE_CPUFLAG_AVX, /**< AVX */ | |
- RTE_CPUFLAG_F16C, /**< F16C */ | |
- RTE_CPUFLAG_RDRAND, /**< RDRAND */ | |
- | |
- /* (EAX 01h) EDX features */ | |
- RTE_CPUFLAG_FPU, /**< FPU */ | |
- RTE_CPUFLAG_VME, /**< VME */ | |
- RTE_CPUFLAG_DE, /**< DE */ | |
- RTE_CPUFLAG_PSE, /**< PSE */ | |
- RTE_CPUFLAG_TSC, /**< TSC */ | |
- RTE_CPUFLAG_MSR, /**< MSR */ | |
- RTE_CPUFLAG_PAE, /**< PAE */ | |
- RTE_CPUFLAG_MCE, /**< MCE */ | |
- RTE_CPUFLAG_CX8, /**< CX8 */ | |
- RTE_CPUFLAG_APIC, /**< APIC */ | |
- RTE_CPUFLAG_SEP, /**< SEP */ | |
- RTE_CPUFLAG_MTRR, /**< MTRR */ | |
- RTE_CPUFLAG_PGE, /**< PGE */ | |
- RTE_CPUFLAG_MCA, /**< MCA */ | |
- RTE_CPUFLAG_CMOV, /**< CMOV */ | |
- RTE_CPUFLAG_PAT, /**< PAT */ | |
- RTE_CPUFLAG_PSE36, /**< PSE36 */ | |
- RTE_CPUFLAG_PSN, /**< PSN */ | |
- RTE_CPUFLAG_CLFSH, /**< CLFSH */ | |
- RTE_CPUFLAG_DS, /**< DS */ | |
- RTE_CPUFLAG_ACPI, /**< ACPI */ | |
- RTE_CPUFLAG_MMX, /**< MMX */ | |
- RTE_CPUFLAG_FXSR, /**< FXSR */ | |
- RTE_CPUFLAG_SSE, /**< SSE */ | |
- RTE_CPUFLAG_SSE2, /**< SSE2 */ | |
- RTE_CPUFLAG_SS, /**< SS */ | |
- RTE_CPUFLAG_HTT, /**< HTT */ | |
- RTE_CPUFLAG_TM, /**< TM */ | |
- RTE_CPUFLAG_PBE, /**< PBE */ | |
- | |
- /* (EAX 06h) EAX features */ | |
- RTE_CPUFLAG_DIGTEMP, /**< DIGTEMP */ | |
- RTE_CPUFLAG_TRBOBST, /**< TRBOBST */ | |
- RTE_CPUFLAG_ARAT, /**< ARAT */ | |
- RTE_CPUFLAG_PLN, /**< PLN */ | |
- RTE_CPUFLAG_ECMD, /**< ECMD */ | |
- RTE_CPUFLAG_PTM, /**< PTM */ | |
- | |
- /* (EAX 06h) ECX features */ | |
- RTE_CPUFLAG_MPERF_APERF_MSR, /**< MPERF_APERF_MSR */ | |
- RTE_CPUFLAG_ACNT2, /**< ACNT2 */ | |
- RTE_CPUFLAG_ENERGY_EFF, /**< ENERGY_EFF */ | |
- | |
- /* (EAX 07h, ECX 0h) EBX features */ | |
- RTE_CPUFLAG_FSGSBASE, /**< FSGSBASE */ | |
- RTE_CPUFLAG_BMI1, /**< BMI1 */ | |
- RTE_CPUFLAG_HLE, /**< Hardware Lock elision */ | |
- RTE_CPUFLAG_AVX2, /**< AVX2 */ | |
- RTE_CPUFLAG_SMEP, /**< SMEP */ | |
- RTE_CPUFLAG_BMI2, /**< BMI2 */ | |
- RTE_CPUFLAG_ERMS, /**< ERMS */ | |
- RTE_CPUFLAG_INVPCID, /**< INVPCID */ | |
- RTE_CPUFLAG_RTM, /**< Transactional memory */ | |
- RTE_CPUFLAG_AVX512F, /**< AVX512F */ | |
- | |
- /* (EAX 80000001h) ECX features */ | |
- RTE_CPUFLAG_LAHF_SAHF, /**< LAHF_SAHF */ | |
- RTE_CPUFLAG_LZCNT, /**< LZCNT */ | |
- | |
- /* (EAX 80000001h) EDX features */ | |
- RTE_CPUFLAG_SYSCALL, /**< SYSCALL */ | |
- RTE_CPUFLAG_XD, /**< XD */ | |
- RTE_CPUFLAG_1GB_PG, /**< 1GB_PG */ | |
- RTE_CPUFLAG_RDTSCP, /**< RDTSCP */ | |
- RTE_CPUFLAG_EM64T, /**< EM64T */ | |
- | |
- /* (EAX 80000007h) EDX features */ | |
- RTE_CPUFLAG_INVTSC, /**< INVTSC */ | |
- | |
- /* The last item */ | |
- RTE_CPUFLAG_NUMFLAGS, /**< This should always be the last! */ | |
-}; | |
- | |
-#include "generic/rte_cpuflags.h" | |
- | |
-#ifdef __cplusplus | |
-} | |
-#endif | |
- | |
-#endif /* _RTE_CPUFLAGS_X86_64_H_ */ | |
+#endif /* _RTE_CPUFLAGS_ARM_H_ */ | |
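Aside (sketch, not part of the diff): the arm header drops the x86 CPUID flag list and pulls in a per-width list instead; feature probing still goes through the generic API. Assuming rte_cpuflags_64.h defines RTE_CPUFLAG_NEON:

    #include <rte_cpuflags.h>

    static int have_neon(void)
    {
            /* rte_cpu_get_flag_enabled() returns 1 if the flag is set */
            return rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON) == 1;
    }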
diff --git a/include/dpdk/rte_cycles.h b/include/dpdk/rte_cycles.h | |
index 6e3c7d8..a8009a0 100644 | |
--- a/include/dpdk/rte_cycles.h | |
+++ b/include/dpdk/rte_cycles.h | |
@@ -1,8 +1,7 @@ | |
-/*- | |
+/* | |
* BSD LICENSE | |
* | |
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
- * All rights reserved. | |
+ * Copyright(c) 2015 RehiveTech. All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
@@ -14,7 +13,7 @@ | |
* notice, this list of conditions and the following disclaimer in | |
* the documentation and/or other materials provided with the | |
* distribution. | |
- * * Neither the name of Intel Corporation nor the names of its | |
+ * * Neither the name of RehiveTech nor the names of its | |
* contributors may be used to endorse or promote products derived | |
* from this software without specific prior written permission. | |
* | |
@@ -30,92 +29,14 @@ | |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
-/* BSD LICENSE | |
- * | |
- * Copyright(c) 2013 6WIND. | |
- * | |
- * Redistribution and use in source and binary forms, with or without | |
- * modification, are permitted provided that the following conditions | |
- * are met: | |
- * | |
- * * Redistributions of source code must retain the above copyright | |
- * notice, this list of conditions and the following disclaimer. | |
- * * Redistributions in binary form must reproduce the above copyright | |
- * notice, this list of conditions and the following disclaimer in | |
- * the documentation and/or other materials provided with the | |
- * distribution. | |
- * * Neither the name of 6WIND S.A. nor the names of its | |
- * contributors may be used to endorse or promote products derived | |
- * from this software without specific prior written permission. | |
- * | |
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
- */ | |
- | |
-#ifndef _RTE_CYCLES_X86_64_H_ | |
-#define _RTE_CYCLES_X86_64_H_ | |
- | |
-#ifdef __cplusplus | |
-extern "C" { | |
-#endif | |
- | |
-#include "generic/rte_cycles.h" | |
- | |
-#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT | |
-/* Global switch to use VMWARE mapping of TSC instead of RDTSC */ | |
-extern int rte_cycles_vmware_tsc_map; | |
-#include <rte_branch_prediction.h> | |
-#endif | |
- | |
-static inline uint64_t | |
-rte_rdtsc(void) | |
-{ | |
- union { | |
- uint64_t tsc_64; | |
- struct { | |
- uint32_t lo_32; | |
- uint32_t hi_32; | |
- }; | |
- } tsc; | |
- | |
-#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT | |
- if (unlikely(rte_cycles_vmware_tsc_map)) { | |
- /* ecx = 0x10000 corresponds to the physical TSC for VMware */ | |
- asm volatile("rdpmc" : | |
- "=a" (tsc.lo_32), | |
- "=d" (tsc.hi_32) : | |
- "c"(0x10000)); | |
- return tsc.tsc_64; | |
- } | |
-#endif | |
- | |
- asm volatile("rdtsc" : | |
- "=a" (tsc.lo_32), | |
- "=d" (tsc.hi_32)); | |
- return tsc.tsc_64; | |
-} | |
- | |
-static inline uint64_t | |
-rte_rdtsc_precise(void) | |
-{ | |
- rte_mb(); | |
- return rte_rdtsc(); | |
-} | |
-static inline uint64_t | |
-rte_get_tsc_cycles(void) { return rte_rdtsc(); } | |
+#ifndef _RTE_CYCLES_ARM_H_ | |
+#define _RTE_CYCLES_ARM_H_ | |
-#ifdef __cplusplus | |
-} | |
+#ifdef RTE_ARCH_64 | |
+#include <rte_cycles_64.h> | |
+#else | |
+#include <rte_cycles_32.h> | |
#endif | |
-#endif /* _RTE_CYCLES_X86_64_H_ */ | |
+#endif /* _RTE_CYCLES_ARM_H_ */ | |
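Aside (sketch, not part of the diff): the rdtsc-based cycle counter is removed and the arm64 header is expected to read the architected counter instead. A minimal sketch of that replacement, assuming the usual cntvct_el0 read:

    #include <stdint.h>

    static inline uint64_t read_cycles_sketch(void)
    {
            uint64_t cnt;

            asm volatile("mrs %0, cntvct_el0" : "=r" (cnt));
            return cnt;
    }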
diff --git a/include/dpdk/rte_memcpy.h b/include/dpdk/rte_memcpy.h | |
index 413035e..1d562c3 100644 | |
--- a/include/dpdk/rte_memcpy.h | |
+++ b/include/dpdk/rte_memcpy.h | |
@@ -1,8 +1,7 @@ | |
-/*- | |
+/* | |
* BSD LICENSE | |
* | |
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
- * All rights reserved. | |
+ * Copyright(c) 2015 RehiveTech. All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
@@ -14,7 +13,7 @@ | |
* notice, this list of conditions and the following disclaimer in | |
* the documentation and/or other materials provided with the | |
* distribution. | |
- * * Neither the name of Intel Corporation nor the names of its | |
+ * * Neither the name of RehiveTech nor the names of its | |
* contributors may be used to endorse or promote products derived | |
* from this software without specific prior written permission. | |
* | |
@@ -31,798 +30,13 @@ | |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
-#ifndef _RTE_MEMCPY_X86_64_H_ | |
-#define _RTE_MEMCPY_X86_64_H_ | |
+#ifndef _RTE_MEMCPY_ARM_H_ | |
+#define _RTE_MEMCPY_ARM_H_ | |
-/** | |
- * @file | |
- * | |
- * Functions for SSE/AVX/AVX2/AVX512 implementation of memcpy(). | |
- */ | |
- | |
-#include <stdio.h> | |
-#include <stdint.h> | |
-#include <string.h> | |
-#include <rte_vect.h> | |
- | |
-#ifdef __cplusplus | |
-extern "C" { | |
-#endif | |
- | |
-/** | |
- * Copy bytes from one location to another. The locations must not overlap. | |
- * | |
- * @note This is implemented as a macro, so it's address should not be taken | |
- * and care is needed as parameter expressions may be evaluated multiple times. | |
- * | |
- * @param dst | |
- * Pointer to the destination of the data. | |
- * @param src | |
- * Pointer to the source data. | |
- * @param n | |
- * Number of bytes to copy. | |
- * @return | |
- * Pointer to the destination data. | |
- */ | |
-static inline void * | |
-rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline)); | |
- | |
-#ifdef RTE_MACHINE_CPUFLAG_AVX512F | |
- | |
-/** | |
- * AVX512 implementation below | |
- */ | |
- | |
-/** | |
- * Copy 16 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov16(uint8_t *dst, const uint8_t *src) | |
-{ | |
- __m128i xmm0; | |
- | |
- xmm0 = _mm_loadu_si128((const __m128i *)src); | |
- _mm_storeu_si128((__m128i *)dst, xmm0); | |
-} | |
- | |
-/** | |
- * Copy 32 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov32(uint8_t *dst, const uint8_t *src) | |
-{ | |
- __m256i ymm0; | |
- | |
- ymm0 = _mm256_loadu_si256((const __m256i *)src); | |
- _mm256_storeu_si256((__m256i *)dst, ymm0); | |
-} | |
- | |
-/** | |
- * Copy 64 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov64(uint8_t *dst, const uint8_t *src) | |
-{ | |
- __m512i zmm0; | |
- | |
- zmm0 = _mm512_loadu_si512((const void *)src); | |
- _mm512_storeu_si512((void *)dst, zmm0); | |
-} | |
- | |
-/** | |
- * Copy 128 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov128(uint8_t *dst, const uint8_t *src) | |
-{ | |
- rte_mov64(dst + 0 * 64, src + 0 * 64); | |
- rte_mov64(dst + 1 * 64, src + 1 * 64); | |
-} | |
- | |
-/** | |
- * Copy 256 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov256(uint8_t *dst, const uint8_t *src) | |
-{ | |
- rte_mov64(dst + 0 * 64, src + 0 * 64); | |
- rte_mov64(dst + 1 * 64, src + 1 * 64); | |
- rte_mov64(dst + 2 * 64, src + 2 * 64); | |
- rte_mov64(dst + 3 * 64, src + 3 * 64); | |
-} | |
- | |
-/** | |
- * Copy 128-byte blocks from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n) | |
-{ | |
- __m512i zmm0, zmm1; | |
- | |
- while (n >= 128) { | |
- zmm0 = _mm512_loadu_si512((const void *)(src + 0 * 64)); | |
- n -= 128; | |
- zmm1 = _mm512_loadu_si512((const void *)(src + 1 * 64)); | |
- src = src + 128; | |
- _mm512_storeu_si512((void *)(dst + 0 * 64), zmm0); | |
- _mm512_storeu_si512((void *)(dst + 1 * 64), zmm1); | |
- dst = dst + 128; | |
- } | |
-} | |
- | |
-/** | |
- * Copy 512-byte blocks from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n) | |
-{ | |
- __m512i zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7; | |
- | |
- while (n >= 512) { | |
- zmm0 = _mm512_loadu_si512((const void *)(src + 0 * 64)); | |
- n -= 512; | |
- zmm1 = _mm512_loadu_si512((const void *)(src + 1 * 64)); | |
- zmm2 = _mm512_loadu_si512((const void *)(src + 2 * 64)); | |
- zmm3 = _mm512_loadu_si512((const void *)(src + 3 * 64)); | |
- zmm4 = _mm512_loadu_si512((const void *)(src + 4 * 64)); | |
- zmm5 = _mm512_loadu_si512((const void *)(src + 5 * 64)); | |
- zmm6 = _mm512_loadu_si512((const void *)(src + 6 * 64)); | |
- zmm7 = _mm512_loadu_si512((const void *)(src + 7 * 64)); | |
- src = src + 512; | |
- _mm512_storeu_si512((void *)(dst + 0 * 64), zmm0); | |
- _mm512_storeu_si512((void *)(dst + 1 * 64), zmm1); | |
- _mm512_storeu_si512((void *)(dst + 2 * 64), zmm2); | |
- _mm512_storeu_si512((void *)(dst + 3 * 64), zmm3); | |
- _mm512_storeu_si512((void *)(dst + 4 * 64), zmm4); | |
- _mm512_storeu_si512((void *)(dst + 5 * 64), zmm5); | |
- _mm512_storeu_si512((void *)(dst + 6 * 64), zmm6); | |
- _mm512_storeu_si512((void *)(dst + 7 * 64), zmm7); | |
- dst = dst + 512; | |
- } | |
-} | |
- | |
-static inline void * | |
-rte_memcpy(void *dst, const void *src, size_t n) | |
-{ | |
- uintptr_t dstu = (uintptr_t)dst; | |
- uintptr_t srcu = (uintptr_t)src; | |
- void *ret = dst; | |
- size_t dstofss; | |
- size_t bits; | |
- | |
- /** | |
- * Copy less than 16 bytes | |
- */ | |
- if (n < 16) { | |
- if (n & 0x01) { | |
- *(uint8_t *)dstu = *(const uint8_t *)srcu; | |
- srcu = (uintptr_t)((const uint8_t *)srcu + 1); | |
- dstu = (uintptr_t)((uint8_t *)dstu + 1); | |
- } | |
- if (n & 0x02) { | |
- *(uint16_t *)dstu = *(const uint16_t *)srcu; | |
- srcu = (uintptr_t)((const uint16_t *)srcu + 1); | |
- dstu = (uintptr_t)((uint16_t *)dstu + 1); | |
- } | |
- if (n & 0x04) { | |
- *(uint32_t *)dstu = *(const uint32_t *)srcu; | |
- srcu = (uintptr_t)((const uint32_t *)srcu + 1); | |
- dstu = (uintptr_t)((uint32_t *)dstu + 1); | |
- } | |
- if (n & 0x08) | |
- *(uint64_t *)dstu = *(const uint64_t *)srcu; | |
- return ret; | |
- } | |
- | |
- /** | |
- * Fast way when copy size doesn't exceed 512 bytes | |
- */ | |
- if (n <= 32) { | |
- rte_mov16((uint8_t *)dst, (const uint8_t *)src); | |
- rte_mov16((uint8_t *)dst - 16 + n, | |
- (const uint8_t *)src - 16 + n); | |
- return ret; | |
- } | |
- if (n <= 64) { | |
- rte_mov32((uint8_t *)dst, (const uint8_t *)src); | |
- rte_mov32((uint8_t *)dst - 32 + n, | |
- (const uint8_t *)src - 32 + n); | |
- return ret; | |
- } | |
- if (n <= 512) { | |
- if (n >= 256) { | |
- n -= 256; | |
- rte_mov256((uint8_t *)dst, (const uint8_t *)src); | |
- src = (const uint8_t *)src + 256; | |
- dst = (uint8_t *)dst + 256; | |
- } | |
- if (n >= 128) { | |
- n -= 128; | |
- rte_mov128((uint8_t *)dst, (const uint8_t *)src); | |
- src = (const uint8_t *)src + 128; | |
- dst = (uint8_t *)dst + 128; | |
- } | |
-COPY_BLOCK_128_BACK63: | |
- if (n > 64) { | |
- rte_mov64((uint8_t *)dst, (const uint8_t *)src); | |
- rte_mov64((uint8_t *)dst - 64 + n, | |
- (const uint8_t *)src - 64 + n); | |
- return ret; | |
- } | |
- if (n > 0) | |
- rte_mov64((uint8_t *)dst - 64 + n, | |
- (const uint8_t *)src - 64 + n); | |
- return ret; | |
- } | |
- | |
- /** | |
- * Make store aligned when copy size exceeds 512 bytes | |
- */ | |
- dstofss = ((uintptr_t)dst & 0x3F); | |
- if (dstofss > 0) { | |
- dstofss = 64 - dstofss; | |
- n -= dstofss; | |
- rte_mov64((uint8_t *)dst, (const uint8_t *)src); | |
- src = (const uint8_t *)src + dstofss; | |
- dst = (uint8_t *)dst + dstofss; | |
- } | |
- | |
- /** | |
- * Copy 512-byte blocks. | |
- * Use copy block function for better instruction order control, | |
- * which is important when load is unaligned. | |
- */ | |
- rte_mov512blocks((uint8_t *)dst, (const uint8_t *)src, n); | |
- bits = n; | |
- n = n & 511; | |
- bits -= n; | |
- src = (const uint8_t *)src + bits; | |
- dst = (uint8_t *)dst + bits; | |
- | |
- /** | |
- * Copy 128-byte blocks. | |
- * Use copy block function for better instruction order control, | |
- * which is important when load is unaligned. | |
- */ | |
- if (n >= 128) { | |
- rte_mov128blocks((uint8_t *)dst, (const uint8_t *)src, n); | |
- bits = n; | |
- n = n & 127; | |
- bits -= n; | |
- src = (const uint8_t *)src + bits; | |
- dst = (uint8_t *)dst + bits; | |
- } | |
- | |
- /** | |
- * Copy whatever left | |
- */ | |
- goto COPY_BLOCK_128_BACK63; | |
-} | |
- | |
-#elif defined RTE_MACHINE_CPUFLAG_AVX2 | |
- | |
-/** | |
- * AVX2 implementation below | |
- */ | |
- | |
-/** | |
- * Copy 16 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov16(uint8_t *dst, const uint8_t *src) | |
-{ | |
- __m128i xmm0; | |
- | |
- xmm0 = _mm_loadu_si128((const __m128i *)src); | |
- _mm_storeu_si128((__m128i *)dst, xmm0); | |
-} | |
- | |
-/** | |
- * Copy 32 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov32(uint8_t *dst, const uint8_t *src) | |
-{ | |
- __m256i ymm0; | |
- | |
- ymm0 = _mm256_loadu_si256((const __m256i *)src); | |
- _mm256_storeu_si256((__m256i *)dst, ymm0); | |
-} | |
- | |
-/** | |
- * Copy 64 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov64(uint8_t *dst, const uint8_t *src) | |
-{ | |
- rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); | |
- rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32); | |
-} | |
- | |
-/** | |
- * Copy 128 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov128(uint8_t *dst, const uint8_t *src) | |
-{ | |
- rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); | |
- rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32); | |
- rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32); | |
- rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32); | |
-} | |
- | |
-/** | |
- * Copy 128-byte blocks from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n) | |
-{ | |
- __m256i ymm0, ymm1, ymm2, ymm3; | |
- | |
- while (n >= 128) { | |
- ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32)); | |
- n -= 128; | |
- ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32)); | |
- ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32)); | |
- ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32)); | |
- src = (const uint8_t *)src + 128; | |
- _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0); | |
- _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1); | |
- _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2); | |
- _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3); | |
- dst = (uint8_t *)dst + 128; | |
- } | |
-} | |
- | |
-static inline void * | |
-rte_memcpy(void *dst, const void *src, size_t n) | |
-{ | |
- uintptr_t dstu = (uintptr_t)dst; | |
- uintptr_t srcu = (uintptr_t)src; | |
- void *ret = dst; | |
- size_t dstofss; | |
- size_t bits; | |
- | |
- /** | |
- * Copy less than 16 bytes | |
- */ | |
- if (n < 16) { | |
- if (n & 0x01) { | |
- *(uint8_t *)dstu = *(const uint8_t *)srcu; | |
- srcu = (uintptr_t)((const uint8_t *)srcu + 1); | |
- dstu = (uintptr_t)((uint8_t *)dstu + 1); | |
- } | |
- if (n & 0x02) { | |
- *(uint16_t *)dstu = *(const uint16_t *)srcu; | |
- srcu = (uintptr_t)((const uint16_t *)srcu + 1); | |
- dstu = (uintptr_t)((uint16_t *)dstu + 1); | |
- } | |
- if (n & 0x04) { | |
- *(uint32_t *)dstu = *(const uint32_t *)srcu; | |
- srcu = (uintptr_t)((const uint32_t *)srcu + 1); | |
- dstu = (uintptr_t)((uint32_t *)dstu + 1); | |
- } | |
- if (n & 0x08) { | |
- *(uint64_t *)dstu = *(const uint64_t *)srcu; | |
- } | |
- return ret; | |
- } | |
- | |
- /** | |
- * Fast way when copy size doesn't exceed 256 bytes | |
- */ | |
- if (n <= 32) { | |
- rte_mov16((uint8_t *)dst, (const uint8_t *)src); | |
- rte_mov16((uint8_t *)dst - 16 + n, | |
- (const uint8_t *)src - 16 + n); | |
- return ret; | |
- } | |
- if (n <= 48) { | |
- rte_mov16((uint8_t *)dst, (const uint8_t *)src); | |
- rte_mov16((uint8_t *)dst + 16, (const uint8_t *)src + 16); | |
- rte_mov16((uint8_t *)dst - 16 + n, | |
- (const uint8_t *)src - 16 + n); | |
- return ret; | |
- } | |
- if (n <= 64) { | |
- rte_mov32((uint8_t *)dst, (const uint8_t *)src); | |
- rte_mov32((uint8_t *)dst - 32 + n, | |
- (const uint8_t *)src - 32 + n); | |
- return ret; | |
- } | |
- if (n <= 256) { | |
- if (n >= 128) { | |
- n -= 128; | |
- rte_mov128((uint8_t *)dst, (const uint8_t *)src); | |
- src = (const uint8_t *)src + 128; | |
- dst = (uint8_t *)dst + 128; | |
- } | |
-COPY_BLOCK_128_BACK31: | |
- if (n >= 64) { | |
- n -= 64; | |
- rte_mov64((uint8_t *)dst, (const uint8_t *)src); | |
- src = (const uint8_t *)src + 64; | |
- dst = (uint8_t *)dst + 64; | |
- } | |
- if (n > 32) { | |
- rte_mov32((uint8_t *)dst, (const uint8_t *)src); | |
- rte_mov32((uint8_t *)dst - 32 + n, | |
- (const uint8_t *)src - 32 + n); | |
- return ret; | |
- } | |
- if (n > 0) { | |
- rte_mov32((uint8_t *)dst - 32 + n, | |
- (const uint8_t *)src - 32 + n); | |
- } | |
- return ret; | |
- } | |
- | |
- /** | |
- * Make store aligned when copy size exceeds 256 bytes | |
- */ | |
- dstofss = (uintptr_t)dst & 0x1F; | |
- if (dstofss > 0) { | |
- dstofss = 32 - dstofss; | |
- n -= dstofss; | |
- rte_mov32((uint8_t *)dst, (const uint8_t *)src); | |
- src = (const uint8_t *)src + dstofss; | |
- dst = (uint8_t *)dst + dstofss; | |
- } | |
- | |
- /** | |
- * Copy 128-byte blocks | |
- */ | |
- rte_mov128blocks((uint8_t *)dst, (const uint8_t *)src, n); | |
- bits = n; | |
- n = n & 127; | |
- bits -= n; | |
- src = (const uint8_t *)src + bits; | |
- dst = (uint8_t *)dst + bits; | |
- | |
- /** | |
- * Copy whatever left | |
- */ | |
- goto COPY_BLOCK_128_BACK31; | |
-} | |
- | |
-#else /* RTE_MACHINE_CPUFLAG */ | |
- | |
-/** | |
- * SSE & AVX implementation below | |
- */ | |
- | |
-/** | |
- * Copy 16 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov16(uint8_t *dst, const uint8_t *src) | |
-{ | |
- __m128i xmm0; | |
- | |
- xmm0 = _mm_loadu_si128((const __m128i *)(const __m128i *)src); | |
- _mm_storeu_si128((__m128i *)dst, xmm0); | |
-} | |
- | |
-/** | |
- * Copy 32 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov32(uint8_t *dst, const uint8_t *src) | |
-{ | |
- rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); | |
- rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); | |
-} | |
- | |
-/** | |
- * Copy 64 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov64(uint8_t *dst, const uint8_t *src) | |
-{ | |
- rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); | |
- rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); | |
- rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16); | |
- rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16); | |
-} | |
- | |
-/** | |
- * Copy 128 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov128(uint8_t *dst, const uint8_t *src) | |
-{ | |
- rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); | |
- rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); | |
- rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16); | |
- rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16); | |
- rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16); | |
- rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16); | |
- rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16); | |
- rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16); | |
-} | |
- | |
-/** | |
- * Copy 256 bytes from one location to another, | |
- * locations should not overlap. | |
- */ | |
-static inline void | |
-rte_mov256(uint8_t *dst, const uint8_t *src) | |
-{ | |
- rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); | |
- rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); | |
- rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16); | |
- rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16); | |
- rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16); | |
- rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16); | |
- rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16); | |
- rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16); | |
- rte_mov16((uint8_t *)dst + 8 * 16, (const uint8_t *)src + 8 * 16); | |
- rte_mov16((uint8_t *)dst + 9 * 16, (const uint8_t *)src + 9 * 16); | |
- rte_mov16((uint8_t *)dst + 10 * 16, (const uint8_t *)src + 10 * 16); | |
- rte_mov16((uint8_t *)dst + 11 * 16, (const uint8_t *)src + 11 * 16); | |
- rte_mov16((uint8_t *)dst + 12 * 16, (const uint8_t *)src + 12 * 16); | |
- rte_mov16((uint8_t *)dst + 13 * 16, (const uint8_t *)src + 13 * 16); | |
- rte_mov16((uint8_t *)dst + 14 * 16, (const uint8_t *)src + 14 * 16); | |
- rte_mov16((uint8_t *)dst + 15 * 16, (const uint8_t *)src + 15 * 16); | |
-} | |
- | |
-/** | |
- * Macro for copying unaligned block from one location to another with constant load offset, | |
- * 47 bytes leftover maximum, | |
- * locations should not overlap. | |
- * Requirements: | |
- * - Store is aligned | |
- * - Load offset is <offset>, which must be immediate value within [1, 15] | |
- * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading | |
- * - <dst>, <src>, <len> must be variables | |
- * - __m128i <xmm0> ~ <xmm8> must be pre-defined | |
- */ | |
-#define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \ | |
-({ \ | |
- int tmp; \ | |
- while (len >= 128 + 16 - offset) { \ | |
- xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ | |
- len -= 128; \ | |
- xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \ | |
- xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \ | |
- xmm3 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 3 * 16)); \ | |
- xmm4 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 4 * 16)); \ | |
- xmm5 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 5 * 16)); \ | |
- xmm6 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 6 * 16)); \ | |
- xmm7 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 7 * 16)); \ | |
- xmm8 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 8 * 16)); \ | |
- src = (const uint8_t *)src + 128; \ | |
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ | |
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ | |
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \ | |
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \ | |
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \ | |
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \ | |
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \ | |
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \ | |
- dst = (uint8_t *)dst + 128; \ | |
- } \ | |
- tmp = len; \ | |
- len = ((len - 16 + offset) & 127) + 16 - offset; \ | |
- tmp -= len; \ | |
- src = (const uint8_t *)src + tmp; \ | |
- dst = (uint8_t *)dst + tmp; \ | |
- if (len >= 32 + 16 - offset) { \ | |
- while (len >= 32 + 16 - offset) { \ | |
- xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ | |
- len -= 32; \ | |
- xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \ | |
- xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \ | |
- src = (const uint8_t *)src + 32; \ | |
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ | |
- _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ | |
- dst = (uint8_t *)dst + 32; \ | |
- } \ | |
- tmp = len; \ | |
- len = ((len - 16 + offset) & 31) + 16 - offset; \ | |
- tmp -= len; \ | |
- src = (const uint8_t *)src + tmp; \ | |
- dst = (uint8_t *)dst + tmp; \ | |
- } \ | |
-}) | |
- | |
-/** | |
- * Macro for copying unaligned block from one location to another, | |
- * 47 bytes leftover maximum, | |
- * locations should not overlap. | |
- * Use switch here because the aligning instruction requires immediate value for shift count. | |
- * Requirements: | |
- * - Store is aligned | |
- * - Load offset is <offset>, which must be within [1, 15] | |
- * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading | |
- * - <dst>, <src>, <len> must be variables | |
- * - __m128i <xmm0> ~ <xmm8> used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined | |
- */ | |
-#define MOVEUNALIGNED_LEFT47(dst, src, len, offset) \ | |
-({ \ | |
- switch (offset) { \ | |
- case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break; \ | |
- case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break; \ | |
- case 0x03: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x03); break; \ | |
- case 0x04: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x04); break; \ | |
- case 0x05: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x05); break; \ | |
- case 0x06: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x06); break; \ | |
- case 0x07: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x07); break; \ | |
- case 0x08: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x08); break; \ | |
- case 0x09: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x09); break; \ | |
- case 0x0A: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0A); break; \ | |
- case 0x0B: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0B); break; \ | |
- case 0x0C: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0C); break; \ | |
- case 0x0D: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0D); break; \ | |
- case 0x0E: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0E); break; \ | |
- case 0x0F: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0F); break; \ | |
- default:; \ | |
- } \ | |
-}) | |
- | |
-static inline void * | |
-rte_memcpy(void *dst, const void *src, size_t n) | |
-{ | |
- __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8; | |
- uintptr_t dstu = (uintptr_t)dst; | |
- uintptr_t srcu = (uintptr_t)src; | |
- void *ret = dst; | |
- size_t dstofss; | |
- size_t srcofs; | |
- | |
- /** | |
- * Copy less than 16 bytes | |
- */ | |
- if (n < 16) { | |
- if (n & 0x01) { | |
- *(uint8_t *)dstu = *(const uint8_t *)srcu; | |
- srcu = (uintptr_t)((const uint8_t *)srcu + 1); | |
- dstu = (uintptr_t)((uint8_t *)dstu + 1); | |
- } | |
- if (n & 0x02) { | |
- *(uint16_t *)dstu = *(const uint16_t *)srcu; | |
- srcu = (uintptr_t)((const uint16_t *)srcu + 1); | |
- dstu = (uintptr_t)((uint16_t *)dstu + 1); | |
- } | |
- if (n & 0x04) { | |
- *(uint32_t *)dstu = *(const uint32_t *)srcu; | |
- srcu = (uintptr_t)((const uint32_t *)srcu + 1); | |
- dstu = (uintptr_t)((uint32_t *)dstu + 1); | |
- } | |
- if (n & 0x08) { | |
- *(uint64_t *)dstu = *(const uint64_t *)srcu; | |
- } | |
- return ret; | |
- } | |
- | |
- /** | |
- * Fast way when copy size doesn't exceed 512 bytes | |
- */ | |
- if (n <= 32) { | |
- rte_mov16((uint8_t *)dst, (const uint8_t *)src); | |
- rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); | |
- return ret; | |
- } | |
- if (n <= 48) { | |
- rte_mov32((uint8_t *)dst, (const uint8_t *)src); | |
- rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); | |
- return ret; | |
- } | |
- if (n <= 64) { | |
- rte_mov32((uint8_t *)dst, (const uint8_t *)src); | |
- rte_mov16((uint8_t *)dst + 32, (const uint8_t *)src + 32); | |
- rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); | |
- return ret; | |
- } | |
- if (n <= 128) { | |
- goto COPY_BLOCK_128_BACK15; | |
- } | |
- if (n <= 512) { | |
- if (n >= 256) { | |
- n -= 256; | |
- rte_mov128((uint8_t *)dst, (const uint8_t *)src); | |
- rte_mov128((uint8_t *)dst + 128, (const uint8_t *)src + 128); | |
- src = (const uint8_t *)src + 256; | |
- dst = (uint8_t *)dst + 256; | |
- } | |
-COPY_BLOCK_255_BACK15: | |
- if (n >= 128) { | |
- n -= 128; | |
- rte_mov128((uint8_t *)dst, (const uint8_t *)src); | |
- src = (const uint8_t *)src + 128; | |
- dst = (uint8_t *)dst + 128; | |
- } | |
-COPY_BLOCK_128_BACK15: | |
- if (n >= 64) { | |
- n -= 64; | |
- rte_mov64((uint8_t *)dst, (const uint8_t *)src); | |
- src = (const uint8_t *)src + 64; | |
- dst = (uint8_t *)dst + 64; | |
- } | |
-COPY_BLOCK_64_BACK15: | |
- if (n >= 32) { | |
- n -= 32; | |
- rte_mov32((uint8_t *)dst, (const uint8_t *)src); | |
- src = (const uint8_t *)src + 32; | |
- dst = (uint8_t *)dst + 32; | |
- } | |
- if (n > 16) { | |
- rte_mov16((uint8_t *)dst, (const uint8_t *)src); | |
- rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); | |
- return ret; | |
- } | |
- if (n > 0) { | |
- rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); | |
- } | |
- return ret; | |
- } | |
- | |
- /** | |
- * Make store aligned when copy size exceeds 512 bytes, | |
- * and make sure the first 15 bytes are copied, because | |
- * unaligned copy functions require up to 15 bytes | |
- * backwards access. | |
- */ | |
- dstofss = (uintptr_t)dst & 0x0F; | |
- if (dstofss > 0) { | |
- dstofss = 16 - dstofss + 16; | |
- n -= dstofss; | |
- rte_mov32((uint8_t *)dst, (const uint8_t *)src); | |
- src = (const uint8_t *)src + dstofss; | |
- dst = (uint8_t *)dst + dstofss; | |
- } | |
- srcofs = ((uintptr_t)src & 0x0F); | |
- | |
- /** | |
- * For aligned copy | |
- */ | |
- if (srcofs == 0) { | |
- /** | |
- * Copy 256-byte blocks | |
- */ | |
- for (; n >= 256; n -= 256) { | |
- rte_mov256((uint8_t *)dst, (const uint8_t *)src); | |
- dst = (uint8_t *)dst + 256; | |
- src = (const uint8_t *)src + 256; | |
- } | |
- | |
- /** | |
- * Copy whatever left | |
- */ | |
- goto COPY_BLOCK_255_BACK15; | |
- } | |
- | |
- /** | |
- * For copy with unaligned load | |
- */ | |
- MOVEUNALIGNED_LEFT47(dst, src, n, srcofs); | |
- | |
- /** | |
- * Copy whatever left | |
- */ | |
- goto COPY_BLOCK_64_BACK15; | |
-} | |
- | |
-#endif /* RTE_MACHINE_CPUFLAG */ | |
- | |
-#ifdef __cplusplus | |
-} | |
+#ifdef RTE_ARCH_64 | |
+#include <rte_memcpy_64.h> | |
+#else | |
+#include <rte_memcpy_32.h> | |
#endif | |
-#endif /* _RTE_MEMCPY_X86_64_H_ */ | |
+#endif /* _RTE_MEMCPY_ARM_H_ */ | |
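(Aside on the hunk above: the removed x86 rte_memcpy dispatches sub-16-byte copies by testing the size bits and emitting one fixed-size chunk per set bit. A minimal standalone sketch of that tail-copy idea follows; the helper name is hypothetical and the real header interleaves this with the SSE paths.)

	/* Sketch only: each set bit of n selects one chunk, so at most four
	 * branches cover every length from 0 to 15. memcpy() with a constant
	 * size stands in for the unaligned casts used in the original. */
	#include <stdint.h>
	#include <stddef.h>
	#include <string.h>

	static inline void *
	copy_lt16_sketch(void *dst, const void *src, size_t n)
	{
		uintptr_t d = (uintptr_t)dst;
		uintptr_t s = (uintptr_t)src;

		if (n & 0x01) {				/* 1-byte chunk */
			*(uint8_t *)d = *(const uint8_t *)s;
			d += 1; s += 1;
		}
		if (n & 0x02) {				/* 2-byte chunk */
			memcpy((void *)d, (const void *)s, 2);
			d += 2; s += 2;
		}
		if (n & 0x04) {				/* 4-byte chunk */
			memcpy((void *)d, (const void *)s, 4);
			d += 4; s += 4;
		}
		if (n & 0x08)				/* 8-byte chunk */
			memcpy((void *)d, (const void *)s, 8);
		return dst;
	}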
diff --git a/include/dpdk/rte_prefetch.h b/include/dpdk/rte_prefetch.h | |
index 5dac47e..aa37de5 100644 | |
--- a/include/dpdk/rte_prefetch.h | |
+++ b/include/dpdk/rte_prefetch.h | |
@@ -1,8 +1,7 @@ | |
-/*- | |
+/* | |
* BSD LICENSE | |
* | |
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. | |
- * All rights reserved. | |
+ * Copyright(c) 2015 RehiveTech. All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
@@ -14,7 +13,7 @@ | |
* notice, this list of conditions and the following disclaimer in | |
* the documentation and/or other materials provided with the | |
* distribution. | |
- * * Neither the name of Intel Corporation nor the names of its | |
+ * * Neither the name of RehiveTech nor the names of its | |
* contributors may be used to endorse or promote products derived | |
* from this software without specific prior written permission. | |
* | |
@@ -31,37 +30,13 @@ | |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
-#ifndef _RTE_PREFETCH_X86_64_H_ | |
-#define _RTE_PREFETCH_X86_64_H_ | |
+#ifndef _RTE_PREFETCH_ARM_H_ | |
+#define _RTE_PREFETCH_ARM_H_ | |
-#ifdef __cplusplus | |
-extern "C" { | |
+#ifdef RTE_ARCH_64 | |
+#include <rte_prefetch_64.h> | |
+#else | |
+#include <rte_prefetch_32.h> | |
#endif | |
-#include "generic/rte_prefetch.h" | |
- | |
-static inline void rte_prefetch0(const volatile void *p) | |
-{ | |
- asm volatile ("prefetcht0 %[p]" : : [p] "m" (*(const volatile char *)p)); | |
-} | |
- | |
-static inline void rte_prefetch1(const volatile void *p) | |
-{ | |
- asm volatile ("prefetcht1 %[p]" : : [p] "m" (*(const volatile char *)p)); | |
-} | |
- | |
-static inline void rte_prefetch2(const volatile void *p) | |
-{ | |
- asm volatile ("prefetcht2 %[p]" : : [p] "m" (*(const volatile char *)p)); | |
-} | |
- | |
-static inline void rte_prefetch_non_temporal(const volatile void *p) | |
-{ | |
- asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char *)p)); | |
-} | |
- | |
-#ifdef __cplusplus | |
-} | |
-#endif | |
- | |
-#endif /* _RTE_PREFETCH_X86_64_H_ */ | |
+#endif /* _RTE_PREFETCH_ARM_H_ */ | |
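(Aside: the removed x86 header emits prefetcht0/t1/t2/nta directly; the arm64 replacement is only included here, not shown. A portable sketch using the GCC/Clang __builtin_prefetch builtin is below; assuming that approach is illustrative only, the actual rte_prefetch_64.h may use inline asm instead.)

	/* Hedged sketch: __builtin_prefetch(addr, rw, locality) with locality
	 * 3/2/1 maps roughly to prefetcht0/t1/t2 on x86, and 0 requests a
	 * non-temporal (low cache pollution) hint. */
	static inline void prefetch0_sketch(const volatile void *p)
	{
		__builtin_prefetch((const void *)p, 0, 3);	/* read, keep in all cache levels */
	}

	static inline void prefetch_non_temporal_sketch(const volatile void *p)
	{
		__builtin_prefetch((const void *)p, 0, 0);	/* read, minimal cache pollution */
	}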
diff --git a/include/dpdk/rte_rwlock.h b/include/dpdk/rte_rwlock.h | |
index afd1c3c..664bec8 100644 | |
--- a/include/dpdk/rte_rwlock.h | |
+++ b/include/dpdk/rte_rwlock.h | |
@@ -1,82 +1,40 @@ | |
-/*- | |
- * BSD LICENSE | |
- * | |
- * Copyright(c) 2015 Intel Corporation. All rights reserved. | |
- * All rights reserved. | |
- * | |
- * Redistribution and use in source and binary forms, with or without | |
- * modification, are permitted provided that the following conditions | |
- * are met: | |
- * | |
- * * Redistributions of source code must retain the above copyright | |
- * notice, this list of conditions and the following disclaimer. | |
- * * Redistributions in binary form must reproduce the above copyright | |
- * notice, this list of conditions and the following disclaimer in | |
- * the documentation and/or other materials provided with the | |
- * distribution. | |
- * * Neither the name of Intel Corporation nor the names of its | |
- * contributors may be used to endorse or promote products derived | |
- * from this software without specific prior written permission. | |
- * | |
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
- */ | |
+/* copied from ppc_64 */ | |
-#ifndef _RTE_RWLOCK_X86_64_H_ | |
-#define _RTE_RWLOCK_X86_64_H_ | |
+#ifndef _RTE_RWLOCK_ARM_H_ | |
+#define _RTE_RWLOCK_ARM_H_ | |
#ifdef __cplusplus | |
extern "C" { | |
#endif | |
#include "generic/rte_rwlock.h" | |
-#include "rte_spinlock.h" | |
static inline void | |
rte_rwlock_read_lock_tm(rte_rwlock_t *rwl) | |
{ | |
- if (likely(rte_try_tm(&rwl->cnt))) | |
- return; | |
rte_rwlock_read_lock(rwl); | |
} | |
static inline void | |
rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl) | |
{ | |
- if (unlikely(rwl->cnt)) | |
- rte_rwlock_read_unlock(rwl); | |
- else | |
- rte_xend(); | |
+ rte_rwlock_read_unlock(rwl); | |
} | |
static inline void | |
rte_rwlock_write_lock_tm(rte_rwlock_t *rwl) | |
{ | |
- if (likely(rte_try_tm(&rwl->cnt))) | |
- return; | |
rte_rwlock_write_lock(rwl); | |
} | |
static inline void | |
rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl) | |
{ | |
- if (unlikely(rwl->cnt)) | |
- rte_rwlock_write_unlock(rwl); | |
- else | |
- rte_xend(); | |
+ rte_rwlock_write_unlock(rwl); | |
} | |
#ifdef __cplusplus | |
} | |
#endif | |
-#endif /* _RTE_RWLOCK_X86_64_H_ */ | |
+#endif /* _RTE_RWLOCK_ARM_H_ */ | |
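(Aside: the ARM header drops the RTM elision path, so the *_tm entry points simply forward to the plain rwlock calls and callers keep a single code path on both architectures. A small hedged usage sketch; the "counters" data and function names are hypothetical.)

	/* On x86 the _tm variants may elide the lock with RTM; on ARM, per the
	 * hunk above, they fall through to the ordinary rwlock. */
	#include <stdint.h>
	#include <rte_rwlock.h>

	static rte_rwlock_t counters_lock = RTE_RWLOCK_INITIALIZER;
	static uint64_t counters[64];

	static uint64_t read_counter(unsigned int idx)
	{
		uint64_t v;

		rte_rwlock_read_lock_tm(&counters_lock);
		v = counters[idx];
		rte_rwlock_read_unlock_tm(&counters_lock);
		return v;
	}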
diff --git a/include/dpdk/rte_spinlock.h b/include/dpdk/rte_spinlock.h | |
index 8e630c2..396a42e 100644 | |
--- a/include/dpdk/rte_spinlock.h | |
+++ b/include/dpdk/rte_spinlock.h | |
@@ -1,8 +1,7 @@ | |
-/*- | |
+/* | |
* BSD LICENSE | |
* | |
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
- * All rights reserved. | |
+ * Copyright(c) 2015 RehiveTech. All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
@@ -14,7 +13,7 @@ | |
* notice, this list of conditions and the following disclaimer in | |
* the documentation and/or other materials provided with the | |
* distribution. | |
- * * Neither the name of Intel Corporation nor the names of its | |
+ * * Neither the name of RehiveTech nor the names of its | |
* contributors may be used to endorse or promote products derived | |
* from this software without specific prior written permission. | |
* | |
@@ -31,164 +30,63 @@ | |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
-#ifndef _RTE_SPINLOCK_X86_64_H_ | |
-#define _RTE_SPINLOCK_X86_64_H_ | |
+#ifndef _RTE_SPINLOCK_ARM_H_ | |
+#define _RTE_SPINLOCK_ARM_H_ | |
+ | |
+#ifndef RTE_FORCE_INTRINSICS | |
+# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS | |
+#endif | |
#ifdef __cplusplus | |
extern "C" { | |
#endif | |
+#include <rte_common.h> | |
#include "generic/rte_spinlock.h" | |
-#include "rte_rtm.h" | |
-#include "rte_cpuflags.h" | |
-#include "rte_branch_prediction.h" | |
-#include "rte_common.h" | |
- | |
-#define RTE_RTM_MAX_RETRIES (10) | |
-#define RTE_XABORT_LOCK_BUSY (0xff) | |
- | |
-#ifndef RTE_FORCE_INTRINSICS | |
-static inline void | |
-rte_spinlock_lock(rte_spinlock_t *sl) | |
-{ | |
- int lock_val = 1; | |
- asm volatile ( | |
- "1:\n" | |
- "xchg %[locked], %[lv]\n" | |
- "test %[lv], %[lv]\n" | |
- "jz 3f\n" | |
- "2:\n" | |
- "pause\n" | |
- "cmpl $0, %[locked]\n" | |
- "jnz 2b\n" | |
- "jmp 1b\n" | |
- "3:\n" | |
- : [locked] "=m" (sl->locked), [lv] "=q" (lock_val) | |
- : "[lv]" (lock_val) | |
- : "memory"); | |
-} | |
- | |
-static inline void | |
-rte_spinlock_unlock (rte_spinlock_t *sl) | |
-{ | |
- int unlock_val = 0; | |
- asm volatile ( | |
- "xchg %[locked], %[ulv]\n" | |
- : [locked] "=m" (sl->locked), [ulv] "=q" (unlock_val) | |
- : "[ulv]" (unlock_val) | |
- : "memory"); | |
-} | |
- | |
-static inline int | |
-rte_spinlock_trylock (rte_spinlock_t *sl) | |
-{ | |
- int lockval = 1; | |
- | |
- asm volatile ( | |
- "xchg %[locked], %[lockval]" | |
- : [locked] "=m" (sl->locked), [lockval] "=q" (lockval) | |
- : "[lockval]" (lockval) | |
- : "memory"); | |
- | |
- return lockval == 0; | |
-} | |
-#endif | |
- | |
-extern uint8_t rte_rtm_supported; | |
static inline int rte_tm_supported(void) | |
{ | |
- return rte_rtm_supported; | |
-} | |
- | |
-static inline int | |
-rte_try_tm(volatile int *lock) | |
-{ | |
- if (!rte_rtm_supported) | |
- return 0; | |
- | |
- int retries = RTE_RTM_MAX_RETRIES; | |
- | |
- while (likely(retries--)) { | |
- | |
- unsigned int status = rte_xbegin(); | |
- | |
- if (likely(RTE_XBEGIN_STARTED == status)) { | |
- if (unlikely(*lock)) | |
- rte_xabort(RTE_XABORT_LOCK_BUSY); | |
- else | |
- return 1; | |
- } | |
- while (*lock) | |
- rte_pause(); | |
- | |
- if ((status & RTE_XABORT_EXPLICIT) && | |
- (RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY)) | |
- continue; | |
- | |
- if ((status & RTE_XABORT_RETRY) == 0) /* do not retry */ | |
- break; | |
- } | |
return 0; | |
} | |
static inline void | |
rte_spinlock_lock_tm(rte_spinlock_t *sl) | |
{ | |
- if (likely(rte_try_tm(&sl->locked))) | |
- return; | |
- | |
rte_spinlock_lock(sl); /* fall-back */ | |
} | |
static inline int | |
rte_spinlock_trylock_tm(rte_spinlock_t *sl) | |
{ | |
- if (likely(rte_try_tm(&sl->locked))) | |
- return 1; | |
- | |
return rte_spinlock_trylock(sl); | |
} | |
static inline void | |
rte_spinlock_unlock_tm(rte_spinlock_t *sl) | |
{ | |
- if (unlikely(sl->locked)) | |
- rte_spinlock_unlock(sl); | |
- else | |
- rte_xend(); | |
+ rte_spinlock_unlock(sl); | |
} | |
static inline void | |
rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) | |
{ | |
- if (likely(rte_try_tm(&slr->sl.locked))) | |
- return; | |
- | |
rte_spinlock_recursive_lock(slr); /* fall-back */ | |
} | |
static inline void | |
rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) | |
{ | |
- if (unlikely(slr->sl.locked)) | |
- rte_spinlock_recursive_unlock(slr); | |
- else | |
- rte_xend(); | |
+ rte_spinlock_recursive_unlock(slr); | |
} | |
static inline int | |
rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) | |
{ | |
- if (likely(rte_try_tm(&slr->sl.locked))) | |
- return 1; | |
- | |
return rte_spinlock_recursive_trylock(slr); | |
} | |
- | |
#ifdef __cplusplus | |
} | |
#endif | |
-#endif /* _RTE_SPINLOCK_X86_64_H_ */ | |
+#endif /* _RTE_SPINLOCK_ARM_H_ */ | |
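(Aside: the ARM header requires RTE_FORCE_INTRINSICS, so the spinlock falls back to the generic compiler-intrinsic implementation rather than the removed xchg asm. Below is a hedged sketch of a compare-and-swap lock built on GCC __atomic builtins; it shows the shape of an intrinsics-based lock, not DPDK's exact generic header, and the names are illustrative.)

	/* Test-and-test-and-set spinlock using __atomic builtins. */
	typedef struct { volatile int locked; } toy_spinlock_t;

	static inline void toy_spinlock_lock(toy_spinlock_t *sl)
	{
		int exp = 0;

		while (!__atomic_compare_exchange_n(&sl->locked, &exp, 1, 0,
						    __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
			while (__atomic_load_n(&sl->locked, __ATOMIC_RELAXED))
				;			/* spin until the lock looks free */
			exp = 0;			/* CAS overwrote exp; reset it */
		}
	}

	static inline void toy_spinlock_unlock(toy_spinlock_t *sl)
	{
		__atomic_store_n(&sl->locked, 0, __ATOMIC_RELEASE);
	}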
diff --git a/include/dpdk/rte_vect.h b/include/dpdk/rte_vect.h | |
index b698797..a33c054 100644 | |
--- a/include/dpdk/rte_vect.h | |
+++ b/include/dpdk/rte_vect.h | |
@@ -1,8 +1,7 @@ | |
/*- | |
* BSD LICENSE | |
* | |
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. | |
- * All rights reserved. | |
+ * Copyright(c) 2015 Cavium Networks. All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
@@ -14,7 +13,7 @@ | |
* notice, this list of conditions and the following disclaimer in | |
* the documentation and/or other materials provided with the | |
* distribution. | |
- * * Neither the name of Intel Corporation nor the names of its | |
+ * * Neither the name of Cavium Networks nor the names of its | |
* contributors may be used to endorse or promote products derived | |
* from this software without specific prior written permission. | |
* | |
@@ -31,48 +30,16 @@ | |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
-#ifndef _RTE_VECT_H_ | |
-#define _RTE_VECT_H_ | |
+#ifndef _RTE_VECT_ARM_H_ | |
+#define _RTE_VECT_ARM_H_ | |
-/** | |
- * @file | |
- * | |
- * RTE SSE/AVX related header. | |
- */ | |
- | |
-#if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) | |
- | |
-#ifdef __SSE__ | |
-#include <xmmintrin.h> | |
-#endif | |
- | |
-#ifdef __SSE2__ | |
-#include <emmintrin.h> | |
-#endif | |
- | |
-#ifdef __SSE3__ | |
-#include <tmmintrin.h> | |
-#endif | |
- | |
-#if defined(__SSE4_2__) || defined(__SSE4_1__) | |
-#include <smmintrin.h> | |
-#endif | |
- | |
-#if defined(__AVX__) | |
-#include <immintrin.h> | |
-#endif | |
- | |
-#else | |
- | |
-#include <x86intrin.h> | |
- | |
-#endif | |
+#include "arm_neon.h" | |
#ifdef __cplusplus | |
extern "C" { | |
#endif | |
-typedef __m128i xmm_t; | |
+typedef int32x4_t xmm_t; | |
#define XMM_SIZE (sizeof(xmm_t)) | |
#define XMM_MASK (XMM_SIZE - 1) | |
@@ -84,49 +51,33 @@ typedef union rte_xmm { | |
uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; | |
uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; | |
double pd[XMM_SIZE / sizeof(double)]; | |
-} rte_xmm_t; | |
+} __attribute__((aligned(16))) rte_xmm_t; | |
-#ifdef __AVX__ | |
+#ifdef RTE_ARCH_ARM | |
+/* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */ | |
+static __inline uint8x16_t | |
+vqtbl1q_u8(uint8x16_t a, uint8x16_t b) | |
+{ | |
+ uint8_t i, pos; | |
+ rte_xmm_t rte_a, rte_b, rte_ret; | |
-typedef __m256i ymm_t; | |
+ vst1q_u8(rte_a.u8, a); | |
+ vst1q_u8(rte_b.u8, b); | |
-#define YMM_SIZE (sizeof(ymm_t)) | |
-#define YMM_MASK (YMM_SIZE - 1) | |
+ for (i = 0; i < 16; i++) { | |
+ pos = rte_b.u8[i]; | |
+ if (pos < 16) | |
+ rte_ret.u8[i] = rte_a.u8[pos]; | |
+ else | |
+ rte_ret.u8[i] = 0; | |
+ } | |
-typedef union rte_ymm { | |
- ymm_t y; | |
- xmm_t x[YMM_SIZE / sizeof(xmm_t)]; | |
- uint8_t u8[YMM_SIZE / sizeof(uint8_t)]; | |
- uint16_t u16[YMM_SIZE / sizeof(uint16_t)]; | |
- uint32_t u32[YMM_SIZE / sizeof(uint32_t)]; | |
- uint64_t u64[YMM_SIZE / sizeof(uint64_t)]; | |
- double pd[YMM_SIZE / sizeof(double)]; | |
-} rte_ymm_t; | |
- | |
-#endif /* __AVX__ */ | |
- | |
-#ifdef RTE_ARCH_I686 | |
-#define _mm_cvtsi128_si64(a) ({ \ | |
- rte_xmm_t m; \ | |
- m.x = (a); \ | |
- (m.u64[0]); \ | |
-}) | |
+ return vld1q_u8(rte_ret.u8); | |
+} | |
#endif | |
-/* | |
- * Prior to version 12.1 icc doesn't support _mm_set_epi64x. | |
- */ | |
-#if (defined(__ICC) && __ICC < 1210) | |
-#define _mm_set_epi64x(a, b) ({ \ | |
- rte_xmm_t m; \ | |
- m.u64[0] = b; \ | |
- m.u64[1] = a; \ | |
- (m.x); \ | |
-}) | |
-#endif /* (defined(__ICC) && __ICC < 1210) */ | |
- | |
#ifdef __cplusplus | |
} | |
#endif | |
-#endif /* _RTE_VECT_H_ */ | |
+#endif | |
-- | |
2.1.4 |
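(Aside on the final rte_vect.h hunk: it adds a scalar emulation of vqtbl1q_u8 for AArch32, where the AArch64 table-lookup intrinsic is unavailable. Each byte of the index vector selects a byte of the data vector, and indexes of 16 or more produce zero. A plain-C restatement of that semantic, for illustration only:)

	/* out[i] = table[idx[i]] when idx[i] < 16, else 0; this is the same
	 * contract as the NEON fallback added in the patch above. */
	#include <stdint.h>

	static void tbl1_scalar(uint8_t out[16], const uint8_t table[16],
				const uint8_t idx[16])
	{
		for (int i = 0; i < 16; i++)
			out[i] = (idx[i] < 16) ? table[idx[i]] : 0;
	}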