@amadio
Last active July 17, 2021 16:27
Workaround for compiling libffi for K1OM architecture (Intel® Xeon Phi™)
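The K1OM core (Knights Corner) has no SSE/AVX units, only 512-bit zmm registers and the IMCI instruction set, which is why the patch below widens libffi's per-register argument slots from 16 to 64 bytes and replaces the xmm loads/stores with kmov/vpackstore/vloadunpack sequences. For a quick smoke test after applying the patch, a minimal call through the standard libffi API that exercises the floating-point argument path could look like the sketch below (this is only an illustration, not part of the patch):

    /* build with: icc -mmic test_call.c -lffi (paths/flags depend on your toolchain) */
    #include <ffi.h>
    #include <stdio.h>

    static double add(double a, double b) { return a + b; }

    int main(void)
    {
        ffi_cif cif;
        ffi_type *args[2] = { &ffi_type_double, &ffi_type_double };
        double x = 1.5, y = 2.25, result = 0.0;
        void *values[2] = { &x, &y };

        /* Describe the call: double add(double, double). */
        if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_double, args) != FFI_OK)
            return 1;

        /* Double arguments go through the SSE/zmm register path rewritten below. */
        ffi_call(&cif, FFI_FN(add), &result, values);
        printf("%g\n", result);   /* expected: 3.75 */
        return 0;
    }
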
diff -ur libffi-3.2.1.orig/src/raw_api.c libffi-3.2.1/src/raw_api.c
--- libffi-3.2.1.orig/src/raw_api.c 2016-08-11 10:02:18.561329225 -0300
+++ libffi-3.2.1/src/raw_api.c 2016-08-11 10:03:43.853948157 -0300
@@ -29,7 +29,7 @@
#include <ffi.h>
#include <ffi_common.h>
-#if !FFI_NO_RAW_API
+//#if !FFI_NO_RAW_API
size_t
ffi_raw_size (ffi_cif *cif)
@@ -191,7 +191,7 @@
}
}
-#if !FFI_NATIVE_RAW_API
+//#if !FFI_NATIVE_RAW_API
/* This is a generic definition of ffi_raw_call, to be used if the
@@ -208,7 +208,7 @@
ffi_call (cif, fn, rvalue, avalue);
}
-#if FFI_CLOSURES /* base system provides closures */
+//#if FFI_CLOSURES /* base system provides closures */
static void
ffi_translate_args (ffi_cif *cif, void *rvalue,
@@ -244,10 +244,10 @@
return status;
}
-#endif /* FFI_CLOSURES */
-#endif /* !FFI_NATIVE_RAW_API */
+//#endif /* FFI_CLOSURES */
+//#endif /* !FFI_NATIVE_RAW_API */
-#if FFI_CLOSURES
+//#if FFI_CLOSURES
/* Again, here is the generic version of ffi_prep_raw_closure, which
* will install an intermediate "hub" for translation of arguments from
@@ -262,6 +262,6 @@
return ffi_prep_raw_closure_loc (cl, cif, fun, user_data, cl);
}
-#endif /* FFI_CLOSURES */
+//#endif /* FFI_CLOSURES */
-#endif /* !FFI_NO_RAW_API */
+//#endif /* !FFI_NO_RAW_API */
Only in libffi-3.2.1/src: raw_api.c.orig
diff -ur libffi-3.2.1.orig/src/x86/ffi64.c libffi-3.2.1/src/x86/ffi64.c
--- libffi-3.2.1.orig/src/x86/ffi64.c 2016-08-11 10:02:18.557329149 -0300
+++ libffi-3.2.1/src/x86/ffi64.c 2016-08-11 10:03:43.853948157 -0300
@@ -1,10 +1,8 @@
/* -----------------------------------------------------------------------
- ffi64.c - Copyright (c) 2013 The Written Word, Inc.
- Copyright (c) 2011 Anthony Green
- Copyright (c) 2008, 2010 Red Hat, Inc.
- Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
-
- x86-64 Foreign Function Interface
+ ffi64.c - Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
+ Copyright (c) 2008 Red Hat, Inc.
+
+ x86-64 Foreign Function Interface
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -38,30 +36,13 @@
#define MAX_GPR_REGS 6
#define MAX_SSE_REGS 8
-#if defined(__INTEL_COMPILER)
-#include "xmmintrin.h"
-#define UINT128 __m128
-#else
-#if defined(__SUNPRO_C)
-#include <sunmedia_types.h>
-#define UINT128 __m128i
-#else
-#define UINT128 __int128_t
-#endif
-#endif
-
-union big_int_union
-{
- UINT32 i32;
- UINT64 i64;
- UINT128 i128;
-};
+typedef struct { int64_t m[8]; } __int512_t;
struct register_args
{
/* Registers for argument passing. */
UINT64 gpr[MAX_GPR_REGS];
- union big_int_union sse[MAX_SSE_REGS];
+ __int512_t sse[MAX_SSE_REGS];
};
extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
@@ -152,7 +133,7 @@
See the x86-64 PS ABI for details.
*/
-static size_t
+static int
classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
size_t byte_offset)
{
@@ -168,7 +149,7 @@
case FFI_TYPE_SINT64:
case FFI_TYPE_POINTER:
{
- size_t size = byte_offset + type->size;
+ int size = byte_offset + type->size;
if (size <= 4)
{
@@ -203,17 +184,15 @@
case FFI_TYPE_DOUBLE:
classes[0] = X86_64_SSEDF_CLASS;
return 1;
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
case FFI_TYPE_LONGDOUBLE:
classes[0] = X86_64_X87_CLASS;
classes[1] = X86_64_X87UP_CLASS;
return 2;
-#endif
case FFI_TYPE_STRUCT:
{
- const size_t UNITS_PER_WORD = 8;
- size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
- ffi_type **ptr;
+ const int UNITS_PER_WORD = 8;
+ int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ ffi_type **ptr;
int i;
enum x86_64_reg_class subclasses[MAX_CLASSES];
@@ -235,7 +214,7 @@
/* Merge the fields of structure. */
for (ptr = type->elements; *ptr != NULL; ptr++)
{
- size_t num;
+ int num;
byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
@@ -244,7 +223,7 @@
return 0;
for (i = 0; i < num; i++)
{
- size_t pos = byte_offset / 8;
+ int pos = byte_offset / 8;
classes[i + pos] =
merge_classes (subclasses[i], classes[i + pos]);
}
@@ -308,12 +287,11 @@
class. Return zero iff parameter should be passed in memory, otherwise
the number of registers. */
-static size_t
+static int
examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
_Bool in_return, int *pngpr, int *pnsse)
{
- size_t n;
- int i, ngpr, nsse;
+ int i, n, ngpr, nsse;
n = classify_argument (type, classes, 0);
if (n == 0)
@@ -354,9 +332,9 @@
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
- int gprcount, ssecount, i, avn, ngpr, nsse, flags;
+ int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
enum x86_64_reg_class classes[MAX_CLASSES];
- size_t bytes, n;
+ size_t bytes;
gprcount = ssecount = 0;
@@ -402,7 +380,7 @@
if (align < 8)
align = 8;
- bytes = ALIGN (bytes, align);
+ bytes = ALIGN(bytes, align);
bytes += cif->arg_types[i]->size;
}
else
@@ -414,7 +392,7 @@
if (ssecount)
flags |= 1 << 11;
cif->flags = flags;
- cif->bytes = (unsigned)ALIGN (bytes, 8);
+ cif->bytes = bytes;
return FFI_OK;
}
@@ -450,14 +428,15 @@
/* If the return value is passed in memory, add the pointer as the
first integer argument. */
if (ret_in_memory)
- reg_args->gpr[gprcount++] = (unsigned long) rvalue;
+ reg_args->gpr[gprcount++] = (long) rvalue;
avn = cif->nargs;
arg_types = cif->arg_types;
for (i = 0; i < avn; ++i)
{
- size_t n, size = arg_types[i]->size;
+ size_t size = arg_types[i]->size;
+ int n;
n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
if (n == 0
@@ -487,33 +466,32 @@
{
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
- /* Sign-extend integer arguments passed in general
- purpose registers, to cope with the fact that
- LLVM incorrectly assumes that this will be done
- (the x86-64 PS ABI does not specify this). */
- switch (arg_types[i]->type)
- {
- case FFI_TYPE_SINT8:
- *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
- break;
- case FFI_TYPE_SINT16:
- *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
- break;
- case FFI_TYPE_SINT32:
- *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
- break;
- default:
- reg_args->gpr[gprcount] = 0;
- memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
- }
+ reg_args->gpr[gprcount] = 0;
+ memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
gprcount++;
break;
case X86_64_SSE_CLASS:
case X86_64_SSEDF_CLASS:
- reg_args->sse[ssecount++].i64 = *(UINT64 *) a;
+ reg_args->sse[ssecount].m[0] = *(UINT64 *) a;
+ reg_args->sse[ssecount].m[1] = 0;
+ reg_args->sse[ssecount].m[2] = 0;
+ reg_args->sse[ssecount].m[3] = 0;
+ reg_args->sse[ssecount].m[4] = 0;
+ reg_args->sse[ssecount].m[5] = 0;
+ reg_args->sse[ssecount].m[6] = 0;
+ reg_args->sse[ssecount].m[7] = 0;
+ ssecount++;
break;
case X86_64_SSESF_CLASS:
- reg_args->sse[ssecount++].i32 = *(UINT32 *) a;
+ reg_args->sse[ssecount].m[0] = *(UINT32 *) a;
+ reg_args->sse[ssecount].m[1] = 0;
+ reg_args->sse[ssecount].m[2] = 0;
+ reg_args->sse[ssecount].m[3] = 0;
+ reg_args->sse[ssecount].m[4] = 0;
+ reg_args->sse[ssecount].m[5] = 0;
+ reg_args->sse[ssecount].m[6] = 0;
+ reg_args->sse[ssecount].m[7] = 0;
+ ssecount++;
break;
default:
abort();
@@ -538,21 +516,12 @@
{
volatile unsigned short *tramp;
- /* Sanity check on the cif ABI. */
- {
- int abi = cif->abi;
- if (UNLIKELY (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)))
- return FFI_BAD_ABI;
- }
-
tramp = (volatile unsigned short *) &closure->tramp[0];
tramp[0] = 0xbb49; /* mov <code>, %r11 */
- *((unsigned long long * volatile) &tramp[1])
- = (unsigned long) ffi_closure_unix64;
+ *(void * volatile *) &tramp[1] = ffi_closure_unix64;
tramp[5] = 0xba49; /* mov <data>, %r10 */
- *((unsigned long long * volatile) &tramp[6])
- = (unsigned long) codeloc;
+ *(void * volatile *) &tramp[6] = codeloc;
/* Set the carry bit iff the function uses any sse registers.
This is clc or stc, together with the first byte of the jmp. */
@@ -586,12 +555,12 @@
if (ret != FFI_TYPE_VOID)
{
enum x86_64_reg_class classes[MAX_CLASSES];
- size_t n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
+ int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
if (n == 0)
{
/* The return value goes in memory. Arrange for the closure
return value to go directly back to the original caller. */
- rvalue = (void *) (unsigned long) reg_args->gpr[gprcount++];
+ rvalue = (void *) reg_args->gpr[gprcount++];
/* We don't have to do anything in asm for the return. */
ret = FFI_TYPE_VOID;
}
@@ -609,11 +578,11 @@
avn = cif->nargs;
arg_types = cif->arg_types;
-
+
for (i = 0; i < avn; ++i)
{
enum x86_64_reg_class classes[MAX_CLASSES];
- size_t n;
+ int n;
n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
if (n == 0
@@ -652,7 +621,7 @@
/* Otherwise, allocate space to make them consecutive. */
else
{
- char *a = alloca (16);
+ char *a = alloca (64);
int j;
avalue[i] = a;
diff -ur libffi-3.2.1.orig/src/x86/unix64.S libffi-3.2.1/src/x86/unix64.S
--- libffi-3.2.1.orig/src/x86/unix64.S 2016-08-11 10:02:18.557329149 -0300
+++ libffi-3.2.1/src/x86/unix64.S 2016-08-11 10:03:43.854948176 -0300
@@ -1,7 +1,6 @@
/* -----------------------------------------------------------------------
- unix64.S - Copyright (c) 2013 The Written Word, Inc.
- - Copyright (c) 2008 Red Hat, Inc
- - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
+ unix64.S - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
+ Copyright (c) 2008 Red Hat, Inc
x86-64 Foreign Function Interface
@@ -24,8 +23,17 @@
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
+
+
+ PORT TO THE INTEL MIC ARCHITECTURE:
+ EMILIO CASTILLO VILLAR
+ CRISTOBAL CAMARERO COTERILLO
+
+ UNIVERSITY OF CANTABRIA
+ SPAIN
----------------------------------------------------------------------- */
+/
#ifdef __x86_64__
#define LIBFFI_ASM
#include <fficonfig.h>
@@ -70,7 +78,7 @@
.Lret_from_load_sse:
/* Deallocate the reg arg area. */
- leaq 176(%r10), %rsp
+ leaq 560(%r10), %rsp
/* Call the user function. */
call *%r11
@@ -146,11 +154,20 @@
.align 2
.Lst_float:
- movss %xmm0, (%rdi)
+
+ movl $1, %eax
+ kmov %eax, %k1
+ vpackstorelps %zmm0, (%rdi){%k1}
+ vpackstorehps %zmm0, 64(%rdi){%k1}
+ /*movss %xmm0, (%rdi)*/
ret
.align 2
.Lst_double:
- movsd %xmm0, (%rdi)
+ movl $1, %eax
+ kmov %eax, %k1
+ vpackstorelpd %zmm0, (%rdi){%k1}
+ vpackstorehpd %zmm0, 64(%rdi){%k1}
+ /*movsd %xmm0, (%rdi)*/
ret
.Lst_ldouble:
fstpt (%rdi)
@@ -165,16 +182,39 @@
value to a 16 byte scratch area first. Bits 8, 9, and 10
control where the values are located. Only one of the three
bits will be set; see ffi_prep_cif_machdep for the pattern. */
- movd %xmm0, %r10
- movd %xmm1, %r11
+
+
+ movq %rax, %r10
+ movl $1, %eax
+ kmov %eax, %k1
+ movq %r10, %rax
+
+ vpackstorelpd %zmm0, -200(%rsp){%k1}
+ vpackstorehpd %zmm0, -136(%rsp){%k1}
+ movq -200(%rsp), %r10
+
+
+ vpackstorelpd %zmm1, -200(%rsp){%k1}
+ vpackstorehpd %zmm1, -136(%rsp){%k1}
+ movq -200(%rsp), %r11
+
+ /*movd %zmm0, %r10
+ movd %zmm1, %r11*/
testl $0x100, %ecx
- cmovnz %rax, %rdx
- cmovnz %r10, %rax
+ jz .Lst_struct_n1
+ movq %rax, %rdx
+ movq %r10, %rax
+.Lst_struct_n1:
+
testl $0x200, %ecx
- cmovnz %r10, %rdx
+ jz .Lst_struct_n2
+ movq %r10, %rdx
+.Lst_struct_n2:
testl $0x400, %ecx
- cmovnz %r10, %rax
- cmovnz %r11, %rdx
+ jz .Lst_struct_n3
+ movq %r10, %rax
+ movq %r11, %rdx
+.Lst_struct_n3:
movq %rax, (%rsi)
movq %rdx, 8(%rsi)
@@ -190,14 +230,33 @@
.align 2
.LUW3:
.Lload_sse:
- movdqa 48(%r10), %xmm0
- movdqa 64(%r10), %xmm1
- movdqa 80(%r10), %xmm2
- movdqa 96(%r10), %xmm3
- movdqa 112(%r10), %xmm4
- movdqa 128(%r10), %xmm5
- movdqa 144(%r10), %xmm6
- movdqa 160(%r10), %xmm7
+
+ vloadunpacklq 48(%r10), %zmm0
+ vloadunpacklq 112(%r10), %zmm1
+ vloadunpacklq 176(%r10), %zmm2
+ vloadunpacklq 240(%r10), %zmm3
+ vloadunpacklq 304(%r10), %zmm4
+ vloadunpacklq 368(%r10), %zmm5
+ vloadunpacklq 432(%r10), %zmm6
+ vloadunpacklq 496(%r10), %zmm7
+
+ vloadunpackhq 112(%r10), %zmm0
+ vloadunpackhq 176(%r10), %zmm1
+ vloadunpackhq 240(%r10), %zmm2
+ vloadunpackhq 304(%r10), %zmm3
+ vloadunpackhq 368(%r10), %zmm4
+ vloadunpackhq 432(%r10), %zmm5
+ vloadunpackhq 496(%r10), %zmm6
+ vloadunpackhq 560(%r10), %zmm7
+
+ /*vmovaps 48(%r10), %zmm0
+ vmovaps 112(%r10), %zmm1
+ vmovaps 176(%r10), %zmm2
+ vmovaps 240(%r10), %zmm3
+ vmovaps 304(%r10), %zmm4
+ vmovaps 368(%r10), %zmm5
+ vmovaps 432(%r10), %zmm6
+ vmovaps 496(%r10), %zmm7*/
jmp .Lret_from_load_sse
.LUW4:
@@ -211,7 +270,7 @@
.LUW5:
/* The carry flag is set by the trampoline iff SSE registers
are used. Don't clobber it before the branch instruction. */
- leaq -200(%rsp), %rsp
+ leaq -584(%rsp), %rsp
.LUW6:
movq %rdi, (%rsp)
movq %rsi, 8(%rsp)
@@ -223,13 +282,13 @@
.Lret_from_save_sse:
movq %r10, %rdi
- leaq 176(%rsp), %rsi
+ leaq 560(%rsp), %rsi
movq %rsp, %rdx
- leaq 208(%rsp), %rcx
+ leaq 592(%rsp), %rcx
call ffi_closure_unix64_inner@PLT
/* Deallocate stack frame early; return value is now in redzone. */
- addq $200, %rsp
+ addq $584, %rsp
.LUW7:
/* The first byte of the return value contains the FFI_TYPE. */
@@ -279,11 +338,13 @@
.align 2
.Lld_float:
- movss -24(%rsp), %xmm0
+ vbroadcastss -24(%rsp), %zmm0
+ /*movss -24(%rsp), %xmm0*/
ret
.align 2
.Lld_double:
- movsd -24(%rsp), %xmm0
+ vbroadcastsd -24(%rsp), %zmm0
+ /*movsd -24(%rsp), %xmm0*/
ret
.align 2
.Lld_ldouble:
@@ -299,40 +360,61 @@
that rax gets the second word. */
movq -24(%rsp), %rcx
movq -16(%rsp), %rdx
- movq -16(%rsp), %xmm1
+ vbroadcastsd -16(%rsp), %zmm1
+ /*movq -16(%rsp), %xmm1*/
testl $0x100, %eax
- cmovnz %rdx, %rcx
- movd %rcx, %xmm0
- testl $0x200, %eax
+ jz .Lld_struct_1
+
+ movq %rdx, %rcx
+.Lld_struct_1:
+ subq $8, %rsp
+ movq %rcx, (%rsp)
+ addq $8, %rsp
+ vbroadcastss (%rsp), %zmm0
+
+ /*movd %rcx, %zmm0*/
movq -24(%rsp), %rax
- cmovnz %rdx, %rax
+ testl $0x200, %eax
+ jz .Lld_struct_2
+ movq %rdx, %rax
+.Lld_struct_2:
ret
/* See the comment above .Lload_sse; the same logic applies here. */
.align 2
.LUW8:
.Lsave_sse:
- movdqa %xmm0, 48(%rsp)
- movdqa %xmm1, 64(%rsp)
- movdqa %xmm2, 80(%rsp)
- movdqa %xmm3, 96(%rsp)
- movdqa %xmm4, 112(%rsp)
- movdqa %xmm5, 128(%rsp)
- movdqa %xmm6, 144(%rsp)
- movdqa %xmm7, 160(%rsp)
+ vpackstorelq %zmm0, 48(%rsp)
+ vpackstorelq %zmm1, 112(%rsp)
+ vpackstorelq %zmm2, 176(%rsp)
+ vpackstorelq %zmm3, 240(%rsp)
+ vpackstorelq %zmm4, 304(%rsp)
+ vpackstorelq %zmm5, 368(%rsp)
+ vpackstorelq %zmm6, 432(%rsp)
+ vpackstorelq %zmm7, 496(%rsp)
+
+ vpackstorehq %zmm0, 112(%rsp)
+ vpackstorehq %zmm1, 176(%rsp)
+ vpackstorehq %zmm2, 240(%rsp)
+ vpackstorehq %zmm3, 304(%rsp)
+ vpackstorehq %zmm4, 368(%rsp)
+ vpackstorehq %zmm5, 432(%rsp)
+ vpackstorehq %zmm6, 496(%rsp)
+ vpackstorehq %zmm7, 560(%rsp)
+ /*vmovaps %zmm0, 48(%rsp)
+ vmovaps %zmm1, 112(%rsp)
+ vmovaps %zmm2, 176(%rsp)
+ vmovaps %zmm3, 240(%rsp)
+ vmovaps %zmm4, 304(%rsp)
+ vmovaps %zmm5, 368(%rsp)
+ vmovaps %zmm6, 432(%rsp)
+ vmovaps %zmm7, 496(%rsp) */
jmp .Lret_from_save_sse
.LUW9:
.size ffi_closure_unix64,.-ffi_closure_unix64
-#ifdef __GNUC__
-/* Only emit DWARF unwind info when building with the GNU toolchain. */
-
-#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE
- .section .eh_frame,"a",@unwind
-#else
.section .eh_frame,"a",@progbits
-#endif
.Lframe1:
.long .LECIE1-.LSCIE1 /* CIE Length */
.LSCIE1:
@@ -423,8 +505,6 @@
.align 8
.LEFDE3:
-#endif /* __GNUC__ */
-
#endif /* __x86_64__ */
#if defined __ELF__ && defined __linux__
Only in libffi-3.2.1/src/x86: unix64.S.orig
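The closure side (ffi_closure_unix64 and the trampoline rewritten above) can be exercised with the regular libffi closure API; again, the snippet below is only a sketch for testing and is not part of the patch:

    #include <ffi.h>
    #include <stdio.h>

    /* Closure handler: writes the sum of the two double arguments to *ret. */
    static void sum_handler(ffi_cif *cif, void *ret, void **args, void *userdata)
    {
        (void)cif; (void)userdata;
        *(double *)ret = *(double *)args[0] + *(double *)args[1];
    }

    int main(void)
    {
        ffi_cif cif;
        ffi_type *argtypes[2] = { &ffi_type_double, &ffi_type_double };
        void *code;
        ffi_closure *closure = ffi_closure_alloc(sizeof(ffi_closure), &code);
        double (*fn)(double, double);

        if (!closure ||
            ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_double, argtypes) != FFI_OK ||
            ffi_prep_closure_loc(closure, &cif, sum_handler, NULL, code) != FFI_OK)
            return 1;

        /* Calling through the generated trampoline goes through the
           save_sse/load_sse paths patched in unix64.S. */
        fn = (double (*)(double, double))code;
        printf("%g\n", fn(1.5, 2.25));   /* expected: 3.75 */

        ffi_closure_free(closure);
        return 0;
    }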