Last active
January 22, 2016 11:39
-
-
Save ynkdir/effcb1301e27a5133b42 to your computer and use it in GitHub Desktop.
libcallex windows
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/autoload/Makefile.msc b/autoload/Makefile.msc | |
index d0907f2..ef13346 100644 | |
--- a/autoload/Makefile.msc | |
+++ b/autoload/Makefile.msc | |
@@ -1,5 +1,20 @@ | |
+!if "$(ARCH)" == "x64" | |
+ASMFILE=msc_x64_call | |
+!elseif "$(ARCH)" == "x86" | |
+ASMFILE=msc_x86_call | |
+!else | |
+!error set ARCH=x64|x86 | |
+!endif | |
+ | |
all : libcallex.dll | |
@echo done | |
-libcallex.dll : libcallex.cxx | |
- cl /LD /EHsc libcallex.cxx libffi.lib | |
+libcallex.dll : libcallex.cxx $(ASMFILE).obj | |
+ cl /LD /EHsc libcallex.cxx $(ASMFILE).obj | |
+ | |
+msc_x64_call.obj : msc_x64_call.asm | |
+ ml64 /c msc_x64_call.asm | |
+ | |
+msc_x86_call.obj : msc_x86_call.asm | |
+ ml /c msc_x86_call.asm | |
+ | |
diff --git a/autoload/Makefile.w32 b/autoload/Makefile.w32 | |
index dd3f22d..0404bdd 100644 | |
--- a/autoload/Makefile.w32 | |
+++ b/autoload/Makefile.w32 | |
@@ -1,5 +1,16 @@ | |
+ | |
+ifeq ($(ARCH), x64) | |
+ASMFILE=mingw_x64_call.s | |
+else | |
+ifeq ($(ARCH), x86) | |
+ASMFILE=mingw_x86_call.s | |
+else | |
+$(error set ARCH=x64|x86) | |
+endif | |
+endif | |
+ | |
all : libcallex.dll | |
@echo done | |
-libcallex.dll : libcallex.cxx | |
- g++ -static-libgcc -static-libstdc++ -shared -o libcallex.dll libcallex.cxx -lffi | |
+libcallex.dll : libcallex.cxx $(ASMFILE) | |
+ g++ -static -static-libgcc -static-libstdc++ -shared -o libcallex.dll libcallex.cxx $(ASMFILE) | |
diff --git a/autoload/libcallex.cxx b/autoload/libcallex.cxx | |
index 6f667cb..3ca98ce 100644 | |
--- a/autoload/libcallex.cxx | |
+++ b/autoload/libcallex.cxx | |
@@ -85,86 +85,23 @@ const char* libcallex_call(const char* context) { | |
} | |
narg++; | |
} | |
-#if defined(_WIN64) && defined(_MVC_VER) | |
- // XXX: NOT TESTED | |
- // XXX: replace push to mov | |
- // at lease 32 byte, aligned to 16 byte | |
- INTPTR_T stackroom = 32; | |
- if (narg > 4) | |
- stackroom += 16 * (narg % 2); | |
- _asm sub rsp, stackroom | |
- for (unsigned long n = narg; n > 4; n--) { | |
- INTPTR_T a_ = args[n - 1]; | |
- _asm push a_ | |
- } | |
- if (narg > 3) { | |
- INTPTR_T a_ = args[3]; | |
- _asm mov a_, r9 | |
- } | |
- if (narg > 2) { | |
- INTPTR_T a_ = args[2]; | |
- _asm mov a_, r8 | |
- } | |
- if (narg > 1) { | |
- INTPTR_T a_ = args[1]; | |
- _asm mov a_, rdx | |
- } | |
- if (narg > 0) { | |
- INTPTR_T a_ = args[0]; | |
- _asm mov a_, rcx | |
- } | |
- _asm { | |
- call p_ | |
- mov r_, rax | |
- } | |
- if (narg > 4) { | |
- INTPTR_T a_ = (narg - 4) * sizeof(void *); | |
- _asm add rsp, a_ | |
- } | |
- _asm add rsp, stackroom | |
-#elif defined(_WIN32) && defined(_MVC_VER) | |
- for (unsigned long n = 0; n < narg; n++) { | |
- INTPTR_T a_ = args[narg-n-1]; | |
- _asm { | |
- mov eax, a_ | |
- push eax | |
- } | |
- } | |
- _asm { | |
- call p_ | |
- mov r_, eax | |
- } | |
-#elif defined(_WIN64) && defined(___GNUC__) | |
- // XXX: NOT TESTED | |
- // XXX: replace push to mov | |
- // at lease 32 byte, aligned to 16 byte | |
- INTPTR_T stackroom = 32; | |
- if (narg > 4) | |
- stackroom += 16 * (narg % 2); | |
- __asm__ ("subq %0, %%rsp"::"r"(stackroom)); | |
- for (unsigned long n = narg; n > 4; n--) | |
- __asm__ ("pushq %0"::"r"(args[n-1])); | |
- if (narg > 3) __asm__ ("movq %0, %%r9"::"r"(args[3])); | |
- if (narg > 2) __asm__ ("movq %0, %%r8"::"r"(args[2])); | |
- if (narg > 1) __asm__ ("movq %0, %%rdx"::"r"(args[1])); | |
- if (narg > 0) __asm__ ("movq %0, %%rcx"::"r"(args[0])); | |
- __asm__ ("call %0":"=r"(r_):"r"(p_)); | |
- if (narg > 4) | |
- __asm__ ("addq %0, %%rsp"::"r"((narg - 4) * sizeof(void*))); | |
- __asm__ ("addq %0, %%rsp"::"r"(stackroom)); | |
+#if defined(_WIN64) && defined(_MSC_VER) | |
+ // NOTE: Vim's Number is 32bit. We can not handle 64bit pointer as Number. | |
+ // FIXME: double is not supported | |
+ extern intptr_t msc_x64_call(FUNCTION p, long narg, INTPTR_T* args); | |
+ r_ = msc_x64_call(p_, narg, args); | |
+#elif defined(_WIN32) && defined(_MSC_VER) | |
+ // FIXME: double is not supported | |
+ extern intptr_t msc_x86_call(FUNCTION p, long narg, INTPTR_T* args); | |
+ r_ = msc_x86_call(p_, narg, args); | |
+#elif defined(_WIN64) && defined(__GNUC__) | |
+ // FIXME: double is not supported | |
+ extern intptr_t mingw_x64_call(FUNCTION p, long narg, INTPTR_T* args); | |
+ r_ = mingw_x64_call(p_, narg, args); | |
#elif defined(_WIN32) && defined(__GNUC__) | |
- for (unsigned long n = 0; n < narg; n++) { | |
- INTPTR_T a_ = args[narg-n-1]; | |
- __asm__ ( | |
- "push %0" | |
- ::"r"(a_) | |
- ); | |
- } | |
- __asm__ ( | |
- "call %0" | |
- :"=r"(r_) | |
- :"r"(p_) | |
- ); | |
+ // FIXME: double is not supported | |
+ extern intptr_t mingw_x86_call(FUNCTION p, long narg, INTPTR_T* args); | |
+ r_ = mingw_x86_call(p_, narg, args); | |
#elif defined(__linux__) && defined(__x86_64__) && defined(__GNUC__) | |
for (unsigned long n = narg; n > 6; n--) | |
__asm__ ("pushq %0"::"r"(args[n-1])); | |
diff --git a/autoload/libcallex.dll b/autoload/libcallex.dll | |
index cf4d871..ba4a068 100644 | |
Binary files a/autoload/libcallex.dll and b/autoload/libcallex.dll differ | |
diff --git a/autoload/mingw_x64_call.s b/autoload/mingw_x64_call.s | |
new file mode 100644 | |
index 0000000..b069e3b | |
--- /dev/null | |
+++ b/autoload/mingw_x64_call.s | |
@@ -0,0 +1,78 @@ | |
+# intptr_t mingw_x64_call(FUNCTION p, long narg, INTPTR_T* args) | |
+# | |
+# Calls p under the Microsoft x64 calling convention, passing the first narg | |
+# 8-byte entries of args as integer arguments, and returns p's rax unchanged. | |
+# In: rcx = p, edx = narg, r8 = args | |
+# Out: rax = value returned by p | |
+# Floating-point arguments are not supported: xmm0-xmm3 are never loaded. | |
+ .text | |
+ .global mingw_x64_call | |
+ .def mingw_x64_call; .scl 2; .type 32; .endef | |
+# Home slots in the caller-provided shadow space, addressed from rbp. | |
+args$ = 32 | |
+narg$ = 24 | |
+p$ = 16 | |
+ .seh_proc mingw_x64_call | |
+mingw_x64_call: | |
+ pushq %rbp | |
+ .seh_pushreg %rbp | |
+ movq %rsp, %rbp | |
+ .seh_setframe %rbp, 0 | |
+ .seh_endprologue | |
+ movq %rcx, p$(%rbp) | |
+ movq %r8, args$(%rbp) | |
+ # narg is a 32-bit long, so the upper half of rdx is undefined on entry: | |
+ # sign-extend it and treat a negative count as zero. | |
+ movslq %edx, %rdx | |
+ xorl %eax, %eax | |
+ testq %rdx, %rdx | |
+ cmovs %rax, %rdx | |
+ movq %rdx, narg$(%rbp) | |
+ # stacksize is at least 32 bytes, aligned to 16 bytes | |
+ # cutting corners with (4 + narg * 2) * 8 | |
+ leaq 4(,%rdx,2), %rdx | |
+ leaq (,%rdx,8), %rdx | |
+ subq %rdx, %rsp | |
+ # while narg >= 5: | |
+ # narg-- | |
+ # rsp[narg] = args[narg] | |
+ # if narg > 3: | |
+ # r9 = args[3] | |
+ # if narg > 2: | |
+ # r8 = args[2] | |
+ # if narg > 1: | |
+ # rdx = args[1] | |
+ # if narg > 0: | |
+ # rcx = args[0] | |
+ movq narg$(%rbp), %rcx | |
+ movq args$(%rbp), %r10 | |
+argN: | |
+ cmpq $5, %rcx | |
+ jl arg4 | |
+ decq %rcx | |
+ movq (%r10,%rcx,8), %rax | |
+ movq %rax, (%rsp,%rcx,8) | |
+ jmp argN | |
+arg4: | |
+ cmpq $4, %rcx | |
+ jl arg3 | |
+ movq 24(%r10), %r9 | |
+arg3: | |
+ cmpq $3, %rcx | |
+ jl arg2 | |
+ movq 16(%r10), %r8 | |
+arg2: | |
+ cmpq $2, %rcx | |
+ jl arg1 | |
+ movq 8(%r10), %rdx | |
+arg1: | |
+ cmpq $1, %rcx | |
+ jl docall | |
+ movq (%r10), %rcx | |
+docall: | |
+ call *p$(%rbp) | |
+ # lea (not mov) is the frame-pointer epilogue form the Windows unwinder recognizes | |
+ leaq (%rbp), %rsp | |
+ popq %rbp | |
+ ret | |
+ .seh_endproc | |
diff --git a/autoload/mingw_x86_call.s b/autoload/mingw_x86_call.s | |
new file mode 100644 | |
index 0000000..d19d84c | |
--- /dev/null | |
+++ b/autoload/mingw_x86_call.s | |
@@ -0,0 +1,38 @@ | |
+# intptr_t mingw_x86_call(FUNCTION p, long narg, INTPTR_T* args) | |
+# | |
+# Pushes args[narg-1] .. args[0] (4-byte entries, last argument first) and | |
+# calls p; the result p leaves in eax is returned unchanged. | |
+# esp is restored from ebp afterwards, so it does not matter whether p | |
+# pops its own arguments. | |
+ .text | |
+ .global _mingw_x86_call | |
+ .def _mingw_x86_call; .scl 2; .type 32; .endef | |
+# Offsets of the stack arguments from ebp. | |
+args$ = 16 | |
+narg$ = 12 | |
+p$ = 8 | |
+_mingw_x86_call: | |
+ .cfi_startproc | |
+ pushl %ebp | |
+ .cfi_def_cfa_offset 8 | |
+ .cfi_offset 5, -8 | |
+ movl %esp, %ebp | |
+ .cfi_def_cfa_register 5 | |
+ # while narg > 0: | |
+ # push args[--narg] | |
+ movl narg$(%ebp), %ecx | |
+ movl args$(%ebp), %eax | |
+argN: | |
+ testl %ecx, %ecx | |
+ jle docall | |
+ decl %ecx | |
+ pushl (%eax,%ecx,4) | |
+ jmp argN | |
+docall: | |
+ call *p$(%ebp) | |
+ movl %ebp, %esp | |
+ popl %ebp | |
+ .cfi_restore 5 | |
+ .cfi_def_cfa 4, 4 | |
+ ret | |
+ .cfi_endproc | |
diff --git a/autoload/msc_x64_call.asm b/autoload/msc_x64_call.asm | |
new file mode 100644 | |
index 0000000..d5744b3 | |
--- /dev/null | |
+++ b/autoload/msc_x64_call.asm | |
@@ -0,0 +1,80 @@ | |
+; intptr_t msc_x64_call(FUNCTION p, long narg, INTPTR_T* args) | |
+; | |
+; Calls p under the Microsoft x64 calling convention, passing the first narg | |
+; 8-byte entries of args as integer arguments, and returns p's rax unchanged. | |
+; In: rcx = p, edx = narg, r8 = args | |
+; Out: rax = value returned by p | |
+; Floating-point arguments are not supported: xmm0-xmm3 are never loaded. | |
+PUBLIC msc_x64_call | |
+ | |
+_TEXT SEGMENT | |
+; Home slots in the caller-provided shadow space, addressed from rbp. | |
+args$ = 32 | |
+narg$ = 24 | |
+p$ = 16 | |
+msc_x64_call proc frame | |
+ push rbp | |
+ .pushreg rbp | |
+ mov rbp, rsp | |
+ .setframe rbp, 0 | |
+ .endprolog | |
+ mov p$[rbp], rcx | |
+ mov args$[rbp], r8 | |
+ ; narg is a 32-bit long, so the upper half of rdx is undefined on entry: | |
+ ; sign-extend it and treat a negative count as zero. | |
+ movsxd rdx, edx | |
+ xor eax, eax | |
+ test rdx, rdx | |
+ cmovs rdx, rax | |
+ mov narg$[rbp], rdx | |
+ ; stacksize is at least 32 bytes, aligned to 16 bytes | |
+ ; cutting corners with (4 + narg * 2) * 8 | |
+ lea rdx, [rdx*2+4] | |
+ lea rdx, [rdx*8] | |
+ sub rsp, rdx | |
+ ; while narg >= 5: | |
+ ; narg-- | |
+ ; rsp[narg] = args[narg] | |
+ ; if narg > 3: | |
+ ; r9 = args[3] | |
+ ; if narg > 2: | |
+ ; r8 = args[2] | |
+ ; if narg > 1: | |
+ ; rdx = args[1] | |
+ ; if narg > 0: | |
+ ; rcx = args[0] | |
+ mov rcx, narg$[rbp] | |
+ mov r10, args$[rbp] | |
+argN: | |
+ cmp rcx, 5 | |
+ jl arg4 | |
+ dec rcx | |
+ mov rax, [r10+rcx*8] | |
+ mov [rsp+rcx*8], rax | |
+ jmp argN | |
+arg4: | |
+ cmp rcx, 4 | |
+ jl arg3 | |
+ mov r9, [r10+24] | |
+arg3: | |
+ cmp rcx, 3 | |
+ jl arg2 | |
+ mov r8, [r10+16] | |
+arg2: | |
+ cmp rcx, 2 | |
+ jl arg1 | |
+ mov rdx, [r10+8] | |
+arg1: | |
+ cmp rcx, 1 | |
+ jl docall | |
+ mov rcx, [r10] | |
+docall: | |
+ call qword ptr p$[rbp] | |
+ ; lea (not mov) is the frame-pointer epilogue form the Windows unwinder recognizes | |
+ lea rsp, [rbp] | |
+ pop rbp | |
+ ret | |
+msc_x64_call endp | |
+_TEXT ENDS | |
+ | |
+end | |
diff --git a/autoload/msc_x86_call.asm b/autoload/msc_x86_call.asm | |
new file mode 100644 | |
index 0000000..6c88c81 | |
--- /dev/null | |
+++ b/autoload/msc_x86_call.asm | |
@@ -0,0 +1,41 @@ | |
+; intptr_t msc_x86_call(FUNCTION p, long narg, INTPTR_T* args) | |
+; | |
+; Pushes args[narg-1] .. args[0] (4-byte entries, last argument first) and | |
+; calls p; the result p leaves in eax is returned unchanged. | |
+; esp is restored from ebp afterwards, so it does not matter whether p | |
+; pops its own arguments. | |
+.686P | |
+.model flat | |
+ | |
+; 32-bit C (cdecl) names are decorated with a leading underscore, so the | |
+; C symbol msc_x86_call must be defined here as _msc_x86_call. | |
+PUBLIC _msc_x86_call | |
+ | |
+_TEXT SEGMENT | |
+; Offsets of the stack arguments from ebp. | |
+args$ = 16 | |
+narg$ = 12 | |
+p$ = 8 | |
+_msc_x86_call proc | |
+ push ebp | |
+ mov ebp, esp | |
+ ; while narg > 0: | |
+ ; push args[--narg] | |
+ mov ecx, narg$[ebp] | |
+ mov eax, args$[ebp] | |
+argN: | |
+ test ecx, ecx | |
+ jle docall | |
+ dec ecx | |
+ ; explicit operand size: a bare memory operand leaves the push width to the assembler | |
+ push dword ptr [eax+ecx*4] | |
+ jmp argN | |
+docall: | |
+ call dword ptr p$[ebp] | |
+ mov esp, ebp | |
+ pop ebp | |
+ ret | |
+_msc_x86_call endp | |
+_TEXT ENDS | |
+ | |
+end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment