Created
May 13, 2011 20:49
-
-
Save gmacon/971291 to your computer and use it in GitHub Desktop.
cpuminer OS X XMM SSE2 patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/x86_64/sha256_xmm_amd64.asm b/x86_64/sha256_xmm_amd64.asm
index 4fa0ea9..9e36159 100644
--- a/x86_64/sha256_xmm_amd64.asm
+++ b/x86_64/sha256_xmm_amd64.asm
@@ -13,13 +13,14 @@ BITS 64
%define data rsi | |
%define init rdx | |
-extern g_4sha256_k | |
+extern _g_4sha256_k | |
-global CalcSha256_x64 | |
+global _CalcSha256_x64 | |
; CalcSha256 hash(rdi), data(rsi), init(rdx) | |
-CalcSha256_x64: | |
+_CalcSha256_x64: | |
push rbx | |
+ push r8 | |
LAB_NEXT_NONCE: | |
mov r11, data | |
@@ -78,6 +79,7 @@ LAB_CALC: | |
pop rcx | |
mov rax, 0 | |
+ mov r8, qword _g_4sha256_k | |
; Load the init values of the message into the hash. | |
@@ -103,8 +105,9 @@ LAB_LOOP: | |
;; T t1 = h + (Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)) + ((e & f) ^ AndNot(e, g)) + Expand32<T>(g_sha256_k[j]) + w[j] | |
movdqa xmm6, [rsi+rax*4] | |
- paddd xmm6, g_4sha256_k[rax*4] | |
+ paddd xmm6, [r8] | |
add rax, 4 | |
+ add r8, 16 | |
paddd xmm6, xmm10 ; +h | |
@@ -215,5 +218,6 @@ debug_me: | |
movdqa [rdi+7*16], xmm10 | |
LAB_RET: | |
+ pop r8 | |
pop rbx | |
ret
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
In addition to applying this patch, I had to fix the Makefile, rerun autogen.sh, and reconfigure with:
CFLAGS="-O3 -Wall -msse2 -arch x86_64" ./configure
(I think the -msse2 and -arch x86_64 flags are already the defaults on my MacBook Pro running Mac OS X 10.8.3, so they may be unnecessary.)
Makefile patch:
diff --git a/x86_64/Makefile.am b/x86_64/Makefile.am
index c74ddd2..2b91f3a 100644
--- a/x86_64/Makefile.am
+++ b/x86_64/Makefile.am
@@ -5,4 +5,4 @@ SUFFIXES = .asm
libx8664_a_SOURCES = sha256_xmm_amd64.asm
.asm.o: