Skip to content

Instantly share code, notes, and snippets.

@gmacon
Created May 13, 2011 20:49
Show Gist options
  • Save gmacon/971291 to your computer and use it in GitHub Desktop.
Save gmacon/971291 to your computer and use it in GitHub Desktop.
cpuminer OS X XMM SSE2 patch
diff --git a/x86_64/sha256_xmm_amd64.asm b/x86_64/sha256_xmm_amd64.asm
index 4fa0ea9..9e36159 100644
--- a/x86_64/sha256_xmm_amd64.asm
+++ b/x86_64/sha256_xmm_amd64.asm
@@ -13,13 +13,14 @@ BITS 64
%define data rsi
%define init rdx
-extern g_4sha256_k
+extern _g_4sha256_k
-global CalcSha256_x64
+global _CalcSha256_x64
; CalcSha256 hash(rdi), data(rsi), init(rdx)
-CalcSha256_x64:
+_CalcSha256_x64:
push rbx
+ push r8
LAB_NEXT_NONCE:
mov r11, data
@@ -78,6 +79,7 @@ LAB_CALC:
pop rcx
mov rax, 0
+ mov r8, qword _g_4sha256_k
; Load the init values of the message into the hash.
@@ -103,8 +105,9 @@ LAB_LOOP:
;; T t1 = h + (Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)) + ((e & f) ^ AndNot(e, g)) + Expand32<T>(g_sha256_k[j]) + w[j]
movdqa xmm6, [rsi+rax*4]
- paddd xmm6, g_4sha256_k[rax*4]
+ paddd xmm6, [r8]
add rax, 4
+ add r8, 16
paddd xmm6, xmm10 ; +h
@@ -215,5 +218,6 @@ debug_me:
movdqa [rdi+7*16], xmm10
LAB_RET:
+ pop r8
pop rbx
ret
@djp3
Copy link

djp3 commented Mar 23, 2013

In addition to this patch, I had to fix the Makefile, rerun autogen.sh, and reconfigure with:
CFLAGS="-O3 -Wall -msse2 -arch x86_64" ./configure
although I think the -msse2 and -arch flags are already the defaults on my MacBook Pro running Mac OS X 10.8.3.

patch:

diff --git a/x86_64/Makefile.am b/x86_64/Makefile.am
index c74ddd2..2b91f3a 100644
--- a/x86_64/Makefile.am
+++ b/x86_64/Makefile.am
@@ -5,4 +5,4 @@ SUFFIXES = .asm
libx8664_a_SOURCES = sha256_xmm_amd64.asm

.asm.o:
-	$(YASM) -f elf64 $<
+	$(YASM) -f macho64 $<

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment