Skip to content

Instantly share code, notes, and snippets.

@grishka
Created June 20, 2024 00:26
Show Gist options
  • Save grishka/c1be1c035f39564debfd4b195bdfcac4 to your computer and use it in GitHub Desktop.
Save grishka/c1be1c035f39564debfd4b195bdfcac4 to your computer and use it in GitHub Desktop.
SHA-256 brute-forcer for https://shallenge.quirino.net, GPU edition
/*********************************************************************
* Filename: sha256.c
* Author: Brad Conte (brad AT bradconte.com)
* Copyright:
* Disclaimer: This code is presented "as is" without any guarantees.
* Details: Implementation of the SHA-256 hashing algorithm.
SHA-256 is one of the three algorithms in the SHA2
specification. The others, SHA-384 and SHA-512, are not
offered in this implementation.
Algorithm specification can be found here:
* http://csrc.nist.gov/publications/fips/fips180-2/fips180-2withchangenotice.pdf
This implementation uses little endian byte order.
*********************************************************************/
/*************************** HEADER FILES ***************************/
#include <stdlib.h>
#include <memory.h>
#include "sha256.h"
/****************************** MACROS ******************************/
#define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b))))
#define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b))))
#define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z)))
#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22))
#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25))
#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3))
#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10))
/**************************** VARIABLES *****************************/
static const WORD k[64] = {
0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
};
/*********************** FUNCTION DEFINITIONS ***********************/
void sha256_transform(SHA256_CTX *ctx, const BYTE data[])
{
WORD a, b, c, d, e, f, g, h, i, j, t1, t2, m[64];
for (i = 0, j = 0; i < 16; ++i, j += 4)
m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]);
for ( ; i < 64; ++i)
m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16];
a = ctx->state[0];
b = ctx->state[1];
c = ctx->state[2];
d = ctx->state[3];
e = ctx->state[4];
f = ctx->state[5];
g = ctx->state[6];
h = ctx->state[7];
for (i = 0; i < 64; ++i) {
t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
t2 = EP0(a) + MAJ(a,b,c);
h = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
}
ctx->state[0] += a;
ctx->state[1] += b;
ctx->state[2] += c;
ctx->state[3] += d;
ctx->state[4] += e;
ctx->state[5] += f;
ctx->state[6] += g;
ctx->state[7] += h;
}
void sha256_init(SHA256_CTX *ctx)
{
ctx->datalen = 0;
ctx->bitlen = 0;
ctx->state[0] = 0x6a09e667;
ctx->state[1] = 0xbb67ae85;
ctx->state[2] = 0x3c6ef372;
ctx->state[3] = 0xa54ff53a;
ctx->state[4] = 0x510e527f;
ctx->state[5] = 0x9b05688c;
ctx->state[6] = 0x1f83d9ab;
ctx->state[7] = 0x5be0cd19;
}
void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len)
{
WORD i;
for (i = 0; i < len; ++i) {
ctx->data[ctx->datalen] = data[i];
ctx->datalen++;
if (ctx->datalen == 64) {
sha256_transform(ctx, ctx->data);
ctx->bitlen += 512;
ctx->datalen = 0;
}
}
}
void sha256_final(SHA256_CTX *ctx, BYTE hash[])
{
WORD i;
i = ctx->datalen;
// Pad whatever data is left in the buffer.
if (ctx->datalen < 56) {
ctx->data[i++] = 0x80;
while (i < 56)
ctx->data[i++] = 0x00;
}
else {
ctx->data[i++] = 0x80;
while (i < 64)
ctx->data[i++] = 0x00;
sha256_transform(ctx, ctx->data);
memset(ctx->data, 0, 56);
}
// Append to the padding the total message's length in bits and transform.
ctx->bitlen += ctx->datalen * 8;
ctx->data[63] = ctx->bitlen;
ctx->data[62] = ctx->bitlen >> 8;
ctx->data[61] = ctx->bitlen >> 16;
ctx->data[60] = ctx->bitlen >> 24;
ctx->data[59] = ctx->bitlen >> 32;
ctx->data[58] = ctx->bitlen >> 40;
ctx->data[57] = ctx->bitlen >> 48;
ctx->data[56] = ctx->bitlen >> 56;
sha256_transform(ctx, ctx->data);
// Since this implementation uses little endian byte ordering and SHA uses big endian,
// reverse all the bytes when copying the final state to the output hash.
for (i = 0; i < 4; ++i) {
hash[i] = (ctx->state[0] >> (24 - i * 8)) & 0x000000ff;
hash[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0x000000ff;
hash[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0x000000ff;
hash[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0x000000ff;
hash[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0x000000ff;
hash[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0x000000ff;
hash[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0x000000ff;
hash[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0x000000ff;
}
}
/*********************************************************************
* Filename: sha256.h
* Author: Brad Conte (brad AT bradconte.com)
* Copyright:
* Disclaimer: This code is presented "as is" without any guarantees.
* Details: Defines the API for the corresponding SHA1 implementation.
*********************************************************************/
#ifndef SHA256_H
#define SHA256_H
/*************************** HEADER FILES ***************************/
#include <stddef.h>
/****************************** MACROS ******************************/
#define SHA256_BLOCK_SIZE 32 // SHA256 outputs a 32 byte digest
/**************************** DATA TYPES ****************************/
typedef unsigned char BYTE; // 8-bit byte
typedef unsigned int WORD; // 32-bit word, change to "long" for 16-bit machines
typedef struct {
BYTE data[64];
WORD datalen;
unsigned long long bitlen;
WORD state[8];
} SHA256_CTX;
/*********************** FUNCTION DECLARATIONS **********************/
void sha256_init(SHA256_CTX *ctx);
void sha256_update(SHA256_CTX *ctx, const BYTE data[], size_t len);
void sha256_final(SHA256_CTX *ctx, BYTE hash[]);
#endif // SHA256_H
//
// sha256.metal
// shallenge-gpu
//
// Created by Grishka on 19.06.2024.
//
// adapted from https://github.com/B-Con/crypto-algorithms/blob/master/sha256.c
#include <metal_stdlib>
using namespace metal;
#define ROTLEFT(a,b) (((a) << (b)) | ((a) >> (32-(b))))
#define ROTRIGHT(a,b) (((a) >> (b)) | ((a) << (32-(b))))
#define CH(x,y,z) (((x) & (y)) ^ (~(x) & (z)))
#define MAJ(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
#define EP0(x) (ROTRIGHT(x,2) ^ ROTRIGHT(x,13) ^ ROTRIGHT(x,22))
#define EP1(x) (ROTRIGHT(x,6) ^ ROTRIGHT(x,11) ^ ROTRIGHT(x,25))
#define SIG0(x) (ROTRIGHT(x,7) ^ ROTRIGHT(x,18) ^ ((x) >> 3))
#define SIG1(x) (ROTRIGHT(x,17) ^ ROTRIGHT(x,19) ^ ((x) >> 10))
typedef struct {
uint8_t data[64];
uint32_t datalen;
uint64_t bitlen;
uint32_t state[8];
} SHA256_CTX;
constant uint32_t k[64] = {
0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
};
void sha256_transform(thread SHA256_CTX *ctx, const uint8_t data[]){
uint32_t a, b, c, d, e, f, g, h, i, j, t1, t2, m[64];
for (i = 0, j = 0; i < 16; ++i, j += 4)
m[i] = (data[j] << 24) | (data[j + 1] << 16) | (data[j + 2] << 8) | (data[j + 3]);
for ( ; i < 64; ++i)
m[i] = SIG1(m[i - 2]) + m[i - 7] + SIG0(m[i - 15]) + m[i - 16];
a = ctx->state[0];
b = ctx->state[1];
c = ctx->state[2];
d = ctx->state[3];
e = ctx->state[4];
f = ctx->state[5];
g = ctx->state[6];
h = ctx->state[7];
for (i = 0; i < 64; ++i) {
t1 = h + EP1(e) + CH(e,f,g) + k[i] + m[i];
t2 = EP0(a) + MAJ(a,b,c);
h = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
}
ctx->state[0] += a;
ctx->state[1] += b;
ctx->state[2] += c;
ctx->state[3] += d;
ctx->state[4] += e;
ctx->state[5] += f;
ctx->state[6] += g;
ctx->state[7] += h;
}
void sha256_init(thread SHA256_CTX *ctx){
ctx->datalen = 0;
ctx->bitlen = 0;
ctx->state[0] = 0x6a09e667;
ctx->state[1] = 0xbb67ae85;
ctx->state[2] = 0x3c6ef372;
ctx->state[3] = 0xa54ff53a;
ctx->state[4] = 0x510e527f;
ctx->state[5] = 0x9b05688c;
ctx->state[6] = 0x1f83d9ab;
ctx->state[7] = 0x5be0cd19;
}
void sha256_update(thread SHA256_CTX *ctx, const uint8_t data[], size_t len){
uint32_t i;
for (i = 0; i < len; ++i) {
ctx->data[ctx->datalen] = data[i];
ctx->datalen++;
if (ctx->datalen == 64) {
sha256_transform(ctx, ctx->data);
ctx->bitlen += 512;
ctx->datalen = 0;
}
}
}
void sha256_final(thread SHA256_CTX *ctx){
uint32_t i;
i = ctx->datalen;
// Pad whatever data is left in the buffer.
if (ctx->datalen < 56) {
ctx->data[i++] = 0x80;
while (i < 56)
ctx->data[i++] = 0x00;
}
else {
ctx->data[i++] = 0x80;
while (i < 64)
ctx->data[i++] = 0x00;
sha256_transform(ctx, ctx->data);
for(i=0;i<56;i++)
ctx->data[i]=0;
}
// Append to the padding the total message's length in bits and transform.
ctx->bitlen += ctx->datalen * 8;
ctx->data[63] = ctx->bitlen;
ctx->data[62] = ctx->bitlen >> 8;
ctx->data[61] = ctx->bitlen >> 16;
ctx->data[60] = ctx->bitlen >> 24;
ctx->data[59] = ctx->bitlen >> 32;
ctx->data[58] = ctx->bitlen >> 40;
ctx->data[57] = ctx->bitlen >> 48;
ctx->data[56] = ctx->bitlen >> 56;
sha256_transform(ctx, ctx->data);
}
kernel void sha256(device const uint8_t *strPrefix, device uint8_t *result, uint index [[thread_position_in_grid]]){
SHA256_CTX _ctx;
thread SHA256_CTX *ctx=&_ctx;
sha256_init(ctx);
uint8_t buf[128];
uint len=strPrefix[0];
for(uint i=0;i<len;i++){
buf[i]=strPrefix[i+1];
}
sha256_update(ctx, buf, len);
const uint8_t alphabet[]="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/";
buf[0]=alphabet[(index >> 12) & 63];
buf[1]=alphabet[(index >> 6) & 63];
buf[2]=alphabet[index & 63];
sha256_update(ctx, buf, 3);
sha256_final(ctx);
result[index]=ctx->state[0]==0 ? 1 : 0;
}
kernel void sha256_precomputed(device const uint8_t *data, device const uint32_t *state,
device const uint32_t *dataAndBitLen, device uint8_t *result,
uint index [[thread_position_in_grid]]){
SHA256_CTX _ctx;
for(uint i=0;i<64;i++)
_ctx.data[i]=data[i];
for(uint i=0;i<8;i++)
_ctx.state[i]=state[i];
_ctx.datalen=dataAndBitLen[0];
_ctx.bitlen=dataAndBitLen[1];
thread SHA256_CTX *ctx=&_ctx;
const uint8_t alphabet[]="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/";
uint8_t buf[]{
alphabet[(index >> 12) & 63],
alphabet[(index >> 6) & 63],
alphabet[index & 63]
};
sha256_update(ctx, buf, 3);
sha256_final(ctx);
result[index]=ctx->state[0]==0 ? 1 : 0;
}
//
// main.m
// shallenge-gpu
//
// Created by Grishka on 19.06.2024.
//
#import <Foundation/Foundation.h>
#import <Metal/Metal.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include "sha256.h" // not using CommonCrypto here because the state struct needs to be compatible across CPU and GPU
#define HASHES_PER_RUN (1 << 18)
double machTimebase, machTimestart;
id<MTLDevice> device;
id<MTLLibrary> library;
id<MTLComputePipelineState> pipelineState;
id<MTLFunction> function;
id<MTLCommandQueue> queue;
id<MTLBuffer> ctxDataBuffers[2], ctxStateBuffers[2], ctxLenBuffers[2], resultBuffers[2];
uint64_t counterOffset=0;
uint64_t totalHashes=0;
int bufferIndex=0;
double lastReportTime=0;
const char alphabet[]="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/";
void initMachTime(void){
mach_timebase_info_data_t tb;
mach_timebase_info(&tb);
machTimebase=((double)tb.numer)/tb.denom;
machTimestart=mach_absolute_time();
}
double getCurrentTime(void){
return (mach_absolute_time()-machTimestart)*machTimebase/1000000000.0;
}
void printGoodHash(const char *prefix, uint32_t offset){
printf("%s", prefix);
SHA256_CTX ctx;
sha256_init(&ctx);
sha256_update(&ctx, (unsigned char*)prefix, strlen(prefix));
char buf[]={
alphabet[(offset >> 12) & 63],
alphabet[(offset >> 6) & 63],
alphabet[offset & 63],
0 // for printf
};
sha256_update(&ctx, (unsigned char*)buf, 3);
unsigned char hash[32];
sha256_final(&ctx, hash);
printf("%s:\n", buf);
for(int i=0;i<32;i++){
if(i>0 && i%4==0)
printf(" ");
printf("%02x", hash[i]);
}
printf("\n\n");
}
void enqueueOneRun(bool recursively){
int currentBufIndex=bufferIndex;
uint64_t currentCounterOffset=counterOffset++;
bufferIndex=(bufferIndex+1)%2;
id<MTLBuffer> ctxDataBuffer=ctxDataBuffers[currentBufIndex];
id<MTLBuffer> ctxStateBuffer=ctxStateBuffers[currentBufIndex];
id<MTLBuffer> ctxLengthBuffer=ctxLenBuffers[currentBufIndex];
id<MTLBuffer> resultBuffer=resultBuffers[currentBufIndex];
id<MTLCommandBuffer> commandBuffer=[queue commandBuffer];
id<MTLComputeCommandEncoder> encoder=[commandBuffer computeCommandEncoder];
[encoder setComputePipelineState:pipelineState];
[encoder setBuffer:ctxDataBuffer offset:0 atIndex:0];
[encoder setBuffer:ctxStateBuffer offset:0 atIndex:1];
[encoder setBuffer:ctxLengthBuffer offset:0 atIndex:2];
[encoder setBuffer:resultBuffer offset:0 atIndex:3];
char prefix[]="grishka/AppleM1MaxGPU/I+had+to+learn+Metal+for+this//000000";
size_t prefixLen=strlen(prefix);
prefix[prefixLen-1]=alphabet[currentCounterOffset & 63];
prefix[prefixLen-2]=alphabet[(currentCounterOffset >> 6) & 63];
prefix[prefixLen-3]=alphabet[(currentCounterOffset >> 12) & 63];
prefix[prefixLen-4]=alphabet[(currentCounterOffset >> 18) & 63];
prefix[prefixLen-5]=alphabet[(currentCounterOffset >> 24) & 63];
prefix[prefixLen-6]=alphabet[(currentCounterOffset >> 30) & 63];
SHA256_CTX ctx;
sha256_init(&ctx);
sha256_update(&ctx, (unsigned char*)prefix, prefixLen);
memcpy(ctxDataBuffer.contents, ctx.data, 64);
memcpy(ctxStateBuffer.contents, ctx.state, 32);
uint32_t *lengths=ctxLengthBuffer.contents;
lengths[0]=ctx.datalen;
lengths[1]=(uint32_t)ctx.bitlen;
char *_prefix=strdup(prefix);
MTLSize gridSize=MTLSizeMake(HASHES_PER_RUN, 1, 1);
MTLSize threadGroupSize=MTLSizeMake(MIN(pipelineState.maxTotalThreadsPerThreadgroup, HASHES_PER_RUN), 1, 1);
[encoder dispatchThreads:gridSize threadsPerThreadgroup:threadGroupSize];
[encoder endEncoding];
[commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> cmdBuf){
uint8_t *resultData=resultBuffer.contents;
for(uint32_t i=0;i<HASHES_PER_RUN;i++){
if(resultData[i]){
printGoodHash(_prefix, i);
}
}
free(_prefix);
totalHashes+=HASHES_PER_RUN;
double t=getCurrentTime();
if(t-lastReportTime>10.0){
lastReportTime=t;
printf("Running for %.3f s, avg rate %.3f MH/s\n", t, ((double)totalHashes)/t/1000000.0);
}
if(recursively)
enqueueOneRun(true);
}];
[commandBuffer commit];
}
int main(int argc, const char * argv[]) {
@autoreleasepool {
NSError *error=nil;
NSArray<id<MTLDevice>> *devices=MTLCopyAllDevices();
device=devices[0];
NSLog(@"Using device: %@", device.name);
library=[device newDefaultLibrary];
assert(library);
function=[library newFunctionWithName:@"sha256_precomputed"];
assert(function);
pipelineState=[device newComputePipelineStateWithFunction:function error:&error];
assert(!error && pipelineState);
queue=[device newCommandQueue];
for(int i=0;i<2;i++){
ctxDataBuffers[i]=[device newBufferWithLength:64 options:MTLResourceStorageModeShared];
ctxStateBuffers[i]=[device newBufferWithLength:32 options:MTLResourceStorageModeShared];
ctxLenBuffers[i]=[device newBufferWithLength:8 options:MTLResourceStorageModeShared];
resultBuffers[i]=[device newBufferWithLength:HASHES_PER_RUN options:MTLResourceStorageModeShared];
}
initMachTime();
enqueueOneRun(false);
enqueueOneRun(true);
getchar();
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment