Created
January 2, 2019 07:19
-
-
Save monkins1010/95b0350e3ce3c84b2a00e421cc137b90 to your computer and use it in GitHub Desktop.
scanhash routine for verus2, not working on multiple threads
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Protocol version constant; not referenced anywhere else in this file.
// NOTE(review): presumably consumed by one of the project headers - confirm.
static const int PROTOCOL_VERSION = 170002;

#include <cuda_helper.h>

// Index (in 32-bit words) of the nonce word inside the 140-byte block header.
#define EQNONCE_OFFSET 30 /* 27:34 */
#define NONCE_OFT EQNONCE_OFFSET

// Per-thread-id state. init[] records whether scanhash_verus() has already
// run its one-time setup for that slot; it is cleared by free_verushash().
static bool init[MAX_GPUS] = { 0 };
// valid_sols / data_sols are not used by the code in this file.
static int valid_sols[MAX_GPUS] = { 0 };
static uint8_t _ALIGN(64) data_sols[MAX_GPUS][10][1536] = { 0 }; // 140+3+1344 required

// Number of nonces handed to verus_hash() per call (thread-local).
static __thread uint32_t throughput = 0;

// GPU-side entry points, implemented elsewhere in the project.
extern void verus_hash(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t* resNonces);
extern void verus_setBlock(uint8_t *blockf, uint32_t *pTargetIn, uint8_t *lkey, int thr_id);
extern void verus_init(int thr_id);

// Fallback for platforms whose headers do not provide htobe32.
#ifndef htobe32
#define htobe32(x) swab32(x)
#endif
extern "C" int scanhash_verus(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done) | |
{ | |
uint32_t _ALIGN(64) endiandata[35]; | |
uint32_t *pdata = work->data; | |
uint32_t *ptarget = work->target; | |
int dev_id = device_map[thr_id]; | |
struct timeval tv_start, tv_end, diff; | |
double secs, solps; | |
uint32_t nonce_buf = 0; | |
uint32_t intensity = 20; | |
unsigned char block_41970[] = { 0xfd, 0x40, 0x05, 0x01 }; | |
uint8_t _ALIGN(64) full_data[140 + 3 + 1344] = { 0 }; | |
uint8_t* sol_data = &full_data[140]; | |
// memcpy(full_data, block_41970, 1487); | |
memcpy(endiandata, pdata, 140); | |
memcpy(sol_data, block_41970, 4); | |
memcpy(full_data, endiandata, 140); | |
throughput = cuda_default_throughput(thr_id, 1U << intensity); | |
if (init[thr_id]) throughput = min(throughput, max_nonce - nonce_buf); | |
if (!init[thr_id]) | |
{ | |
cudaSetDevice(dev_id); | |
if (opt_cudaschedule == -1 && gpu_threads == 1) { | |
cudaDeviceReset(); | |
// reduce cpu usage | |
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); | |
CUDA_LOG_ERROR(); | |
} | |
cuda_get_arch(thr_id); | |
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput); | |
CVerusHash::init(); | |
CVerusHashV2::init(); | |
verus_init(thr_id); | |
init[thr_id] = true; | |
} | |
alignas(32) uint256 curHash, curTarget = *(uint256*)work->target; | |
const uint64_t *compResult = (uint64_t *)&curHash; | |
const uint64_t *compTarget = (uint64_t *)&curTarget; | |
CVerusHashV2 vh; | |
verusclhasher &vclh = vh.vclh; | |
uint8_t *localkey; | |
localkey = (uint8_t *)malloc(VERUS_KEY_SIZE); // make mem for 128 copies of key to send to gpu | |
vh.Reset(); | |
vh.Write((const unsigned char*)full_data, 1487); | |
//vhw.Reset(); | |
//vhw << work->data; | |
u128 *hashKey = (u128 *)verusclhasher_key.get(); | |
verusclhash_descr *pdesc = (verusclhash_descr *)verusclhasher_descr.get(); | |
void *hasherrefresh = ((unsigned char *)hashKey) + pdesc->keySizeInBytes; | |
const int keyrefreshsize = vclh.keyrefreshsize(); // number of 256 bit blocks | |
unsigned char *curBuf = vh.CurBuffer(); | |
vh.FillExtra((u128 *)curBuf); | |
// skip keygen if it is the current key - TODO fix this | |
//if (pdesc->seed != *((uint256 *)curBuf)) | |
//{ | |
// generate a new key by chain hashing with Haraka256 from the last curbuf | |
// assume 256 bit boundary | |
int n256blks = pdesc->keySizeInBytes >> 5; | |
unsigned char *pkey = ((unsigned char *)hashKey); | |
unsigned char *psrc = curBuf; | |
for (int i = 0; i < n256blks; i++) | |
{ | |
haraka256(pkey, psrc); | |
psrc = pkey; | |
pkey += 32; | |
} | |
pdesc->seed = *((uint256 *)curBuf); | |
memcpy(hasherrefresh, hashKey, pdesc->keySizeInBytes); | |
memcpy(localkey, hasherrefresh, 8832); | |
work->valid_nonces = 0; | |
// vh.Finalize2b((unsigned char *)&curHash); | |
gettimeofday(&tv_start, NULL); | |
verus_setBlock((uint8_t*)curBuf, work->target, localkey, thr_id); //set data to gpu kernel | |
do { | |
*hashes_done = nonce_buf + throughput; | |
verus_hash(thr_id, throughput, nonce_buf, work->nonces); | |
if (work->nonces[0] != UINT32_MAX) | |
{ | |
const uint32_t Htarg = ptarget[7]; | |
uint32_t _ALIGN(64) vhash[8]; | |
*((uint32_t *)full_data + 368) = work->nonces[0]; | |
memcpy(curBuf + 32, full_data + 1486 - 14, 15); | |
vh.Finalize2b((unsigned char *)&curHash); | |
memcpy(vhash, &curHash, 32); | |
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) | |
{ | |
work->valid_nonces++; | |
memcpy(work->data, endiandata, 140); | |
int nonce = work->valid_nonces - 1; | |
memcpy(work->extra, sol_data, 1347); | |
bn_store_hash_target_ratio(vhash, work->target, work, nonce); | |
work->nonces[work->valid_nonces - 1] = endiandata[NONCE_OFT]; | |
//pdata[NONCE_OFT] = endiandata[NONCE_OFT] + 1; | |
goto out; | |
} | |
else if (vhash[7] > Htarg) { | |
gpu_increment_reject(thr_id); | |
if (!opt_quiet) | |
gpulog(LOG_WARNING, thr_id, "nonce %08x does not validate on CPU!", work->nonces[0]); | |
} | |
} | |
if ((uint64_t)throughput + (uint64_t)nonce_buf >= (uint64_t)max_nonce) { | |
break; | |
} | |
nonce_buf += throughput; | |
} while (!work_restart[thr_id].restart); | |
out: | |
gettimeofday(&tv_end, NULL); | |
timeval_subtract(&diff, &tv_end, &tv_start); | |
secs = (1.0 * diff.tv_sec) + (0.000001 * diff.tv_usec); | |
solps = (double)nonce_buf / secs; | |
//gpulog(LOG_INFO, thr_id, "%d k/hashes in %.2f s (%.2f MH/s)", nonce_buf / 1000, secs, solps / 1000000); | |
// H/s | |
//*hashes_done = first_nonce; | |
pdata[NONCE_OFT] = endiandata[NONCE_OFT] + 1; | |
free(localkey); | |
return work->valid_nonces; | |
} | |
// cleanup | |
void free_verushash(int thr_id) | |
{ | |
if (!init[thr_id]) | |
return; | |
init[thr_id] = false; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment