Skip to content

Instantly share code, notes, and snippets.

@monkins1010
Created January 2, 2019 07:19
Show Gist options
  • Save monkins1010/95b0350e3ce3c84b2a00e421cc137b90 to your computer and use it in GitHub Desktop.
Save monkins1010/95b0350e3ce3c84b2a00e421cc137b90 to your computer and use it in GitHub Desktop.
Scanhash routine for VerusHash v2 (verus2); it does not yet work correctly when run on multiple threads.
// Protocol version constant; not referenced by the code visible in this file.
static const int PROTOCOL_VERSION = 170002;
#include <cuda_helper.h>
// Index (in 32-bit words) of the nonce word inside work->data that
// scanhash_verus() advances before returning.
#define EQNONCE_OFFSET 30 /* 27:34 */
#define NONCE_OFT EQNONCE_OFFSET
// Per-thread "already initialised" flag: set by scanhash_verus() after
// verus_init(), cleared by free_verushash().
static bool init[MAX_GPUS] = { 0 };
// NOTE(review): valid_sols and data_sols are not referenced by the code
// visible in this file — confirm whether they are still needed.
static int valid_sols[MAX_GPUS] = { 0 };
static uint8_t _ALIGN(64) data_sols[MAX_GPUS][10][1536] = { 0 }; // 140+3+1344 required
// Number of nonces handed to verus_hash() per launch; thread-local and
// recomputed at the start of every scanhash_verus() call.
static __thread uint32_t throughput = 0;
// GPU-side entry points, defined outside this file.
extern void verus_hash(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t* resNonces);
extern void verus_setBlock(uint8_t *blockf, uint32_t *pTargetIn, uint8_t *lkey, int thr_id);
extern void verus_init(int thr_id);
// Fallback for platforms that do not provide htobe32: map it to swab32.
#ifndef htobe32
#define htobe32(x) swab32(x)
#endif
extern "C" int scanhash_verus(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done)
{
uint32_t _ALIGN(64) endiandata[35];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int dev_id = device_map[thr_id];
struct timeval tv_start, tv_end, diff;
double secs, solps;
uint32_t nonce_buf = 0;
uint32_t intensity = 20;
unsigned char block_41970[] = { 0xfd, 0x40, 0x05, 0x01 };
uint8_t _ALIGN(64) full_data[140 + 3 + 1344] = { 0 };
uint8_t* sol_data = &full_data[140];
// memcpy(full_data, block_41970, 1487);
memcpy(endiandata, pdata, 140);
memcpy(sol_data, block_41970, 4);
memcpy(full_data, endiandata, 140);
throughput = cuda_default_throughput(thr_id, 1U << intensity);
if (init[thr_id]) throughput = min(throughput, max_nonce - nonce_buf);
if (!init[thr_id])
{
cudaSetDevice(dev_id);
if (opt_cudaschedule == -1 && gpu_threads == 1) {
cudaDeviceReset();
// reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
CUDA_LOG_ERROR();
}
cuda_get_arch(thr_id);
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
CVerusHash::init();
CVerusHashV2::init();
verus_init(thr_id);
init[thr_id] = true;
}
alignas(32) uint256 curHash, curTarget = *(uint256*)work->target;
const uint64_t *compResult = (uint64_t *)&curHash;
const uint64_t *compTarget = (uint64_t *)&curTarget;
CVerusHashV2 vh;
verusclhasher &vclh = vh.vclh;
uint8_t *localkey;
localkey = (uint8_t *)malloc(VERUS_KEY_SIZE); // make mem for 128 copies of key to send to gpu
vh.Reset();
vh.Write((const unsigned char*)full_data, 1487);
//vhw.Reset();
//vhw << work->data;
u128 *hashKey = (u128 *)verusclhasher_key.get();
verusclhash_descr *pdesc = (verusclhash_descr *)verusclhasher_descr.get();
void *hasherrefresh = ((unsigned char *)hashKey) + pdesc->keySizeInBytes;
const int keyrefreshsize = vclh.keyrefreshsize(); // number of 256 bit blocks
unsigned char *curBuf = vh.CurBuffer();
vh.FillExtra((u128 *)curBuf);
// skip keygen if it is the current key - TODO fix this
//if (pdesc->seed != *((uint256 *)curBuf))
//{
// generate a new key by chain hashing with Haraka256 from the last curbuf
// assume 256 bit boundary
int n256blks = pdesc->keySizeInBytes >> 5;
unsigned char *pkey = ((unsigned char *)hashKey);
unsigned char *psrc = curBuf;
for (int i = 0; i < n256blks; i++)
{
haraka256(pkey, psrc);
psrc = pkey;
pkey += 32;
}
pdesc->seed = *((uint256 *)curBuf);
memcpy(hasherrefresh, hashKey, pdesc->keySizeInBytes);
memcpy(localkey, hasherrefresh, 8832);
work->valid_nonces = 0;
// vh.Finalize2b((unsigned char *)&curHash);
gettimeofday(&tv_start, NULL);
verus_setBlock((uint8_t*)curBuf, work->target, localkey, thr_id); //set data to gpu kernel
do {
*hashes_done = nonce_buf + throughput;
verus_hash(thr_id, throughput, nonce_buf, work->nonces);
if (work->nonces[0] != UINT32_MAX)
{
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
*((uint32_t *)full_data + 368) = work->nonces[0];
memcpy(curBuf + 32, full_data + 1486 - 14, 15);
vh.Finalize2b((unsigned char *)&curHash);
memcpy(vhash, &curHash, 32);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget))
{
work->valid_nonces++;
memcpy(work->data, endiandata, 140);
int nonce = work->valid_nonces - 1;
memcpy(work->extra, sol_data, 1347);
bn_store_hash_target_ratio(vhash, work->target, work, nonce);
work->nonces[work->valid_nonces - 1] = endiandata[NONCE_OFT];
//pdata[NONCE_OFT] = endiandata[NONCE_OFT] + 1;
goto out;
}
else if (vhash[7] > Htarg) {
gpu_increment_reject(thr_id);
if (!opt_quiet)
gpulog(LOG_WARNING, thr_id, "nonce %08x does not validate on CPU!", work->nonces[0]);
}
}
if ((uint64_t)throughput + (uint64_t)nonce_buf >= (uint64_t)max_nonce) {
break;
}
nonce_buf += throughput;
} while (!work_restart[thr_id].restart);
out:
gettimeofday(&tv_end, NULL);
timeval_subtract(&diff, &tv_end, &tv_start);
secs = (1.0 * diff.tv_sec) + (0.000001 * diff.tv_usec);
solps = (double)nonce_buf / secs;
//gpulog(LOG_INFO, thr_id, "%d k/hashes in %.2f s (%.2f MH/s)", nonce_buf / 1000, secs, solps / 1000000);
// H/s
//*hashes_done = first_nonce;
pdata[NONCE_OFT] = endiandata[NONCE_OFT] + 1;
free(localkey);
return work->valid_nonces;
}
// Cleanup: mark the given miner thread as uninitialised so that the next
// scanhash_verus() call on it runs its one-time setup again.
void free_verushash(int thr_id)
{
    if (init[thr_id]) {
        init[thr_id] = false;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment