billywhizz/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Create the test file

dd if=/dev/urandom of=/dev/shm/random.bin bs=65536 count=10000

Compile the c program

gcc -g -O3 -o sha256sum sha256sum.c

Run the tests

time ./sha256sum /dev/shm/random.bin 
763cc42ea1c97772042e4c0421ff286910525ba3a7071a2b3229f9c3704aed95

real    0m3.010s
user    0m2.918s
sys     0m0.092s

time node sha256sum.js /dev/shm/random.bin 
763cc42ea1c97772042e4c0421ff286910525ba3a7071a2b3229f9c3704aed95

real    0m1.645s
user    0m1.565s
sys     0m0.084s

Generate the Flamegraphs

./perf c
./perf node

Versions


node: 15.6.0
linux kernel: 5.4.62-generic
ubuntu 18.04.5
cpu: Intel(R) Core(TM) i5-8250U CPU @ 1.60GHz
glibc: 2.27
node.js openssl: 1.1.1i

Discussion

It seems that the builtin sha256sum program (part of GNU coreutils) can compute a checksum at only half the rate of the included JavaScript program.
We can see from the flamegraph and the included C source (adapted from the coreutils sha256.c, modified from here) that sha256sum.c spends most of its time in the sha256_process_block function, which is written in plain C.
The JavaScript program spends most of its time in sha256_block_data_order_avx2, which is a CPU-optimized assembly routine in the OpenSSL project.
Conclusion

Any implementation of sha256 checksums using recent openssl will be approx 2x faster than the builtin sha256sum program.

  
## c.svg

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              c.svg
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## node.svg

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              node.svg
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## perf
#!/bin/bash
# Profile one of the two sha256 programs with `perf` and render a flame graph.
#
# Usage: ./perf c      -> profiles ./sha256sum,        writes c.svg
#        ./perf node   -> profiles node sha256sum.js,  writes node.svg
#
# The FlameGraph helper scripts are downloaded into /tmp/FlameGraph the first
# time the script runs (i.e. when that directory does not exist yet).

# Forward a SIGTERM sent to this script on to the profiled child.
_term() {
  echo "Caught SIGTERM signal!"
  kill -TERM "$pid" 2>/dev/null
}
trap _term SIGTERM

dest="/tmp/FlameGraph"
if [ ! -d "$dest" ]; then
  echo "installing flamegraph in $dest"
  mkdir -p "$dest"
  curl -L -o "$dest/stackcollapse.pl" https://raw.githubusercontent.com/brendangregg/FlameGraph/v1.0/stackcollapse-perf.pl
  curl -L -o "$dest/flamegraph.pl" https://raw.githubusercontent.com/brendangregg/FlameGraph/v1.0/flamegraph.pl
fi

wd=$(pwd)
rm -f out.svg

# Start the workload in the background so perf can attach to its pid.
if [ "$1" = "c" ] ; then
  ./sha256sum /dev/shm/random.bin &
  FNAME="c.svg"
else
  # The script shipped alongside this file is sha256sum.js.
  node --perf-basic-prof sha256sum.js /dev/shm/random.bin &
  FNAME="node.svg"
fi
pid=$!

# Length of the sampling window in seconds; the message below is derived from
# it so the two cannot drift apart.
duration=5
echo "running perf for $duration seconds"
sudo perf record -F 99 -p "$pid" -g -- sleep "$duration"
sudo perf script > out.stack
perl "$dest/stackcollapse.pl" < "$wd/out.stack" | perl "$dest/flamegraph.pl" > "$wd/$FNAME"

echo "perf complete, killing $pid"
kill -9 "$pid" 2>/dev/null
# suppress the terminated alert
wait "$pid" 2>/dev/null

# Remove the intermediate files produced by the profiling run.
sudo rm -f /tmp/perf-*.map
rm -f isolate-*.log
rm -f out.stack
sudo rm -f perf.data.old
sudo rm -f perf.data
echo done

## sha256sum.c
// adapted from here: https://github.com/SamB/debian-coreutils/blob/master/lib/sha256.c

/* sha256.c - Functions to compute SHA256 and SHA224 message digest of files or
   memory blocks according to the NIST specification FIPS-180-2.
   Copyright (C) 2005-2006, 2008-2012 Free Software Foundation, Inc.
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* Written by David Madore, considerably copypasting from
   Scott G. Miller's sha1.c
*/

#include <stdio.h>
#include <stdint.h>
#include <stdalign.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Running state of one SHA-256 computation.  */
struct sha256_ctx
{
  uint32_t state[8];    /* current chaining value, eight 32-bit words */
  uint32_t total[2];    /* bytes hashed so far: [0] low word, [1] high word */
  size_t buflen;        /* number of not-yet-hashed bytes held in buffer */
  uint32_t buffer[32];  /* 128 bytes of staging space for partial blocks */
};

/* Forward declarations.  Both functions are defined near the end of the
   file but are called by code that appears earlier; without a prototype
   those calls are implicit function declarations, which C99 removed and
   which newer compilers reject outright.  */
void sha256_process_block (const void *buffer, size_t len,
                           struct sha256_ctx *ctx);
void sha256_process_bytes (const void *buffer, size_t len,
                           struct sha256_ctx *ctx);

/* Digest lengths in bytes.  */
enum { SHA224_DIGEST_SIZE = 28 };
enum { SHA256_DIGEST_SIZE = 32 };

/* Reverse the byte order of the 32-bit value N.  N is expanded four times,
   so it must be free of side effects.  */
#define SWAP(n) \
    (((n) >> 24) | (((n) >> 8) & 0xff00) | (((n) & 0xff00) << 8) | ((n) << 24))

/* Number of bytes requested from the input stream per chunk.  */
#define BLOCKSIZE (32 * 1024)

/* Padding source: a single 0x80 byte followed by zeros.  */
static const unsigned char fillbuf[64] = { 0x80 };

/* Put CTX into the starting state: load the eight initial chaining words
   and clear the byte counter and the pending-byte count.  */
void
sha256_init_ctx (struct sha256_ctx *ctx)
{
  static const uint32_t initial_state[8] = {
    0x6a09e667UL, 0xbb67ae85UL, 0x3c6ef372UL, 0xa54ff53aUL,
    0x510e527fUL, 0x9b05688cUL, 0x1f83d9abUL, 0x5be0cd19UL,
  };
  int word;

  for (word = 0; word < 8; word++)
    ctx->state[word] = initial_state[word];

  ctx->total[0] = 0;
  ctx->total[1] = 0;
  ctx->buflen = 0;
}

/* Store the object representation of V at CP, one byte at a time, so CP
   does not need to be aligned for uint32_t.  */
static inline void
set_uint32 (char *cp, uint32_t v)
{
  const unsigned char *from = (const unsigned char *) &v;
  unsigned char *to = (unsigned char *) cp;
  size_t i;

  for (i = 0; i < sizeof v; i++)
    to[i] = from[i];
}

/* Write the eight state words of CTX to RESBUF, each passed through SWAP
   first.  RESBUF must have room for 32 bytes and need not be aligned.
   Returns RESBUF.  */
void *
sha256_read_ctx (const struct sha256_ctx *ctx, void *resbuf)
{
  char *out = resbuf;
  size_t word = 0;

  while (word < 8)
    {
      set_uint32 (out, SWAP (ctx->state[word]));
      out += sizeof ctx->state[0];
      word++;
    }

  return resbuf;
}


/* Finish the hash held in CTX: add the pending bytes to the byte counter,
   append the padding and the 64-bit message length in bits, and hash the
   final one or two 64-byte blocks.  */
static void
sha256_conclude_ctx (struct sha256_ctx *ctx)
{
  const size_t pending = ctx->buflen;
  char *raw = (char *) ctx->buffer;
  size_t nwords;
  uint32_t bits_hi;
  uint32_t bits_lo;

  /* One block is enough when the 8 length bytes still fit after the
     pending data; otherwise two blocks are needed.  */
  if (pending < 56)
    nwords = 64 / 4;
  else
    nwords = 64 * 2 / 4;

  /* Account for the bytes that are still sitting in the buffer.  */
  ctx->total[0] += pending;
  if (ctx->total[0] < pending)
    ctx->total[1]++;

  /* Convert the byte count to a bit count split over two words and store
     it in the last two words of the final block.  set_uint32 is used so
     no aligned store is assumed.  */
  bits_hi = (ctx->total[1] << 3) | (ctx->total[0] >> 29);
  bits_lo = ctx->total[0] << 3;
  set_uint32 (raw + (nwords - 2) * 4, SWAP (bits_hi));
  set_uint32 (raw + (nwords - 1) * 4, SWAP (bits_lo));

  /* Fill the gap between the data and the length field with padding.  */
  memcpy (raw + pending, fillbuf, (nwords - 2) * 4 - pending);

  /* Hash the padded tail.  */
  sha256_process_block (ctx->buffer, nwords * 4, ctx);
}

/* Finalize CTX (padding and length) and then copy the resulting digest
   into RESBUF.  Returns RESBUF.  */
void *
sha256_finish_ctx (struct sha256_ctx *ctx, void *resbuf)
{
  void *digest;

  sha256_conclude_ctx (ctx);
  digest = sha256_read_ctx (ctx, resbuf);
  return digest;
}

/* Hash everything that can be read from STREAM and store the 32-byte
   digest in RESBLOCK.  Returns 0 on success, 1 if the work buffer cannot
   be allocated or a read error is reported with no bytes returned.  */
int
sha256_stream (FILE *stream, void *resblock)
{
  struct sha256_ctx ctx;
  size_t filled = 0;
  int input_done = 0;

  /* Only the first BLOCKSIZE bytes are ever written by the reads below.  */
  char *buffer = malloc (BLOCKSIZE + 72);
  if (buffer == NULL)
    return 1;

  sha256_init_ctx (&ctx);

  /* Outer loop: one iteration per BLOCKSIZE-byte chunk of the stream.  */
  while (!input_done)
    {
      filled = 0;

      /* Inner loop: keep reading until the chunk is full, because a single
         fread may deliver fewer bytes than requested.  */
      while (filled < BLOCKSIZE)
        {
          size_t got = fread (buffer + filled, 1, BLOCKSIZE - filled, stream);

          filled += got;
          if (filled == BLOCKSIZE)
            break;

          if (got == 0)
            {
              /* The error flag is consulted only when nothing was read, so
                 a short read alone never aborts the computation.  */
              if (ferror (stream))
                {
                  free (buffer);
                  return 1;
                }
              input_done = 1;
              break;
            }

          /* Some bytes arrived; stop if that read also reached end of file
             so fread is not called again after EOF.  */
          if (feof (stream))
            {
              input_done = 1;
              break;
            }
        }

      /* A full chunk is a multiple of 64 bytes and can be hashed directly.  */
      if (!input_done)
        sha256_process_block (buffer, BLOCKSIZE, &ctx);
    }

  /* Hash whatever was collected in the last, incomplete chunk.  */
  if (filled > 0)
    sha256_process_bytes (buffer, filled, &ctx);

  sha256_finish_ctx (&ctx, resblock);
  free (buffer);
  return 0;
}

void *
sha256_buffer (const char *buffer, size_t len, void *resblock)
{
  struct sha256_ctx ctx;

  /* Initialize the computation context.  */
  sha256_init_ctx (&ctx);

  /* Process whole buffer but last len % 64 bytes.  */
  sha256_process_bytes (buffer, len, &ctx);

  /* Put result in desired memory area.  */
  return sha256_finish_ctx (&ctx, resblock);
}

/* Feed LEN bytes starting at BUFFER into CTX.  LEN may be any value:
   complete 64-byte blocks are hashed through sha256_process_block and any
   remainder is kept in ctx->buffer (with its size in ctx->buflen) until
   more data arrives or the context is finished.  */
void
sha256_process_bytes (const void *buffer, size_t len, struct sha256_ctx *ctx)
{
  /* When we already have some bits in our internal buffer concatenate
     both inputs first.  */
  if (ctx->buflen != 0)
    {
      size_t left_over = ctx->buflen;
      /* Take as much new input as fits in the 128-byte internal buffer.  */
      size_t add = 128 - left_over > len ? len : 128 - left_over;

      memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
      ctx->buflen += add;

      if (ctx->buflen > 64)
        {
          /* Hash the largest multiple of 64 bytes now in the buffer.  */
          sha256_process_block (ctx->buffer, ctx->buflen & ~63, ctx);

          /* Keep only the bytes beyond the last complete block and move
             them to the front of the buffer.  */
          ctx->buflen &= 63;
          /* The regions in the following copy operation cannot overlap.  */
          memcpy (ctx->buffer,
                  &((char *) ctx->buffer)[(left_over + add) & ~63],
                  ctx->buflen);
        }

      /* Skip the part of the input that was just consumed.  */
      buffer = (const char *) buffer + add;
      len -= add;
    }

  /* Process available complete blocks.  */
  if (len >= 64)
    {
#if !_STRING_ARCH_unaligned
# define UNALIGNED_P(p) ((uintptr_t) (p) % alignof (uint32_t) != 0)
      /* Misaligned input is copied into ctx->buffer one block at a time
         before hashing.  The loop condition is strictly greater than 64,
         so the last 1..64 bytes are left for the tail handling below.  */
      if (UNALIGNED_P (buffer))
        while (len > 64)
          {
            sha256_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
            buffer = (const char *) buffer + 64;
            len -= 64;
          }
      else
#endif
        {
          /* Suitably aligned input: hash all complete blocks in place.  */
          sha256_process_block (buffer, len & ~63, ctx);
          buffer = (const char *) buffer + (len & ~63);
          len &= 63;
        }
    }

  /* Move remaining bytes in internal buffer.  */
  if (len > 0)
    {
      size_t left_over = ctx->buflen;

      memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
      left_over += len;
      if (left_over >= 64)
        {
          /* A full block has accumulated: hash it, then slide what follows
             it (starting at word 16, i.e. byte 64) to the front.  */
          sha256_process_block (ctx->buffer, 64, ctx);
          left_over -= 64;
          memcpy (ctx->buffer, &ctx->buffer[16], left_over);
        }
      ctx->buflen = left_over;
    }
}

/* --- Code below is the primary difference between sha1.c and sha256.c --- */

/* SHA256 round constants: K(I) is the constant added in round I (0..63).  */
#define K(I) sha256_round_constants[I]
static const uint32_t sha256_round_constants[64] = {
  0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
  0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
  0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
  0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
  0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
  0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
  0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
  0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
  0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
  0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
  0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
  0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
  0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
  0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
  0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
  0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL,
};

/* Round functions.  Every argument is parenthesized so that an expression
   argument (e.g. `a ^ b`) is combined as a unit instead of being split up
   by operator precedence inside the expansion.
   F2 is the bitwise majority of A, B and C.
   F1 picks each bit from F where E has a 1 bit and from G where E has 0.  */
#define F2(A,B,C) ( ( (A) & (B) ) | ( (C) & ( (A) | (B) ) ) )
#define F1(E,F,G) ( (G) ^ ( (E) & ( (F) ^ (G) ) ) )

/* Process LEN bytes of BUFFER, accumulating context into CTX.
   It is assumed that LEN % 64 == 0.
   BUFFER is read as an array of 32-bit words, 16 words per 64-byte block.
   For every block the 64 rounds are run on local copies of the state, and
   the result is added back into ctx->state.  LEN is also added to the
   running byte count in ctx->total.
   Most of this code comes from GnuPG's cipher/sha1.c.  */

void
sha256_process_block (const void *buffer, size_t len, struct sha256_ctx *ctx)
{
  const uint32_t *words = buffer;
  size_t nwords = len / sizeof (uint32_t);
  const uint32_t *endp = words + nwords;
  /* Sliding window holding the 16 most recent message-schedule words.  */
  uint32_t x[16];
  /* Working copies of the eight state words.  */
  uint32_t a = ctx->state[0];
  uint32_t b = ctx->state[1];
  uint32_t c = ctx->state[2];
  uint32_t d = ctx->state[3];
  uint32_t e = ctx->state[4];
  uint32_t f = ctx->state[5];
  uint32_t g = ctx->state[6];
  uint32_t h = ctx->state[7];
  /* Low 32 bits of LEN.  */
  uint32_t lolen = len;

  /* First increment the byte count.  FIPS PUB 180-2 specifies the possible
     length of the file up to 2^64 bits.  Here we only compute the
     number of bytes.  Do a double word increment.  */
  ctx->total[0] += lolen;
  /* The high word receives the upper bits of LEN plus the carry out of the
     low word.  The shift by 32 is written as two smaller shifts so that it
     stays below the width of size_t even when size_t is only 32 bits.  */
  ctx->total[1] += (len >> 31 >> 1) + (ctx->total[0] < lolen);

/* rol rotates a 32-bit value left by N bits; a left rotation by N is the
   same as a right rotation by 32 - N.  S0/S1 are the two mixing functions
   used by the message schedule M, SS0/SS1 the two used by the round R.  */
#define rol(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
#define S0(x) (rol(x,25)^rol(x,14)^(x>>3))
#define S1(x) (rol(x,15)^rol(x,13)^(x>>10))
#define SS0(x) (rol(x,30)^rol(x,19)^rol(x,10))
#define SS1(x) (rol(x,26)^rol(x,21)^rol(x,7))

/* M(I) computes schedule word I from the words 2, 7, 15 and 16 positions
   back (indices taken modulo 16), stores it in the window slot for I and
   yields it as the value of the expression.  */
#define M(I) ( tm =   S1(x[(I-2)&0x0f]) + x[(I-7)&0x0f] \
                    + S0(x[(I-15)&0x0f]) + x[I&0x0f]    \
               , x[I&0x0f] = tm )

/* R performs one round.  Only D and H are assigned; the "rotation" of the
   eight working variables is achieved by permuting the argument order
   from one invocation to the next instead of moving values around.  */
#define R(A,B,C,D,E,F,G,H,K,M)  do { t0 = SS0(A) + F2(A,B,C); \
                                     t1 = H + SS1(E)  \
                                      + F1(E,F,G)     \
                                      + K             \
                                      + M;            \
                                     D += t1;  H = t0 + t1; \
                               } while(0)

  /* One iteration per 64-byte block.  */
  while (words < endp)
    {
      uint32_t tm;
      uint32_t t0, t1;
      int t;
      /* Load the 16 input words of this block, passing each through SWAP.  */
      /* FIXME: see sha1.c for a better implementation.  */
      for (t = 0; t < 16; t++)
        {
          x[t] = SWAP (*words);
          words++;
        }

      /* Rounds 0..15 use the input words directly; rounds 16..63 use
         schedule words produced on the fly by M.  */
      R( a, b, c, d, e, f, g, h, K( 0), x[ 0] );
      R( h, a, b, c, d, e, f, g, K( 1), x[ 1] );
      R( g, h, a, b, c, d, e, f, K( 2), x[ 2] );
      R( f, g, h, a, b, c, d, e, K( 3), x[ 3] );
      R( e, f, g, h, a, b, c, d, K( 4), x[ 4] );
      R( d, e, f, g, h, a, b, c, K( 5), x[ 5] );
      R( c, d, e, f, g, h, a, b, K( 6), x[ 6] );
      R( b, c, d, e, f, g, h, a, K( 7), x[ 7] );
      R( a, b, c, d, e, f, g, h, K( 8), x[ 8] );
      R( h, a, b, c, d, e, f, g, K( 9), x[ 9] );
      R( g, h, a, b, c, d, e, f, K(10), x[10] );
      R( f, g, h, a, b, c, d, e, K(11), x[11] );
      R( e, f, g, h, a, b, c, d, K(12), x[12] );
      R( d, e, f, g, h, a, b, c, K(13), x[13] );
      R( c, d, e, f, g, h, a, b, K(14), x[14] );
      R( b, c, d, e, f, g, h, a, K(15), x[15] );
      R( a, b, c, d, e, f, g, h, K(16), M(16) );
      R( h, a, b, c, d, e, f, g, K(17), M(17) );
      R( g, h, a, b, c, d, e, f, K(18), M(18) );
      R( f, g, h, a, b, c, d, e, K(19), M(19) );
      R( e, f, g, h, a, b, c, d, K(20), M(20) );
      R( d, e, f, g, h, a, b, c, K(21), M(21) );
      R( c, d, e, f, g, h, a, b, K(22), M(22) );
      R( b, c, d, e, f, g, h, a, K(23), M(23) );
      R( a, b, c, d, e, f, g, h, K(24), M(24) );
      R( h, a, b, c, d, e, f, g, K(25), M(25) );
      R( g, h, a, b, c, d, e, f, K(26), M(26) );
      R( f, g, h, a, b, c, d, e, K(27), M(27) );
      R( e, f, g, h, a, b, c, d, K(28), M(28) );
      R( d, e, f, g, h, a, b, c, K(29), M(29) );
      R( c, d, e, f, g, h, a, b, K(30), M(30) );
      R( b, c, d, e, f, g, h, a, K(31), M(31) );
      R( a, b, c, d, e, f, g, h, K(32), M(32) );
      R( h, a, b, c, d, e, f, g, K(33), M(33) );
      R( g, h, a, b, c, d, e, f, K(34), M(34) );
      R( f, g, h, a, b, c, d, e, K(35), M(35) );
      R( e, f, g, h, a, b, c, d, K(36), M(36) );
      R( d, e, f, g, h, a, b, c, K(37), M(37) );
      R( c, d, e, f, g, h, a, b, K(38), M(38) );
      R( b, c, d, e, f, g, h, a, K(39), M(39) );
      R( a, b, c, d, e, f, g, h, K(40), M(40) );
      R( h, a, b, c, d, e, f, g, K(41), M(41) );
      R( g, h, a, b, c, d, e, f, K(42), M(42) );
      R( f, g, h, a, b, c, d, e, K(43), M(43) );
      R( e, f, g, h, a, b, c, d, K(44), M(44) );
      R( d, e, f, g, h, a, b, c, K(45), M(45) );
      R( c, d, e, f, g, h, a, b, K(46), M(46) );
      R( b, c, d, e, f, g, h, a, K(47), M(47) );
      R( a, b, c, d, e, f, g, h, K(48), M(48) );
      R( h, a, b, c, d, e, f, g, K(49), M(49) );
      R( g, h, a, b, c, d, e, f, K(50), M(50) );
      R( f, g, h, a, b, c, d, e, K(51), M(51) );
      R( e, f, g, h, a, b, c, d, K(52), M(52) );
      R( d, e, f, g, h, a, b, c, K(53), M(53) );
      R( c, d, e, f, g, h, a, b, K(54), M(54) );
      R( b, c, d, e, f, g, h, a, K(55), M(55) );
      R( a, b, c, d, e, f, g, h, K(56), M(56) );
      R( h, a, b, c, d, e, f, g, K(57), M(57) );
      R( g, h, a, b, c, d, e, f, K(58), M(58) );
      R( f, g, h, a, b, c, d, e, K(59), M(59) );
      R( e, f, g, h, a, b, c, d, K(60), M(60) );
      R( d, e, f, g, h, a, b, c, K(61), M(61) );
      R( c, d, e, f, g, h, a, b, K(62), M(62) );
      R( b, c, d, e, f, g, h, a, K(63), M(63) );

      /* Add this block's result into the persistent state, and reload the
         working variables from it for the next block.  */
      a = ctx->state[0] += a;
      b = ctx->state[1] += b;
      c = ctx->state[2] += c;
      d = ctx->state[3] += d;
      e = ctx->state[4] += e;
      f = ctx->state[5] += f;
      g = ctx->state[6] += g;
      h = ctx->state[7] += h;
    }
}

/* Print the SHA-256 digest of the file named by argv[1] as 64 lowercase
   hex digits followed by a newline.

   Returns 0 on success.  Returns 1, after a message on stderr, when no file
   name was given, the file cannot be opened, or sha256_stream reports a
   failure.  */
int main (int argc, char** argv) {
  if (argc < 2) {
    /* Without this check argv[1] would be NULL and be handed to fopen.  */
    fprintf(stderr, "usage: %s FILE\n", argc > 0 ? argv[0] : "sha256sum");
    return 1;
  }

  FILE* fp = fopen(argv[1], "rb");
  if (fp == NULL) {
    /* A NULL stream must never reach sha256_stream, which reads from it.  */
    perror(argv[1]);
    return 1;
  }

  /* 32 bytes = 256 bits; a stack buffer needs no allocation check or free.  */
  uint8_t digest[32] = {0};
  int failed = sha256_stream(fp, digest);
  fclose(fp);
  if (failed) {
    fprintf(stderr, "%s: failed to compute digest\n", argv[1]);
    return 1;
  }

  for (size_t i = 0; i < sizeof digest; i++) {
    fprintf(stdout, "%.2x", digest[i] & 0xff);
  }
  fprintf(stdout, "\n");
  return 0;
}

## sha256sum.js
const { createHash } = require('crypto')
const { closeSync, openSync, readSync } = require('fs')

/**
 * Compute the SHA-256 digest of a file, reading it synchronously in chunks
 * of up to 65536 bytes.
 *
 * @param {string} file - path of the file to hash
 * @returns {string} the digest as a lowercase hex string
 * @throws whatever openSync/readSync throw (e.g. when the file is missing);
 *   the descriptor is closed before the error propagates.
 */
function checksum (file) {
  const source = Buffer.alloc(65536)
  const hash = createHash('sha256')
  const fd = openSync(file)
  try {
    let bytes = readSync(fd, source)
    while (bytes > 0) {
      // Only the first `bytes` bytes of the buffer belong to this read; a
      // short read must not feed stale data from a previous chunk.
      hash.update(bytes < source.length ? source.subarray(0, bytes) : source)
      bytes = readSync(fd, source)
    }
  } finally {
    // Release the descriptor on both the success and the error path.
    closeSync(fd)
  }
  return hash.digest('hex')
}

// Print the digest of the file given as the first command-line argument,
// falling back to 'random.bin' when no argument is supplied.
console.log(checksum(process.argv[2] || 'random.bin'))
	#!/bin/bash
	_term() {
	echo "Caught SIGTERM signal!"
	kill -TERM "$pid" 2>/dev/null
	}
	trap _term SIGTERM
	dest="/tmp/FlameGraph"
	if [ ! -d "/tmp/FlameGraph" ]; then
	echo "installing flamegraph in /tmp/FlameGraph"
	mkdir -p $dest
	curl -L -o /tmp/FlameGraph/stackcollapse.pl https://raw.githubusercontent.com/brendangregg/FlameGraph/v1.0/stackcollapse-perf.pl
	curl -L -o /tmp/FlameGraph/flamegraph.pl https://raw.githubusercontent.com/brendangregg/FlameGraph/v1.0/flamegraph.pl
	fi
	wd=$(pwd)
	rm -f out.svg
	if [ "$1" = "c" ] ; then
	./sha256sum /dev/shm/random.bin &
	FNAME="c.svg"
	else
	node --perf-basic-prof sha256sum-node.js /dev/shm/random.bin &
	FNAME="node.svg"
	fi
	pid=$!
	echo "running perf for 30 seconds"
	sudo perf record -F 99 -p $pid -g -- sleep 5
	sudo perf script > out.stack
	perl $dest/stackcollapse.pl < $wd/out.stack | perl $dest/flamegraph.pl > $wd/$FNAME
	echo "perf complete, killing $pid"
	kill -9 $pid 2>/dev/null
	# suppress the terminated alert
	wait $pid 2>/dev/null
	sudo rm -f /tmp/perf-*.map
	rm -f isolate-*.log
	rm -f out.stack
	sudo rm -f perf.data.old
	sudo rm -f perf.data
	echo done
	// adapted from here: https://github.com/SamB/debian-coreutils/blob/master/lib/sha256.c

	/* sha256.c - Functions to compute SHA256 and SHA224 message digest of files or
	memory blocks according to the NIST specification FIPS-180-2.
	Copyright (C) 2005-2006, 2008-2012 Free Software Foundation, Inc.
	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.
	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.
	You should have received a copy of the GNU General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>. */

	/* Written by David Madore, considerably copypasting from
	Scott G. Miller's sha1.c
	*/

	#include <stdio.h>
	#include <stdint.h>
	#include <stdalign.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>

	struct sha256_ctx
	{
	uint32_t state[8];
	uint32_t total[2];
	size_t buflen;
	uint32_t buffer[32];
	};

	enum { SHA224_DIGEST_SIZE = 224 / 8 };
	enum { SHA256_DIGEST_SIZE = 256 / 8 };

	# define SWAP(n) \
	(((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))

	#define BLOCKSIZE 32768
	static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ };

	void
	sha256_init_ctx (struct sha256_ctx *ctx)
	{
	ctx->state[0] = 0x6a09e667UL;
	ctx->state[1] = 0xbb67ae85UL;
	ctx->state[2] = 0x3c6ef372UL;
	ctx->state[3] = 0xa54ff53aUL;
	ctx->state[4] = 0x510e527fUL;
	ctx->state[5] = 0x9b05688cUL;
	ctx->state[6] = 0x1f83d9abUL;
	ctx->state[7] = 0x5be0cd19UL;

	ctx->total[0] = ctx->total[1] = 0;
	ctx->buflen = 0;
	}

	static inline void
	set_uint32 (char *cp, uint32_t v)
	{
	memcpy (cp, &v, sizeof v);
	}

	void *
	sha256_read_ctx (const struct sha256_ctx *ctx, void *resbuf)
	{
	int i;
	char *r = resbuf;

	for (i = 0; i < 8; i++)
	set_uint32 (r + i * sizeof ctx->state[0], SWAP (ctx->state[i]));

	return resbuf;
	}


	static void
	sha256_conclude_ctx (struct sha256_ctx *ctx)
	{
	/* Take yet unprocessed bytes into account. */
	size_t bytes = ctx->buflen;
	size_t size = (bytes < 56) ? 64 / 4 : 64 * 2 / 4;

	/* Now count remaining bytes. */
	ctx->total[0] += bytes;
	if (ctx->total[0] < bytes)
	++ctx->total[1];

	/* Put the 64-bit file length in *bits* at the end of the buffer.
	Use set_uint32 rather than a simple assignment, to avoid risk of
	unaligned access. */
	set_uint32 ((char *) &ctx->buffer[size - 2],
	SWAP ((ctx->total[1] << 3) | (ctx->total[0] >> 29)));
	set_uint32 ((char *) &ctx->buffer[size - 1],
	SWAP (ctx->total[0] << 3));

	memcpy (&((char *) ctx->buffer)[bytes], fillbuf, (size - 2) * 4 - bytes);

	/* Process last bytes. */
	sha256_process_block (ctx->buffer, size * 4, ctx);
	}

	void *
	sha256_finish_ctx (struct sha256_ctx *ctx, void *resbuf)
	{
	sha256_conclude_ctx (ctx);
	return sha256_read_ctx (ctx, resbuf);
	}

	int
	sha256_stream (FILE *stream, void *resblock)
	{
	struct sha256_ctx ctx;
	size_t sum;

	char *buffer = malloc (BLOCKSIZE + 72);
	if (!buffer)
	return 1;

	/* Initialize the computation context. */
	sha256_init_ctx (&ctx);

	/* Iterate over full file contents. */
	while (1)
	{
	/* We read the file in blocks of BLOCKSIZE bytes. One call of the
	computation function processes the whole buffer so that with the
	next round of the loop another block can be read. */
	size_t n;
	sum = 0;

	/* Read block. Take care for partial reads. */
	while (1)
	{
	n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);

	sum += n;

	if (sum == BLOCKSIZE)
	break;

	if (n == 0)
	{
	/* Check for the error flag IFF N == 0, so that we don't
	exit the loop after a partial read due to e.g., EAGAIN
	or EWOULDBLOCK. */
	if (ferror (stream))
	{
	free (buffer);
	return 1;
	}
	goto process_partial_block;
	}

	/* We've read at least one byte, so ignore errors. But always
	check for EOF, since feof may be true even though N > 0.
	Otherwise, we could end up calling fread after EOF. */
	if (feof (stream))
	goto process_partial_block;
	}

	/* Process buffer with BLOCKSIZE bytes. Note that
	BLOCKSIZE % 64 == 0
	*/
	sha256_process_block (buffer, BLOCKSIZE, &ctx);
	}

	process_partial_block:;

	/* Process any remaining bytes. */
	if (sum > 0)
	sha256_process_bytes (buffer, sum, &ctx);

	/* Construct result in desired memory. */
	sha256_finish_ctx (&ctx, resblock);
	free (buffer);
	return 0;
	}

	void *
	sha256_buffer (const char *buffer, size_t len, void *resblock)
	{
	struct sha256_ctx ctx;

	/* Initialize the computation context. */
	sha256_init_ctx (&ctx);

	/* Process whole buffer but last len % 64 bytes. */
	sha256_process_bytes (buffer, len, &ctx);

	/* Put result in desired memory area. */
	return sha256_finish_ctx (&ctx, resblock);
	}

	void
	sha256_process_bytes (const void *buffer, size_t len, struct sha256_ctx *ctx)
	{
	/* When we already have some bits in our internal buffer concatenate
	both inputs first. */
	if (ctx->buflen != 0)
	{
	size_t left_over = ctx->buflen;
	size_t add = 128 - left_over > len ? len : 128 - left_over;

	memcpy (&((char *) ctx->buffer)[left_over], buffer, add);
	ctx->buflen += add;

	if (ctx->buflen > 64)
	{
	sha256_process_block (ctx->buffer, ctx->buflen & ~63, ctx);

	ctx->buflen &= 63;
	/* The regions in the following copy operation cannot overlap. */
	memcpy (ctx->buffer,
	&((char *) ctx->buffer)[(left_over + add) & ~63],
	ctx->buflen);
	}

	buffer = (const char *) buffer + add;
	len -= add;
	}

	/* Process available complete blocks. */
	if (len >= 64)
	{
	#if !_STRING_ARCH_unaligned
	# define UNALIGNED_P(p) ((uintptr_t) (p) % alignof (uint32_t) != 0)
	if (UNALIGNED_P (buffer))
	while (len > 64)
	{
	sha256_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
	buffer = (const char *) buffer + 64;
	len -= 64;
	}
	else
	#endif
	{
	sha256_process_block (buffer, len & ~63, ctx);
	buffer = (const char *) buffer + (len & ~63);
	len &= 63;
	}
	}

	/* Move remaining bytes in internal buffer. */
	if (len > 0)
	{
	size_t left_over = ctx->buflen;

	memcpy (&((char *) ctx->buffer)[left_over], buffer, len);
	left_over += len;
	if (left_over >= 64)
	{
	sha256_process_block (ctx->buffer, 64, ctx);
	left_over -= 64;
	memcpy (ctx->buffer, &ctx->buffer[16], left_over);
	}
	ctx->buflen = left_over;
	}
	}

	/* --- Code below is the primary difference between sha1.c and sha256.c --- */

	/* SHA256 round constants */
	#define K(I) sha256_round_constants[I]
	static const uint32_t sha256_round_constants[64] = {
	0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
	0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
	0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
	0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
	0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
	0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
	0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
	0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
	0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
	0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
	0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
	0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
	0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
	0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
	0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
	0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL,
	};

	/* Round functions. */
	#define F2(A,B,C) ( ( A & B ) | ( C & ( A | B ) ) )
	#define F1(E,F,G) ( G ^ ( E & ( F ^ G ) ) )

	/* Process LEN bytes of BUFFER, accumulating context into CTX.
	It is assumed that LEN % 64 == 0.
	Most of this code comes from GnuPG's cipher/sha1.c. */

	void
	sha256_process_block (const void *buffer, size_t len, struct sha256_ctx *ctx)
	{
	const uint32_t *words = buffer;
	size_t nwords = len / sizeof (uint32_t);
	const uint32_t *endp = words + nwords;
	uint32_t x[16];
	uint32_t a = ctx->state[0];
	uint32_t b = ctx->state[1];
	uint32_t c = ctx->state[2];
	uint32_t d = ctx->state[3];
	uint32_t e = ctx->state[4];
	uint32_t f = ctx->state[5];
	uint32_t g = ctx->state[6];
	uint32_t h = ctx->state[7];
	uint32_t lolen = len;

	/* First increment the byte count. FIPS PUB 180-2 specifies the possible
	length of the file up to 2^64 bits. Here we only compute the
	number of bytes. Do a double word increment. */
	ctx->total[0] += lolen;
	ctx->total[1] += (len >> 31 >> 1) + (ctx->total[0] < lolen);

	#define rol(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
	#define S0(x) (rol(x,25)^rol(x,14)^(x>>3))
	#define S1(x) (rol(x,15)^rol(x,13)^(x>>10))
	#define SS0(x) (rol(x,30)^rol(x,19)^rol(x,10))
	#define SS1(x) (rol(x,26)^rol(x,21)^rol(x,7))

	#define M(I) ( tm = S1(x[(I-2)&0x0f]) + x[(I-7)&0x0f] \
	+ S0(x[(I-15)&0x0f]) + x[I&0x0f] \
	, x[I&0x0f] = tm )

	#define R(A,B,C,D,E,F,G,H,K,M) do { t0 = SS0(A) + F2(A,B,C); \
	t1 = H + SS1(E) \
	+ F1(E,F,G) \
	+ K \
	+ M; \
	D += t1; H = t0 + t1; \
	} while(0)

	while (words < endp)
	{
	uint32_t tm;
	uint32_t t0, t1;
	int t;
	/* FIXME: see sha1.c for a better implementation. */
	for (t = 0; t < 16; t++)
	{
	x[t] = SWAP (*words);
	words++;
	}

	R( a, b, c, d, e, f, g, h, K( 0), x[ 0] );
	R( h, a, b, c, d, e, f, g, K( 1), x[ 1] );
	R( g, h, a, b, c, d, e, f, K( 2), x[ 2] );
	R( f, g, h, a, b, c, d, e, K( 3), x[ 3] );
	R( e, f, g, h, a, b, c, d, K( 4), x[ 4] );
	R( d, e, f, g, h, a, b, c, K( 5), x[ 5] );
	R( c, d, e, f, g, h, a, b, K( 6), x[ 6] );
	R( b, c, d, e, f, g, h, a, K( 7), x[ 7] );
	R( a, b, c, d, e, f, g, h, K( 8), x[ 8] );
	R( h, a, b, c, d, e, f, g, K( 9), x[ 9] );
	R( g, h, a, b, c, d, e, f, K(10), x[10] );
	R( f, g, h, a, b, c, d, e, K(11), x[11] );
	R( e, f, g, h, a, b, c, d, K(12), x[12] );
	R( d, e, f, g, h, a, b, c, K(13), x[13] );
	R( c, d, e, f, g, h, a, b, K(14), x[14] );
	R( b, c, d, e, f, g, h, a, K(15), x[15] );
	R( a, b, c, d, e, f, g, h, K(16), M(16) );
	R( h, a, b, c, d, e, f, g, K(17), M(17) );
	R( g, h, a, b, c, d, e, f, K(18), M(18) );
	R( f, g, h, a, b, c, d, e, K(19), M(19) );
	R( e, f, g, h, a, b, c, d, K(20), M(20) );
	R( d, e, f, g, h, a, b, c, K(21), M(21) );
	R( c, d, e, f, g, h, a, b, K(22), M(22) );
	R( b, c, d, e, f, g, h, a, K(23), M(23) );
	R( a, b, c, d, e, f, g, h, K(24), M(24) );
	R( h, a, b, c, d, e, f, g, K(25), M(25) );
	R( g, h, a, b, c, d, e, f, K(26), M(26) );
	R( f, g, h, a, b, c, d, e, K(27), M(27) );
	R( e, f, g, h, a, b, c, d, K(28), M(28) );
	R( d, e, f, g, h, a, b, c, K(29), M(29) );
	R( c, d, e, f, g, h, a, b, K(30), M(30) );
	R( b, c, d, e, f, g, h, a, K(31), M(31) );
	R( a, b, c, d, e, f, g, h, K(32), M(32) );
	R( h, a, b, c, d, e, f, g, K(33), M(33) );
	R( g, h, a, b, c, d, e, f, K(34), M(34) );
	R( f, g, h, a, b, c, d, e, K(35), M(35) );
	R( e, f, g, h, a, b, c, d, K(36), M(36) );
	R( d, e, f, g, h, a, b, c, K(37), M(37) );
	R( c, d, e, f, g, h, a, b, K(38), M(38) );
	R( b, c, d, e, f, g, h, a, K(39), M(39) );
	R( a, b, c, d, e, f, g, h, K(40), M(40) );
	R( h, a, b, c, d, e, f, g, K(41), M(41) );
	R( g, h, a, b, c, d, e, f, K(42), M(42) );
	R( f, g, h, a, b, c, d, e, K(43), M(43) );
	R( e, f, g, h, a, b, c, d, K(44), M(44) );
	R( d, e, f, g, h, a, b, c, K(45), M(45) );
	R( c, d, e, f, g, h, a, b, K(46), M(46) );
	R( b, c, d, e, f, g, h, a, K(47), M(47) );
	R( a, b, c, d, e, f, g, h, K(48), M(48) );
	R( h, a, b, c, d, e, f, g, K(49), M(49) );
	R( g, h, a, b, c, d, e, f, K(50), M(50) );
	R( f, g, h, a, b, c, d, e, K(51), M(51) );
	R( e, f, g, h, a, b, c, d, K(52), M(52) );
	R( d, e, f, g, h, a, b, c, K(53), M(53) );
	R( c, d, e, f, g, h, a, b, K(54), M(54) );
	R( b, c, d, e, f, g, h, a, K(55), M(55) );
	R( a, b, c, d, e, f, g, h, K(56), M(56) );
	R( h, a, b, c, d, e, f, g, K(57), M(57) );
	R( g, h, a, b, c, d, e, f, K(58), M(58) );
	R( f, g, h, a, b, c, d, e, K(59), M(59) );
	R( e, f, g, h, a, b, c, d, K(60), M(60) );
	R( d, e, f, g, h, a, b, c, K(61), M(61) );
	R( c, d, e, f, g, h, a, b, K(62), M(62) );
	R( b, c, d, e, f, g, h, a, K(63), M(63) );

	a = ctx->state[0] += a;
	b = ctx->state[1] += b;
	c = ctx->state[2] += c;
	d = ctx->state[3] += d;
	e = ctx->state[4] += e;
	f = ctx->state[5] += f;
	g = ctx->state[6] += g;
	h = ctx->state[7] += h;
	}
	}

	int main (int argc, char** argv) {
	FILE* fp = fopen(argv[1], "rb");
	uint8_t* digest = calloc(1, 32);
	sha256_stream(fp, (void*)digest);
	for (int i = 0; i < 32; i++) {
	fprintf(stdout, "%.2x", digest[i] & 0xff);
	}
	fprintf(stdout, "\n");
	return 0;
	}
	const { createHash } = require('crypto')
	const { openSync, readSync } = require('fs')

	function checksum (file) {
	const source = Buffer.alloc(65536)
	const fd = openSync(file)
	const hash = createHash('sha256')
	let bytes = readSync(fd, source)
	while (bytes > 0) {
	if (bytes < 65536) {
	hash.update(source.slice(0, bytes))
	} else {
	hash.update(source)
	}
	bytes = readSync(fd, source)
	}
	return hash.digest('hex')
	}

	console.log(checksum(process.argv[2] || 'random.bin'))