Skip to content

Instantly share code, notes, and snippets.

@darkarnium
Last active January 27, 2022 13:56
Show Gist options
  • Save darkarnium/56c078c30bb359d8e013e8f56af80c3d to your computer and use it in GitHub Desktop.
Save darkarnium/56c078c30bb359d8e013e8f56af80c3d to your computer and use it in GitHub Desktop.
Go vs Python - SHA1 and MD5
package hasher
import (
"crypto/md5"
"crypto/sha1"
"encoding/hex"
"io"
"os"
)
// HashSHA1 returns the lowercase hex-encoded SHA-1 digest of the file
// "nexus_latest.tar" in the current working directory, reading it through a
// buffer of chunk bytes.
//
// It panics if chunk is not positive, if the file cannot be opened, or if a
// read fails with any error other than io.EOF.
func HashSHA1(chunk int) string {
	if chunk <= 0 {
		// A zero-length buffer makes Read return (0, nil) on every call, so
		// the loop below would never reach io.EOF.
		panic("hasher: chunk size must be positive")
	}

	file, err := os.Open("nexus_latest.tar")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	hash := sha1.New()
	buffer := make([]byte, chunk)
	for {
		n, err := file.Read(buffer)
		// Feed the bytes to the hash before inspecting err: a Read may
		// return n > 0 together with an error.
		hash.Write(buffer[:n])
		if err == io.EOF {
			break
		}
		if err != nil {
			// Any other error would otherwise be retried forever; fail
			// loudly, the same way the open error is handled.
			panic(err)
		}
	}
	return hex.EncodeToString(hash.Sum(nil))
}
// HashMD5 returns the lowercase hex-encoded MD5 digest of the file
// "nexus_latest.tar" in the current working directory, reading it through a
// buffer of chunk bytes.
//
// It panics if chunk is not positive, if the file cannot be opened, or if a
// read fails with any error other than io.EOF.
func HashMD5(chunk int) string {
	if chunk <= 0 {
		// A zero-length buffer makes Read return (0, nil) on every call, so
		// the loop below would never reach io.EOF.
		panic("hasher: chunk size must be positive")
	}

	file, err := os.Open("nexus_latest.tar")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	hash := md5.New()
	buffer := make([]byte, chunk)
	for {
		n, err := file.Read(buffer)
		// Feed the bytes to the hash before inspecting err: a Read may
		// return n > 0 together with an error.
		hash.Write(buffer[:n])
		if err == io.EOF {
			break
		}
		if err != nil {
			// Any other error would otherwise be retried forever; fail
			// loudly, the same way the open error is handled.
			panic(err)
		}
	}
	return hex.EncodeToString(hash.Sum(nil))
}
// HashSHA1Copy returns the lowercase hex-encoded SHA-1 digest of the file
// "nexus_latest.tar" in the current working directory, streaming it into the
// hash with io.Copy.
//
// It panics if the file cannot be opened or if the copy fails.
func HashSHA1Copy() string {
	file, err := os.Open("nexus_latest.tar")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	hash := sha1.New()
	// A failed copy would otherwise yield the digest of a truncated stream,
	// so surface the error instead of discarding it.
	if _, err := io.Copy(hash, file); err != nil {
		panic(err)
	}
	return hex.EncodeToString(hash.Sum(nil))
}
// HashMD5Copy returns the lowercase hex-encoded MD5 digest of the file
// "nexus_latest.tar" in the current working directory, streaming it into the
// hash with io.Copy.
//
// It panics if the file cannot be opened or if the copy fails.
func HashMD5Copy() string {
	file, err := os.Open("nexus_latest.tar")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	hash := md5.New()
	// A failed copy would otherwise yield the digest of a truncated stream,
	// so surface the error instead of discarding it.
	if _, err := io.Copy(hash, file); err != nil {
		panic(err)
	}
	return hex.EncodeToString(hash.Sum(nil))
}
"""Compare hash rates of MD5 and SHA1 over N rounds and X chunk size."""
import sys
import timeit
import hashlib
def hash(sz=10240, func=hashlib.sha1):
    """Return the hex digest of 'nexus_latest.tar', read in chunks.

    Args:
        sz: Number of bytes requested from the file on each read.
        func: Zero-argument callable returning a hashlib-style object that
            provides ``update()`` and ``hexdigest()``.

    Returns:
        The digest of the file contents as a hexadecimal string.
    """
    h = func()
    with open('nexus_latest.tar', "rb") as fin:
        # read() returns b"" at end of file, which ends the loop.
        while chunk := fin.read(sz):
            h.update(chunk)
    return h.hexdigest()
def benchmark_md5_chunk_8(rounds: int) -> None:
    """Time `rounds` MD5 hashes of the file using 8 KiB reads and print the total.

    The output line has the form ``ok<TAB>hasher<TAB><seconds>s``.
    """
    md5 = timeit.Timer(lambda: hash(sz=8 * 1024, func=hashlib.md5)).timeit(number=rounds)
    print(f"ok\thasher\t{md5}s")
def benchmark_sha1_chunk_8(rounds: int) -> None:
    """Time `rounds` SHA1 hashes of the file using 8 KiB reads and print the total.

    The output line has the form ``ok<TAB>hasher<TAB><seconds>s``.
    """
    # Total wall time reported by timeit for all rounds, in seconds.
    elapsed = timeit.Timer(lambda: hash(sz=8 * 1024, func=hashlib.sha1)).timeit(number=rounds)
    print(f"ok\thasher\t{elapsed}s")
if __name__ == "__main__":
    # This is amazingly gross, but we're a benchmark.
    if len(sys.argv) < 3:
        print("Usage: hasher.py <case> <rounds>")
        sys.exit(0)

    # Look up the benchmark function by name on this module; an unknown
    # name raises AttributeError.
    case = getattr(sys.modules[__name__], sys.argv[1])
    count = int(sys.argv[2])
    case(count)
package hasher
import (
"testing"
)
// result receives the last digest produced by each benchmark in this file.
// Presumably it exists so the compiler cannot discard the hashing calls as
// dead code — TODO confirm.
var result string
// BenchmarkMD5Chunk8 calls HashMD5 with an 8 KiB chunk size b.N times and
// stores the final digest in the package-level result variable.
func BenchmarkMD5Chunk8(b *testing.B) {
	const chunkSize = 8 * 1024

	var digest string
	for remaining := b.N; remaining > 0; remaining-- {
		digest = HashMD5(chunkSize)
	}
	result = digest
}
// BenchmarkSHA1Chunk8 calls HashSHA1 with an 8 KiB chunk size b.N times and
// stores the final digest in the package-level result variable.
func BenchmarkSHA1Chunk8(b *testing.B) {
	const chunkSize = 8 * 1024

	var digest string
	for remaining := b.N; remaining > 0; remaining-- {
		digest = HashSHA1(chunkSize)
	}
	result = digest
}
// BenchmarkMD5Copy calls HashMD5Copy b.N times and stores the final digest
// in the package-level result variable.
func BenchmarkMD5Copy(b *testing.B) {
	var digest string
	for remaining := b.N; remaining > 0; remaining-- {
		digest = HashMD5Copy()
	}
	result = digest
}
// BenchmarkSHA1Copy calls HashSHA1Copy b.N times and stores the final digest
// in the package-level result variable.
func BenchmarkSHA1Copy(b *testing.B) {
	var digest string
	for remaining := b.N; remaining > 0; remaining-- {
		digest = HashSHA1Copy()
	}
	result = digest
}
@darkarnium
Copy link
Author

Running on FreeBSD, comparing Go using io.Copy() against Python using both a single open(...).read() and 8K chunked reads:

% go test -bench=BenchmarkSHA1Copy -count 10 | grep -iE ^ok
ok      hasher  12.108s

% python3.8 hasher.py benchmark_sha1_chunk_8 10
ok      hasher  1.1660769821610302s

% python3.8 hasher.py benchmark_sha1_chunk_copy 10
ok      hasher  1.603817748837173s

Version:

% go version
go version go1.17.5 freebsd/amd64

@darkarnium
Copy link
Author

darkarnium commented Jan 27, 2022

Python buffering disabled on open() on macOS:

Via open(..., 0).read():

$ egrep -i 'def hash_copy\(' -A 4 hasher.py | grep -i open
    h.update(open('nexus_latest.tar', "rb", 0).read())

$ python3.9 hasher.py benchmark_md5_chunk_copy 10
ok	hasher	12.137283342s

$ python3.9 hasher.py benchmark_sha1_chunk_copy 10
ok	hasher	12.106864423000001s

Chunked read with open(..., 0):

$ egrep -i 'def hash\(' -A 4 hasher.py | grep -i open
    with open('nexus_latest.tar', "rb", 0) as fin:

$ python3.9 hasher.py benchmark_sha1_chunk_8 10
ok	hasher	7.7224440329999995s

$ python3.9 hasher.py benchmark_md5_chunk_8 10
ok	hasher	10.369061641s

Versions:

$ uname -a
Darwin Callisto.local 20.6.0 Darwin Kernel Version 20.6.0: Tue Oct 12 18:33:42 PDT 2021; root:xnu-7195.141.8~1/RELEASE_X86_64 x86_64

@darkarnium
Copy link
Author

Full output requested when run with -benchtime 10x and -count 10:

MD5:

$ go test -bench=BenchmarkMD5Chunk8 -benchtime 10x
goos: darwin
goarch: amd64
pkg: hasher
cpu: Intel(R) Core(TM) i5-8259U CPU @ 2.30GHz
BenchmarkMD5Chunk8-8   	      10	1005807555 ns/op
PASS
ok  	hasher	11.403s

$ go test -bench=BenchmarkMD5Chunk8 -count 10
goos: darwin
goarch: amd64
pkg: hasher
cpu: Intel(R) Core(TM) i5-8259U CPU @ 2.30GHz
BenchmarkMD5Chunk8-8   	       1	1054308178 ns/op
BenchmarkMD5Chunk8-8   	       1	1040079144 ns/op
BenchmarkMD5Chunk8-8   	       1	1020537304 ns/op
BenchmarkMD5Chunk8-8   	       1	1021756455 ns/op
BenchmarkMD5Chunk8-8   	       1	1080426179 ns/op
BenchmarkMD5Chunk8-8   	       1	1012884916 ns/op
BenchmarkMD5Chunk8-8   	       1	1006444572 ns/op
BenchmarkMD5Chunk8-8   	       1	1003369154 ns/op
BenchmarkMD5Chunk8-8   	       1	1009236434 ns/op
BenchmarkMD5Chunk8-8   	       2	1001233669 ns/op
PASS
ok  	hasher	12.353s

SHA1:

$ go test -bench=BenchmarkSHA1Chunk8 -benchtime 10x
goos: darwin
goarch: amd64
pkg: hasher
cpu: Intel(R) Core(TM) i5-8259U CPU @ 2.30GHz
BenchmarkSHA1Chunk8-8   	      10	 772829689 ns/op
PASS
ok  	hasher	8.672s

$ go test -bench=BenchmarkSHA1Chunk8 -count 10
goos: darwin
goarch: amd64
pkg: hasher
cpu: Intel(R) Core(TM) i5-8259U CPU @ 2.30GHz
BenchmarkSHA1Chunk8-8   	       2	 758764051 ns/op
BenchmarkSHA1Chunk8-8   	       2	 756892006 ns/op
BenchmarkSHA1Chunk8-8   	       2	 758007766 ns/op
BenchmarkSHA1Chunk8-8   	       2	 756452965 ns/op
BenchmarkSHA1Chunk8-8   	       2	 760778676 ns/op
BenchmarkSHA1Chunk8-8   	       2	 753645592 ns/op
BenchmarkSHA1Chunk8-8   	       2	 753705684 ns/op
BenchmarkSHA1Chunk8-8   	       2	 753714502 ns/op
BenchmarkSHA1Chunk8-8   	       2	 757031118 ns/op
BenchmarkSHA1Chunk8-8   	       2	 751179043 ns/op
PASS
ok  	hasher	22.982s

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment