Skip to content

Instantly share code, notes, and snippets.

@pdaian
Last active November 28, 2018 09:09
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pdaian/2e10d273210cc02fd510f02ce8a8e12c to your computer and use it in GitHub Desktop.
Save pdaian/2e10d273210cc02fd510f02ce8a8e12c to your computer and use it in GitHub Desktop.
SHA256 short ID collision mining tool (unoptimized)
# Lots of code stolen from https://raw.githubusercontent.com/Simmesimme/cry-hash/master/03/collisions.py
import hashlib, random, time
from string import digits, ascii_uppercase, ascii_lowercase
from itertools import product
chars = digits + ascii_uppercase + ascii_lowercase
# We use this decorator to run each collision finding task in its own thread
def run_async(func):
from threading import Thread
from functools import wraps
@wraps(func)
def async_func(*args, **kwargs):
func_hl = Thread(target = func, args = args, kwargs = kwargs)
func_hl.start()
return func_hl
return async_func
# Define our f(x), as in the blog post
def get_truncated_hash(message, k, prefix):
return hashlib.sha256(prefix + message).hexdigest()[:k * 2]
# Birthday attack (https://en.wikipedia.org/wiki/Birthday_attack)
@run_async
def simple_birthday(k, prefix):
hash_table = dict()
for n in range(1, 200):
for comb in product(chars, repeat=n):
s = ''.join(comb)
hash = get_truncated_hash(s, k, prefix)
if hash in hash_table:
if hash_table[hash] != s:
print_results(s, hash_table[hash], hash, k, prefix, "birthday")
return
else:
hash_table[hash] = s
print "fail"
# Floyd's algorithm (https://en.wikipedia.org/wiki/Cycle_detection)
@run_async
def floyd(k, prefix, initial):
x0 = initial
m0 = None
m1 = None
turtle = get_truncated_hash(x0, k, prefix)
hare = get_truncated_hash(turtle, k, prefix)
while turtle != hare or (not (hare.isalnum() and turtle.isalnum())):
turtle = get_truncated_hash(turtle, k, prefix)
hare = get_truncated_hash(get_truncated_hash(hare, k, prefix), k, prefix)
pre_period_length = 0
turtle = x0
while turtle != hare:
m0 = turtle
turtle = get_truncated_hash(turtle, k, prefix)
hare = get_truncated_hash(hare, k, prefix)
pre_period_length += 1
if pre_period_length is 0:
print "Failed to find a collision: x0 was in a cycle!"
return
period_length = 1
hare = get_truncated_hash(turtle, k, prefix)
while turtle != hare:
m1 = hare
hare = get_truncated_hash(hare, k, prefix)
period_length += 1
print_results(m0, m1, get_truncated_hash(m0, k, prefix), k, prefix, "floyd")
# Brent's algorithm (https://en.wikipedia.org/wiki/Cycle_detection)
@run_async
def brent(k, prefix, initial):
x0 = initial
m0 = None
m1 = None
turtle = x0
hare = get_truncated_hash(turtle, k, prefix)
power = period_length = 1
while turtle != hare:
if power == period_length:
turtle = hare
power *= 2
period_length = 0
hare = get_truncated_hash(hare, k, prefix)
period_length += 1
pre_period_length = 0
turtle = hare = x0
for i in range(period_length):
hare = get_truncated_hash(hare, k, prefix)
while turtle != hare:
m0 = turtle
m1 = hare
turtle = get_truncated_hash(turtle, k, prefix)
hare = get_truncated_hash(hare, k, prefix)
pre_period_length += 1
if pre_period_length is 0:
print "Failed to find a collision: x0 was in a cycle!"
return
print_results(m0, m1, get_truncated_hash(m0, k, prefix), k, prefix, "brent")
# Pretty print any collisions we do find
def print_results(m0, m1, hash, k, prefix, method):
print "Message A:", prefix + m0
print "Message B:", prefix + m1
print "Same hash:", hash
print method
print time.time()
# For measurement purposes
print time.time()
# Warm up with a 4-byte simple Birthday attack
simple_birthday(4, "Perhaps. ")
# Spawn two threads running Brent's, one starting with string 1 and the other with string 200
# (so they search different graphs)
brent(8, "Perhaps. ", "1")
brent(8, "Perhaps. ", "2")
# Do the same with Floyd's, and make sure they are not searching the same graphs as the Brent's calls
floyd(8, "Perhaps. ", "3")
floyd(8, "Perhaps. ", "4")
@michaeljs1990
Copy link

michaeljs1990 commented Jun 15, 2016

i'm confused by Floyd's algorithm as used here. Can you really encounter a hash cycle as line 47-51 seems to try and determine?

Edit: as usual just being dumb. Took a little more time to see what is actually going on in these functions and it makes sense now.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment