TethysSvensson/crc-identification.py

## crc-identification.py
import random

def mymul(a, b):
    if a > b:
        a, b = b, a

    res = 0
    for n in range(a.bit_length()):
        if a & (1 << n):
            res ^= b << n
    return res

def mydivmod(a, b):
    if b == 0:
        raise ZeroDivisionError

    resd = 0
    resm = a

    for n in range(a.bit_length() - b.bit_length(), -1, -1):
        if resm & (1 << (n + b.bit_length() - 1)):
            resm ^= b << n
            resd ^= 1 << n
    return resd, resm

def mydiv(a, b):
    d, m = mydivmod(a, b)
    assert m == 0
    return d

def mymod(a, b):
    return mydivmod(a, b)[1]

def egcd(a, b):
    x,y, u,v = 0,1, 1,0
    while a != 0:
        q, r = mydivmod(b, a)
        m, n = x ^ mymul(u, q), y ^ mymul(v, q)
        b,a, x,y, u,v = a,r, u,v, m,n
    gcd = b
    return gcd, x, y

def inv(x, p):
    r, a, b = egcd(x, p)
    if r != 1:
        return None
    else:
        return a

def flip(n, w):
    res = 0
    for _ in range(w):
        res = res << 1 | (n & 1)
        n = n >> 1
    if n != 0:
        print w
    assert n == 0
    return res

def identify_raw(f):
    # Populate the cache a bit
    cache = {}
    def get(data):
        cache[data] = f(data)
    get('')
    for p in [0, 1, 4, 32]:
        for c in ['\x00', '\x01', '\x02', '\x40', '\x80']:
            get(c + p * '\x00')

    # Get the number of bits of the polynomial
    # This is a bit dodgy especially for *really* bad
    # polynomials. We hope that our guess is good enough
    w = max(v.bit_length() for v in cache.values())
    upper = 1 << w

    # Create the dictionary of possible candidates
    candidates = {}
    def make(c, refin, refout):
        candidates[c, refin, refout] = crc._make_crc('', c, w, 0, refin, refout, 0, 0)
    make((    cache['\x00'] ^ cache['\x01'])    | upper, False, False)
    make((    cache['\x00'] ^ cache['\x80'])    | upper, True, False)
    make(flip(cache['\x00'] ^ cache['\x01'], w) | upper, False, True)
    make(flip(cache['\x00'] ^ cache['\x80'], w) | upper, True, True)

    # Try everything in the cache
    for k, my_f in list(candidates.items()):
        bad = False
        for s in cache.keys():
            ref = '\x00' * len(s)
            if my_f(s) ^ my_f(ref) != cache[s] ^ cache[ref]:
                del candidates[k]
                break

    # We assert that we have a solution
    # If we get multiple solutions, we assert that they are equivalent
    candidates = candidates.keys()
    if len(candidates) == 1:
        ambigious = False
    else:
        assert len(candidates) == 2
        assert candidates[0][0] == candidates[1][0]
        assert flip(candidates[0][0], w+1) == candidates[0][0]
        assert candidates[0][1] == (not candidates[1][1])
        assert candidates[0][2] == (not candidates[1][2])
        ambigious = True

    poly, refin, refout = candidates[0]

    # Now identify the xor values
    # Start by factoring out values of x^1 + 1 from the polynomial
    # Each factor doubles the number of solutions
    count = 0
    remainder = poly
    while True:
        d, m = mydivmod(remainder, 0b11)
        if m == 0:
            count += 1
            remainder = d
        else:
            break

    # Get the values for f('') and f('\x00') represented
    # as polynomials
    if refout:
        val0 = flip(cache[''], w)
        val8 = flip(cache['\x00'], w)
    else:
        val0 = cache['']
        val8 = cache['\x00']

    # Do some fancy math to find all the solutions
    results = []
    diff = mymod(val0 ^ val8, remainder)
    theinv = inv(mymod(2**8 + 1, remainder), remainder)
    factor = mymul(diff, theinv)
    for x in range(1 << count):
        x = mymul(x, remainder) ^ factor
        x = mymod(x, poly)
        y = int(x ^ val0)
        if refout:
            results.append((poly, w, int(x), refin, refout, flip(y, w)))
        else:
            results.append((poly, w, int(x), refin, refout, y))

    # If we previously found out that the polynomial
    # was ambigious, then double the number of solutions
    # while correcting the reflection suitably
    if ambigious:
        for poly, w, init, refin, refout, xorout in list(results):
            results.append((poly, w, flip(init, w), not refin, not refout, xorout))

    return results

def identify(f):
    # If we get more than a one result, we know that they are
    # equivalent, yet we would like to just return a single one.
    # We score them and return the best result.
    def score((poly, w, init, refin, refout, xorout)):
        table = {
            0: 3,
            (1 << w) - 1: 1
        }

        return sum(table.get(v, 0) for v in [init, xorout]), refin, refout, -init
    return max(identify_raw(f), key = score)

# Test that we correctly identify all functions from the pwntools table
from pwnlib.util import crc
for k, v in crc.all_crcs.items():
    poly, w, init, refin, refout, xorout = identify(getattr(crc, k))
    assert v['poly'] == (poly ^ (1 << (poly.bit_length() - 1)))
    assert v['width'] == w
    assert v['refin'] == refin
    assert v['refout'] == refout
    assert v['init'] == init
    assert v['xorout'] == xorout

# Test that we correctly identify randomly generated CRC functions.
for _ in range(1000):
    w = random.randint(2, 64)
    refin = random.choice([True, False])
    refout = random.choice([True, False])
    poly = random.randint(1, (1 << (w-1))-1) | (1 << w)
    init = random.randint(0, (1 << (w-1))-1)
    xorout = random.randint(0, (1 << (w-1))-1)

    # When refout == True and the polynomial is even,
    # then it introduces a lot of inherent ambiguity,
    # as a smaller polynomial could have been used.
    # I am not sure how to handle that (if it is even possible),
    # so for now we just avoid it. Not that no existing CRC-sums
    # has even polynomials (as it would be stupid to do so).
    if refout:
        poly |= 1

    # Generate the functions
    f = crc._make_crc('', poly, w, init, refin, refout, xorout, 0)

    # Assert that we correctly identified it
    assert (poly, w, init, refin, refout, xorout) in identify_raw(f)
	import random

	def mymul(a, b):
	if a > b:
	a, b = b, a

	res = 0
	for n in range(a.bit_length()):
	if a & (1 << n):
	res ^= b << n
	return res

	def mydivmod(a, b):
	if b == 0:
	raise ZeroDivisionError

	resd = 0
	resm = a

	for n in range(a.bit_length() - b.bit_length(), -1, -1):
	if resm & (1 << (n + b.bit_length() - 1)):
	resm ^= b << n
	resd ^= 1 << n
	return resd, resm

	def mydiv(a, b):
	d, m = mydivmod(a, b)
	assert m == 0
	return d

	def mymod(a, b):
	return mydivmod(a, b)[1]

	def egcd(a, b):
	x,y, u,v = 0,1, 1,0
	while a != 0:
	q, r = mydivmod(b, a)
	m, n = x ^ mymul(u, q), y ^ mymul(v, q)
	b,a, x,y, u,v = a,r, u,v, m,n
	gcd = b
	return gcd, x, y

	def inv(x, p):
	r, a, b = egcd(x, p)
	if r != 1:
	return None
	else:
	return a

	def flip(n, w):
	res = 0
	for _ in range(w):
	res = res << 1 \| (n & 1)
	n = n >> 1
	if n != 0:
	print w
	assert n == 0
	return res

	def identify_raw(f):
	# Populate the cache a bit
	cache = {}
	def get(data):
	cache[data] = f(data)
	get('')
	for p in [0, 1, 4, 32]:
	for c in ['\x00', '\x01', '\x02', '\x40', '\x80']:
	get(c + p * '\x00')

	# Get the number of bits of the polynomial
	# This is a bit dodgy especially for really bad
	# polynomials. We hope that our guess is good enough
	w = max(v.bit_length() for v in cache.values())
	upper = 1 << w

	# Create the dictionary of possible candidates
	candidates = {}
	def make(c, refin, refout):
	candidates[c, refin, refout] = crc._make_crc('', c, w, 0, refin, refout, 0, 0)
	make(( cache['\x00'] ^ cache['\x01']) \| upper, False, False)
	make(( cache['\x00'] ^ cache['\x80']) \| upper, True, False)
	make(flip(cache['\x00'] ^ cache['\x01'], w) \| upper, False, True)
	make(flip(cache['\x00'] ^ cache['\x80'], w) \| upper, True, True)

	# Try everything in the cache
	for k, my_f in list(candidates.items()):
	bad = False
	for s in cache.keys():
	ref = '\x00' * len(s)
	if my_f(s) ^ my_f(ref) != cache[s] ^ cache[ref]:
	del candidates[k]
	break

	# We assert that we have a solution
	# If we get multiple solutions, we assert that they are equivalent
	candidates = candidates.keys()
	if len(candidates) == 1:
	ambigious = False
	else:
	assert len(candidates) == 2
	assert candidates[0][0] == candidates[1][0]
	assert flip(candidates[0][0], w+1) == candidates[0][0]
	assert candidates[0][1] == (not candidates[1][1])
	assert candidates[0][2] == (not candidates[1][2])
	ambigious = True

	poly, refin, refout = candidates[0]

	# Now identify the xor values
	# Start by factoring out values of x^1 + 1 from the polynomial
	# Each factor doubles the number of solutions
	count = 0
	remainder = poly
	while True:
	d, m = mydivmod(remainder, 0b11)
	if m == 0:
	count += 1
	remainder = d
	else:
	break

	# Get the values for f('') and f('\x00') represented
	# as polynomials
	if refout:
	val0 = flip(cache[''], w)
	val8 = flip(cache['\x00'], w)
	else:
	val0 = cache['']
	val8 = cache['\x00']

	# Do some fancy math to find all the solutions
	results = []
	diff = mymod(val0 ^ val8, remainder)
	theinv = inv(mymod(2**8 + 1, remainder), remainder)
	factor = mymul(diff, theinv)
	for x in range(1 << count):
	x = mymul(x, remainder) ^ factor
	x = mymod(x, poly)
	y = int(x ^ val0)
	if refout:
	results.append((poly, w, int(x), refin, refout, flip(y, w)))
	else:
	results.append((poly, w, int(x), refin, refout, y))

	# If we previously found out that the polynomial
	# was ambigious, then double the number of solutions
	# while correcting the reflection suitably
	if ambigious:
	for poly, w, init, refin, refout, xorout in list(results):
	results.append((poly, w, flip(init, w), not refin, not refout, xorout))

	return results

	def identify(f):
	# If we get more than a one result, we know that they are
	# equivalent, yet we would like to just return a single one.
	# We score them and return the best result.
	def score((poly, w, init, refin, refout, xorout)):
	table = {
	0: 3,
	(1 << w) - 1: 1
	}

	return sum(table.get(v, 0) for v in [init, xorout]), refin, refout, -init
	return max(identify_raw(f), key = score)

	# Test that we correctly identify all functions from the pwntools table
	from pwnlib.util import crc
	for k, v in crc.all_crcs.items():
	poly, w, init, refin, refout, xorout = identify(getattr(crc, k))
	assert v['poly'] == (poly ^ (1 << (poly.bit_length() - 1)))
	assert v['width'] == w
	assert v['refin'] == refin
	assert v['refout'] == refout
	assert v['init'] == init
	assert v['xorout'] == xorout

	# Test that we correctly identify randomly generated CRC functions.
	for _ in range(1000):
	w = random.randint(2, 64)
	refin = random.choice([True, False])
	refout = random.choice([True, False])
	poly = random.randint(1, (1 << (w-1))-1) \| (1 << w)
	init = random.randint(0, (1 << (w-1))-1)
	xorout = random.randint(0, (1 << (w-1))-1)

	# When refout == True and the polynomial is even,
	# then it introduces a lot of inherent ambiguity,
	# as a smaller polynomial could have been used.
	# I am not sure how to handle that (if it is even possible),
	# so for now we just avoid it. Not that no existing CRC-sums
	# has even polynomials (as it would be stupid to do so).
	if refout:
	poly \|= 1

	# Generate the functions
	f = crc._make_crc('', poly, w, init, refin, refout, xorout, 0)

	# Assert that we correctly identified it
	assert (poly, w, init, refin, refout, xorout) in identify_raw(f)