Last active
October 2, 2023 18:55
-
-
Save cmrfrd/21bad19b3ead7f21ca89d158615468cc to your computer and use it in GitHub Desktop.
Gaussian Jordan Elim for finding bithash collision
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
np.random.seed(256) | |
DTYPE=np.int64 | |
P = 101 | |
def is_singular(matrix: np.ndarray) -> bool: | |
return np.isclose(np.linalg.det(matrix), 0) | |
def add(x, y, m): return (x + y) % m | |
def sub(x, y, m): return (x - y) % m | |
def mul(x, y, m): return (x * y) % m | |
def inv(x, m): | |
g, a, _ = egcd(x, m) | |
if g != 1: | |
if m == 2: return 0 | |
raise Exception(f"no inverse {x} mod {m}") | |
else: | |
return a % m | |
def egcd(a, b): | |
if a == 0: | |
return (b, 0, 1) | |
else: | |
g, y, x = egcd(b % a, a) | |
return (g, x - (b // a).astype(DTYPE) * y, y) | |
def gje_solution(A, b, m): | |
""" | |
Do gaussian-jordan elimination over mod field m and return some solution to the unknowns. | |
Algorithm based on: https://www.math.purdue.edu/~shao92/documents/Algorithm%20REF.pdf | |
""" | |
assert isinstance(m, int), "m should be an integer." | |
assert len(A.shape) == 2, "A should be 2-dimensional." | |
assert len(b.shape) == 1, "b should be 1-dimensional." | |
assert A.shape[0] == b.shape[0], "Number of equations in A != length of b." | |
assert A.dtype == b.dtype, "A and b should have the same dtype." | |
Ab = np.hstack((A, b.reshape(-1, 1))) | |
# to upper triangular | |
for i in range(len(b)): | |
# row swap | |
max_row = (np.argmax(np.abs(Ab[i:, i])) + i) | |
Ab[[i, max_row]] = Ab[[max_row, i]] | |
# row norm | |
Ab[i] = mul(Ab[i], inv(Ab[i, i], m), m) | |
# subtract multiple of current row from other rows | |
for j in range(i + 1, len(b)): | |
Ab[j] = sub(Ab[j], mul(Ab[j, i], Ab[i], m), m) | |
# to reduced row echelon form | |
for i in range(len(b) - 1, -1, -1): | |
for j in range(i - 1, -1, -1): | |
Ab[j] = sub(Ab[j], mul(Ab[j, i], Ab[i], m), m) | |
# check if no solution | |
all_zero = np.sum(Ab[:, :-1], axis=1) == 0 | |
non_zero_result = Ab[:, -1] != 0 | |
if np.any(all_zero & non_zero_result): | |
raise Exception("no solution") | |
# get single solution for (a, b, c, d) | |
num_equations, num_vars_plus_one = Ab.shape | |
num_vars = num_vars_plus_one - 1 | |
x = np.zeros(num_vars, dtype=Ab.dtype) | |
for i in range(min(num_vars, num_equations)): | |
if np.all(Ab[i, :-1] == 0): # free variable | |
x[i] = 0 | |
else: # pivot | |
x[i] = Ab[i, -1] | |
return Ab, x | |
# Example: 1 | |
A = np.array([[-1, 2, 6, 7], | |
[3, -6, 0, -3], | |
[1, 0, 6, -1]]) | |
b = np.array([15, -9, 5]) | |
_, solution = gje_solution(A, b, P) | |
print("Solution1:\n", solution) | |
result = np.sum(mul(A, solution, P), axis=1) % P | |
assert np.all(result == b % P), "Solution is incorrect." | |
# Example: 2 | |
A = np.array([[2, 1, 0, 2], | |
[0, 1, 0, 3], | |
[-1, 2, 0, 4], | |
[0, 1, 7, 2]]) | |
b = np.array([4, 3, 3, 2]) | |
_, solution = gje_solution(A, b, P) | |
print("Solution2:\n", solution) | |
result = np.sum(mul(A, solution, P), axis=1) % P | |
assert np.all(result == b % P), "Solution is incorrect." | |
# Example: 3 | |
num_hashes = 100 | |
hash_size = 32 | |
A = np.random.randint(0, 2, size=(hash_size, num_hashes)) | |
b = np.random.randint(0, 2, size=(hash_size,)) | |
_, solution = gje_solution(A, b, 2) | |
print("Solution3:\n", solution) | |
hash_subset = A[:, solution.astype(bool)] | |
result = np.bitwise_xor.reduce(hash_subset, axis=1) | |
assert np.allclose(result, b), "Solution is incorrect." |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment