cmrfrd/gje.py

## gje.py
import numpy as np
np.random.seed(256)

DTYPE=np.int64
P = 101

def is_singular(matrix: np.ndarray) -> bool:
    return np.isclose(np.linalg.det(matrix), 0)
def add(x, y, m): return (x + y) % m
def sub(x, y, m): return (x - y) % m
def mul(x, y, m): return (x * y) % m
def inv(x, m):
    g, a, _ = egcd(x, m)
    if g != 1:
        if m == 2: return 0
        raise Exception(f"no inverse {x} mod {m}")
    else:
        return a % m
def egcd(a, b):
    if a == 0:
        return (b, 0, 1)
    else:
        g, y, x = egcd(b % a, a)
        return (g, x - (b // a).astype(DTYPE) * y, y)

def gje_solution(A, b, m):
    """
    Do gaussian-jordan elimination over mod field m and return some solution to the unknowns.
    Algorithm based on: https://www.math.purdue.edu/~shao92/documents/Algorithm%20REF.pdf
    """
    assert isinstance(m, int), "m should be an integer."
    assert len(A.shape) == 2, "A should be 2-dimensional."
    assert len(b.shape) == 1, "b should be 1-dimensional."
    assert A.shape[0] == b.shape[0], "Number of equations in A != length of b."
    assert A.dtype == b.dtype, "A and b should have the same dtype."

    Ab = np.hstack((A, b.reshape(-1, 1)))

    # to upper triangular
    for i in range(len(b)):
        # row swap
        max_row = (np.argmax(np.abs(Ab[i:, i])) + i)
        Ab[[i, max_row]] = Ab[[max_row, i]]

        # row norm
        Ab[i] = mul(Ab[i], inv(Ab[i, i], m), m)

        # subtract multiple of current row from other rows
        for j in range(i + 1, len(b)):
            Ab[j] = sub(Ab[j], mul(Ab[j, i], Ab[i], m), m)

    # to reduced row echelon form
    for i in range(len(b) - 1, -1, -1):
        for j in range(i - 1, -1, -1):
            Ab[j] = sub(Ab[j], mul(Ab[j, i], Ab[i], m), m)

    # check if no solution
    all_zero = np.sum(Ab[:, :-1], axis=1) == 0
    non_zero_result = Ab[:, -1] != 0
    if np.any(all_zero & non_zero_result):
        raise Exception("no solution")

    # get single solution for (a, b, c, d)
    num_equations, num_vars_plus_one = Ab.shape
    num_vars = num_vars_plus_one - 1
    x = np.zeros(num_vars, dtype=Ab.dtype)
    for i in range(min(num_vars, num_equations)):
        if np.all(Ab[i, :-1] == 0): # free variable
            x[i] = 0
        else: # pivot
            x[i] = Ab[i, -1]
    return Ab, x

# Example: 1
A = np.array([[-1, 2, 6, 7],
              [3, -6, 0, -3],
              [1, 0, 6, -1]])
b = np.array([15, -9, 5])

_, solution = gje_solution(A, b, P)

print("Solution1:\n", solution)
result = np.sum(mul(A, solution, P), axis=1) % P
assert np.all(result == b % P), "Solution is incorrect."

# Example: 2
A = np.array([[2, 1, 0, 2],
              [0, 1, 0, 3],
              [-1, 2, 0, 4],
              [0, 1, 7, 2]])
b = np.array([4, 3, 3, 2])

_, solution = gje_solution(A, b, P)

print("Solution2:\n", solution)
result = np.sum(mul(A, solution, P), axis=1) % P
assert np.all(result == b % P), "Solution is incorrect."

# Example: 3
num_hashes = 100
hash_size = 32
A = np.random.randint(0, 2, size=(hash_size, num_hashes))
b = np.random.randint(0, 2, size=(hash_size,))
_, solution = gje_solution(A, b, 2)
print("Solution3:\n", solution)

hash_subset = A[:, solution.astype(bool)]
result = np.bitwise_xor.reduce(hash_subset, axis=1)
assert np.allclose(result, b), "Solution is incorrect."
	import numpy as np
	np.random.seed(256)

	DTYPE=np.int64
	P = 101

	def is_singular(matrix: np.ndarray) -> bool:
	return np.isclose(np.linalg.det(matrix), 0)
	def add(x, y, m): return (x + y) % m
	def sub(x, y, m): return (x - y) % m
	def mul(x, y, m): return (x * y) % m
	def inv(x, m):
	g, a, _ = egcd(x, m)
	if g != 1:
	if m == 2: return 0
	raise Exception(f"no inverse {x} mod {m}")
	else:
	return a % m
	def egcd(a, b):
	if a == 0:
	return (b, 0, 1)
	else:
	g, y, x = egcd(b % a, a)
	return (g, x - (b // a).astype(DTYPE) * y, y)

	def gje_solution(A, b, m):
	"""
	Do gaussian-jordan elimination over mod field m and return some solution to the unknowns.
	Algorithm based on: https://www.math.purdue.edu/~shao92/documents/Algorithm%20REF.pdf
	"""
	assert isinstance(m, int), "m should be an integer."
	assert len(A.shape) == 2, "A should be 2-dimensional."
	assert len(b.shape) == 1, "b should be 1-dimensional."
	assert A.shape[0] == b.shape[0], "Number of equations in A != length of b."
	assert A.dtype == b.dtype, "A and b should have the same dtype."

	Ab = np.hstack((A, b.reshape(-1, 1)))

	# to upper triangular
	for i in range(len(b)):
	# row swap
	max_row = (np.argmax(np.abs(Ab[i:, i])) + i)
	Ab[[i, max_row]] = Ab[[max_row, i]]

	# row norm
	Ab[i] = mul(Ab[i], inv(Ab[i, i], m), m)

	# subtract multiple of current row from other rows
	for j in range(i + 1, len(b)):
	Ab[j] = sub(Ab[j], mul(Ab[j, i], Ab[i], m), m)

	# to reduced row echelon form
	for i in range(len(b) - 1, -1, -1):
	for j in range(i - 1, -1, -1):
	Ab[j] = sub(Ab[j], mul(Ab[j, i], Ab[i], m), m)

	# check if no solution
	all_zero = np.sum(Ab[:, :-1], axis=1) == 0
	non_zero_result = Ab[:, -1] != 0
	if np.any(all_zero & non_zero_result):
	raise Exception("no solution")

	# get single solution for (a, b, c, d)
	num_equations, num_vars_plus_one = Ab.shape
	num_vars = num_vars_plus_one - 1
	x = np.zeros(num_vars, dtype=Ab.dtype)
	for i in range(min(num_vars, num_equations)):
	if np.all(Ab[i, :-1] == 0): # free variable
	x[i] = 0
	else: # pivot
	x[i] = Ab[i, -1]
	return Ab, x

	# Example: 1
	A = np.array([[-1, 2, 6, 7],
	[3, -6, 0, -3],
	[1, 0, 6, -1]])
	b = np.array([15, -9, 5])

	_, solution = gje_solution(A, b, P)

	print("Solution1:\n", solution)
	result = np.sum(mul(A, solution, P), axis=1) % P
	assert np.all(result == b % P), "Solution is incorrect."

	# Example: 2
	A = np.array([[2, 1, 0, 2],
	[0, 1, 0, 3],
	[-1, 2, 0, 4],
	[0, 1, 7, 2]])
	b = np.array([4, 3, 3, 2])

	_, solution = gje_solution(A, b, P)

	print("Solution2:\n", solution)
	result = np.sum(mul(A, solution, P), axis=1) % P
	assert np.all(result == b % P), "Solution is incorrect."

	# Example: 3
	num_hashes = 100
	hash_size = 32
	A = np.random.randint(0, 2, size=(hash_size, num_hashes))
	b = np.random.randint(0, 2, size=(hash_size,))
	_, solution = gje_solution(A, b, 2)
	print("Solution3:\n", solution)

	hash_subset = A[:, solution.astype(bool)]
	result = np.bitwise_xor.reduce(hash_subset, axis=1)
	assert np.allclose(result, b), "Solution is incorrect."