maqp/real_keys.py

## real_keys.py
import math
import time

from cryptography.hazmat.primitives.asymmetric import rsa

def main():

    key_sizes_to_test                   = [512, 640, 768, 896]
    number_of_keys_per_size_to_generate = 10000

    previous_smallest_distance_between_two_x = None
    previous_key_size = None

    for key_size in key_sizes_to_test:

        list_of_x_values = []

        start_time = time.monotonic()

        for i in range(number_of_keys_per_size_to_generate):
            private_key    = rsa.generate_private_key(public_exponent=65537, key_size=key_size)
            prime_factor_1 = private_key.private_numbers().p
            prime_factor_2 = private_key.private_numbers().q
            semiprime      = private_key.private_numbers().public_numbers.n

            side_C = (prime_factor_1 + prime_factor_2) / 2
            side_B = math.sqrt(semiprime)
            x      = side_C - side_B

            # We collect all values of x for later analysis.
            list_of_x_values.append(x)

        duration = time.monotonic() - start_time

        # We get the smallest and greatest values for X.
        smallest_x = f"{min(list_of_x_values):.0f}"
        greatest_x = f"{max(list_of_x_values):.0f}"

        smallest_distance_between_x = None
        for i, first_value in enumerate(list_of_x_values):
            for j, second_value in enumerate(list_of_x_values):

                if i == j:    # Ignore comparison of values with same identity. This doesn't prevent distance between
                    continue  # two compared x numbers from being 0, it just prevents comparisons against each value of
                              # X against themselves (which is always zero).

                distance_between_values = abs(first_value-second_value)

                if smallest_distance_between_x is None:
                    smallest_distance_between_x = distance_between_values
                else:
                    smallest_distance_between_x = min(distance_between_values, smallest_distance_between_x)

        smallest_distance_between_x_str = f"{smallest_distance_between_x:.0f}"

        # Whitespace for aligning the decimal points of each value.
        length_of_longest_value = len(max(smallest_x, greatest_x, smallest_distance_between_x_str, key=len))
        whitespace1 = ' ' * (length_of_longest_value - len(smallest_x))
        whitespace2 = ' ' * (length_of_longest_value - len(greatest_x))
        whitespace3 = ' ' * (length_of_longest_value - len(smallest_distance_between_x_str))

        print(f"\nGenerated {number_of_keys_per_size_to_generate} {key_size}-bit keys in {duration:.1f} seconds.\n"
              f"The smallest x was                             {whitespace1} {smallest_x}\n"
              f"The greatest x was                             {whitespace2} {greatest_x}\n"
              f"The smallest distance between two x values was {whitespace3} {smallest_distance_between_x_str}")

        if previous_smallest_distance_between_two_x is not None and previous_key_size is not None:
            problem_size = smallest_distance_between_x / previous_smallest_distance_between_two_x
            print(f"\nIncreasing key size linearly from {previous_key_size} to {key_size} made the smallest distance between x:s in the sample {problem_size:.0f} times larger.")

        previous_smallest_distance_between_two_x = smallest_distance_between_x
        previous_key_size = key_size


if __name__ == '__main__':
    main()

    # Output when I ran the program:

    # Generated 10000 512-bit keys in 62.5 seconds.
    # The smallest x was                                      12019896571057247261053876450711896266866078393495290408054292480000
    # The greatest x was                              1009223972550147275717101238160743268768383705107733611013898955977893347328
    # The smallest distance between two x values was           1113672342193250620561601408476119330051987562811226746577265623040
    #
    # Generated 10000 640-bit keys in 71.0 seconds.
    # The smallest x was                                       21986876100048913151778421649542160516144701231635518536667851486981475794322035048448
    # The greatest x was                              18675834825171408407759973623876278536724568807543318811379579247292871367999119300836187963392
    # The smallest distance between two x values was             834029113031968889456023867956337401692058697869430504912429717413540816780605259776
    #
    # Increasing key size linearly from 512 to 640 made the smallest distance between x:s in the sample 748899906582436736 times larger.
    #
    # Generated 10000 768-bit keys in 101.7 seconds.
    # The smallest x was                                      1596443682508973441432548160541972187872163423236923252373060004830134954773635695097917138724648663384064
    # The greatest x was                              350565251279761667930362658206232530455526033878367674381360158638699293357613475168646552181512859306400061325312
    # The smallest distance between two x values was           379155538639685551066124516066385428441241214506476589429534483079952532505213417538949562210592334282752
    #
    # Increasing key size linearly from 640 to 768 made the smallest distance between x:s in the sample 454607078716150579200 times larger.
    #
    # Generated 10000 896-bit keys in 142.2 seconds.
    # The smallest x was                                      53464918858628610837571819224003485199149048817386891440515370270439865747191380750795481804744223769729748571876867811835904
    # The greatest x was                              6446610185364092605972281272641218049742659728012226654868106589696897120263776775183201080556852464575219028976564006853482026893312
    # The smallest distance between two x values was           2886309801231642076087903654506366522532028649934864581466469495773308879509324245297286093628874061087480770472538510721024
    #
    # Increasing key size linearly from 768 to 896 made the smallest distance between x:s in the sample 7612469045255131136 times larger.
    #
    # Process finished with exit code 0

    # So it's obvious X is most likely not going to be 0, or 1, or 2, or 100000000000000000000000000000000000000000000, about ever.


    # Grant claims that number of semiprimes where X is small grows when key size grows. This is technically true,
    # but meaningless in practice.

    # To prove this, let's simplify a bit and say any two integers p and q are valid factors of a product N (N=p*q).
    # Let's compare two decimal digit values.
    #
    # Both numbers can have values between 00, 01, 02, ..., 99 so that's 100 different values. In how many cases are the
    # distance of p and q 0 (and have thus very small X)? It turns out are also 100 cases of pairs (p, q) where
    # both numbers have the same exact value (00, 00), (01, 01) ... (99,99). All these values have distance |p-q| = 0.
    #
    # So the question for security is, if we randomly pick p and q, how likely is it we pick the same values leading to
    # 0? Calculating the probability of picking a pair with identical p and q requires us to know the set of all cases.
    # The list of all pairs go from (00,00) (00,01) ... (00,99), (01, 00), (01, 01), ... (99,99).
    # The total number of pairs is 100*100 = 10,000.
    #
    # So the probability of picking a pair with two identical values is 100 / 10,000 = 0.01 = 1%. One in 100 cases is
    # quite bad.
    #
    # Now we want to know is, if we increase key size, does this probability change, because if it does, it would help
    # if we would increase the size of the factors and thus the keysize.
    #
    # So let's increase the length of both factors by 1 decimal digit, i.e., let's compare three decimal digit values.
    #
    # Now the distance is 0 if all three digits in the pair are the same, (000,000) (001, 001), ..., (999, 999).
    # Now there is 1000 different value pairs that have |p-q| = 0! So like grant says, the number of factor pairs that
    # have very small X grows. Grant's claim is essentially, "because the number of pairs with small X grows, the
    # probability of X being small remains likely, and he claims the algorithm works in "massive number of cases".
    #
    # But wait, let's not give in yet. How many different cases are there in total? 1000*1000 = 1,000,000.
    # So a million cases. What is the probability of picking a pair with three same digits in this case?
    # 10,000 / 1,000,000 = 0.001 = 0.1%
    #
    # So 0.1 percent of prime pairs are bad. So ten times smaller portion of pairs were bad. That means it's ten times
    # less likely to pick a pair of bad factors.
    #
    # So while there are objectively 10 times more factors that would result in small X, the relative portion of those
    # pairs becomes 10 times smaller every time we add a digit. So when the size of factors grows linearly (i.e. when you
    # add one zero and so make a 10 a 100), the probability that you pick a bad factor becomes 10 times smaller.
    #
    # This allows us to determine the probability of picking two identical factors that result in small x, when given
    # the number of digits in the base10 factor:
    #
    # P(N) = 10^N / (10^N)^2
    #      = 1    / 10^N
    #
    # This shows that the probability of picking bad prime pair can only get more unlikely when key size increases.
    # This applies in all situations, including substrings.
    #
    # This also applies if the factors are prime numbers. The requirement of having a prime factors only means there
    # is not efficient way to figure out prime factors from semi prime. The requirement of having prime factors also
    # means that the space of possible factors is smaller, but not too small The size of gaps between prime factors
    # grows when number of digits in prime grows:
    # https://en.wikipedia.org/wiki/Prime_gap#/media/File:Wikipedia_primegaps.png
    # Since there are roughly n/ln(n) primes smaller than n[1], that would mean there are about 10^305 1024-bit primes.
    #
    #
    # But what if you get REAAAALLLY unlucky?
    #
    # Well, you'll be pleased to know that NIST requires that FIPS-compliant RSA implementations (read: every modern
    # crypto library) CHECK that the minimum value for p-q is at least 2^(N/2 - 100). So that would be 2^924 for
    # RSA-2048.
    #
    # [1] https://stackoverflow.com/a/16091676

    # So Grant is not TECHNICALLY lying when he states that space for X=0 is massive when generating random primes and
    # he's also not TECHNICALLY lying when he states X gets larger when key size grows. But he fails to mention
    # what we've shown here:
    #
    #   * Sampling showed RSA-512, RSA-680, RSA-768 and RSA-960, X is not even near 0.
    #   * Probability of picking prime factors that result in small X gets exponentially smaller when key size grows linearly
    #   * RSA implementation algorithms check that the distance between P and Q is large enough to make the attack impossible.
    #     This attack against RSA has been known for decades and it has been mitigated long since.
    #
    # So as always, Grant hasn't achieved shit. The reason he managed to factor semi-primes was he was actually factoring
    # numbers where the prime factors are actually close to one another. The reason they appear to novice eyes to be far
    # apart, is because we humans can't comprehend how long the excel file would have to be. To give you an idea
    # let's create an RSA-2048 key and look at the distance between its factors:

    # private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
    # prime_factor_1 = private_key.private_numbers().p
    # prime_factor_2 = private_key.private_numbers().q
    #
    # distance = f"{abs(prime_factor_1 - prime_factor_2):.0f}"
    # print(distance)
    #
    # This output for me the following value
    # 158668351314163897941513021472819439535737161270384929160014826156809873701937156100996679638299361924158643541160
    # 363536051484219329528722492947568028265196689575979113059512991254454139565628801812999505564564300856542096983478
    # 60599736696003561965602317367013105909087707110614109439636710610778611723010048

    # That is the distance between p and q. If we imagine p=2, and the distance is the next prime, we see there are
    # about n/ln(n) = 10^304 primes between them, so the Excel would need to be at least 10^304 lines long. There are
    # only 10^80 atoms on the observable universe so it's not possible to create excel sheet from all the matter that
    # exists.
	import math
	import time

	from cryptography.hazmat.primitives.asymmetric import rsa

	def main():

	key_sizes_to_test = [512, 640, 768, 896]
	number_of_keys_per_size_to_generate = 10000

	previous_smallest_distance_between_two_x = None
	previous_key_size = None

	for key_size in key_sizes_to_test:

	list_of_x_values = []

	start_time = time.monotonic()

	for i in range(number_of_keys_per_size_to_generate):
	private_key = rsa.generate_private_key(public_exponent=65537, key_size=key_size)
	prime_factor_1 = private_key.private_numbers().p
	prime_factor_2 = private_key.private_numbers().q
	semiprime = private_key.private_numbers().public_numbers.n

	side_C = (prime_factor_1 + prime_factor_2) / 2
	side_B = math.sqrt(semiprime)
	x = side_C - side_B

	# We collect all values of x for later analysis.
	list_of_x_values.append(x)

	duration = time.monotonic() - start_time

	# We get the smallest and greatest values for X.
	smallest_x = f"{min(list_of_x_values):.0f}"
	greatest_x = f"{max(list_of_x_values):.0f}"

	smallest_distance_between_x = None
	for i, first_value in enumerate(list_of_x_values):
	for j, second_value in enumerate(list_of_x_values):

	if i == j: # Ignore comparison of values with same identity. This doesn't prevent distance between
	continue # two compared x numbers from being 0, it just prevents comparisons against each value of
	# X against themselves (which is always zero).

	distance_between_values = abs(first_value-second_value)

	if smallest_distance_between_x is None:
	smallest_distance_between_x = distance_between_values
	else:
	smallest_distance_between_x = min(distance_between_values, smallest_distance_between_x)

	smallest_distance_between_x_str = f"{smallest_distance_between_x:.0f}"

	# Whitespace for aligning the decimal points of each value.
	length_of_longest_value = len(max(smallest_x, greatest_x, smallest_distance_between_x_str, key=len))
	whitespace1 = ' ' * (length_of_longest_value - len(smallest_x))
	whitespace2 = ' ' * (length_of_longest_value - len(greatest_x))
	whitespace3 = ' ' * (length_of_longest_value - len(smallest_distance_between_x_str))

	print(f"\nGenerated {number_of_keys_per_size_to_generate} {key_size}-bit keys in {duration:.1f} seconds.\n"
	f"The smallest x was {whitespace1} {smallest_x}\n"
	f"The greatest x was {whitespace2} {greatest_x}\n"
	f"The smallest distance between two x values was {whitespace3} {smallest_distance_between_x_str}")

	if previous_smallest_distance_between_two_x is not None and previous_key_size is not None:
	problem_size = smallest_distance_between_x / previous_smallest_distance_between_two_x
	print(f"\nIncreasing key size linearly from {previous_key_size} to {key_size} made the smallest distance between x:s in the sample {problem_size:.0f} times larger.")

	previous_smallest_distance_between_two_x = smallest_distance_between_x
	previous_key_size = key_size


	if __name__ == '__main__':
	main()

	# Output when I ran the program:

	# Generated 10000 512-bit keys in 62.5 seconds.
	# The smallest x was 12019896571057247261053876450711896266866078393495290408054292480000
	# The greatest x was 1009223972550147275717101238160743268768383705107733611013898955977893347328
	# The smallest distance between two x values was 1113672342193250620561601408476119330051987562811226746577265623040
	#
	# Generated 10000 640-bit keys in 71.0 seconds.
	# The smallest x was 21986876100048913151778421649542160516144701231635518536667851486981475794322035048448
	# The greatest x was 18675834825171408407759973623876278536724568807543318811379579247292871367999119300836187963392
	# The smallest distance between two x values was 834029113031968889456023867956337401692058697869430504912429717413540816780605259776
	#
	# Increasing key size linearly from 512 to 640 made the smallest distance between x:s in the sample 748899906582436736 times larger.
	#
	# Generated 10000 768-bit keys in 101.7 seconds.
	# The smallest x was 1596443682508973441432548160541972187872163423236923252373060004830134954773635695097917138724648663384064
	# The greatest x was 350565251279761667930362658206232530455526033878367674381360158638699293357613475168646552181512859306400061325312
	# The smallest distance between two x values was 379155538639685551066124516066385428441241214506476589429534483079952532505213417538949562210592334282752
	#
	# Increasing key size linearly from 640 to 768 made the smallest distance between x:s in the sample 454607078716150579200 times larger.
	#
	# Generated 10000 896-bit keys in 142.2 seconds.
	# The smallest x was 53464918858628610837571819224003485199149048817386891440515370270439865747191380750795481804744223769729748571876867811835904
	# The greatest x was 6446610185364092605972281272641218049742659728012226654868106589696897120263776775183201080556852464575219028976564006853482026893312
	# The smallest distance between two x values was 2886309801231642076087903654506366522532028649934864581466469495773308879509324245297286093628874061087480770472538510721024
	#
	# Increasing key size linearly from 768 to 896 made the smallest distance between x:s in the sample 7612469045255131136 times larger.
	#
	# Process finished with exit code 0

	# So it's obvious X is most likely not going to be 0, or 1, or 2, or 100000000000000000000000000000000000000000000, about ever.


	# Grant claims that number of semiprimes where X is small grows when key size grows. This is technically true,
	# but meaningless in practice.

	# To prove this, let's simplify a bit and say any two integers p and q are valid factors of a product N (N=p*q).
	# Let's compare two decimal digit values.
	#
	# Both numbers can have values between 00, 01, 02, ..., 99 so that's 100 different values. In how many cases are the
	# distance of p and q 0 (and have thus very small X)? It turns out are also 100 cases of pairs (p, q) where
	# both numbers have the same exact value (00, 00), (01, 01) ... (99,99). All these values have distance \|p-q\| = 0.
	#
	# So the question for security is, if we randomly pick p and q, how likely is it we pick the same values leading to
	# 0? Calculating the probability of picking a pair with identical p and q requires us to know the set of all cases.
	# The list of all pairs go from (00,00) (00,01) ... (00,99), (01, 00), (01, 01), ... (99,99).
	# The total number of pairs is 100*100 = 10,000.
	#
	# So the probability of picking a pair with two identical values is 100 / 10,000 = 0.01 = 1%. One in 100 cases is
	# quite bad.
	#
	# Now we want to know is, if we increase key size, does this probability change, because if it does, it would help
	# if we would increase the size of the factors and thus the keysize.
	#
	# So let's increase the length of both factors by 1 decimal digit, i.e., let's compare three decimal digit values.
	#
	# Now the distance is 0 if all three digits in the pair are the same, (000,000) (001, 001), ..., (999, 999).
	# Now there is 1000 different value pairs that have \|p-q\| = 0! So like grant says, the number of factor pairs that
	# have very small X grows. Grant's claim is essentially, "because the number of pairs with small X grows, the
	# probability of X being small remains likely, and he claims the algorithm works in "massive number of cases".
	#
	# But wait, let's not give in yet. How many different cases are there in total? 1000*1000 = 1,000,000.
	# So a million cases. What is the probability of picking a pair with three same digits in this case?
	# 10,000 / 1,000,000 = 0.001 = 0.1%
	#
	# So 0.1 percent of prime pairs are bad. So ten times smaller portion of pairs were bad. That means it's ten times
	# less likely to pick a pair of bad factors.
	#
	# So while there are objectively 10 times more factors that would result in small X, the relative portion of those
	# pairs becomes 10 times smaller every time we add a digit. So when the size of factors grows linearly (i.e. when you
	# add one zero and so make a 10 a 100), the probability that you pick a bad factor becomes 10 times smaller.
	#
	# This allows us to determine the probability of picking two identical factors that result in small x, when given
	# the number of digits in the base10 factor:
	#
	# P(N) = 10^N / (10^N)^2
	# = 1 / 10^N
	#
	# This shows that the probability of picking bad prime pair can only get more unlikely when key size increases.
	# This applies in all situations, including substrings.
	#
	# This also applies if the factors are prime numbers. The requirement of having a prime factors only means there
	# is not efficient way to figure out prime factors from semi prime. The requirement of having prime factors also
	# means that the space of possible factors is smaller, but not too small The size of gaps between prime factors
	# grows when number of digits in prime grows:
	# https://en.wikipedia.org/wiki/Prime_gap#/media/File:Wikipedia_primegaps.png
	# Since there are roughly n/ln(n) primes smaller than n[1], that would mean there are about 10^305 1024-bit primes.
	#
	#
	# But what if you get REAAAALLLY unlucky?
	#
	# Well, you'll be pleased to know that NIST requires that FIPS-compliant RSA implementations (read: every modern
	# crypto library) CHECK that the minimum value for p-q is at least 2^(N/2 - 100). So that would be 2^924 for
	# RSA-2048.
	#
	# [1] https://stackoverflow.com/a/16091676

	# So Grant is not TECHNICALLY lying when he states that space for X=0 is massive when generating random primes and
	# he's also not TECHNICALLY lying when he states X gets larger when key size grows. But he fails to mention
	# what we've shown here:
	#
	# * Sampling showed RSA-512, RSA-680, RSA-768 and RSA-960, X is not even near 0.
	# * Probability of picking prime factors that result in small X gets exponentially smaller when key size grows linearly
	# * RSA implementation algorithms check that the distance between P and Q is large enough to make the attack impossible.
	# This attack against RSA has been known for decades and it has been mitigated long since.
	#
	# So as always, Grant hasn't achieved shit. The reason he managed to factor semi-primes was he was actually factoring
	# numbers where the prime factors are actually close to one another. The reason they appear to novice eyes to be far
	# apart, is because we humans can't comprehend how long the excel file would have to be. To give you an idea
	# let's create an RSA-2048 key and look at the distance between its factors:

	# private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048)
	# prime_factor_1 = private_key.private_numbers().p
	# prime_factor_2 = private_key.private_numbers().q
	#
	# distance = f"{abs(prime_factor_1 - prime_factor_2):.0f}"
	# print(distance)
	#
	# This output for me the following value
	# 158668351314163897941513021472819439535737161270384929160014826156809873701937156100996679638299361924158643541160
	# 363536051484219329528722492947568028265196689575979113059512991254454139565628801812999505564564300856542096983478
	# 60599736696003561965602317367013105909087707110614109439636710610778611723010048

	# That is the distance between p and q. If we imagine p=2, and the distance is the next prime, we see there are
	# about n/ln(n) = 10^304 primes between them, so the Excel would need to be at least 10^304 lines long. There are
	# only 10^80 atoms on the observable universe so it's not possible to create excel sheet from all the matter that
	# exists.