Skip to content

Instantly share code, notes, and snippets.

@jakelevi1996
Last active July 31, 2020 12:13
Show Gist options
  • Save jakelevi1996/326f260aeb9ec3d55c727215e35c7cf4 to your computer and use it in GitHub Desktop.
Save jakelevi1996/326f260aeb9ec3d55c727215e35c7cf4 to your computer and use it in GitHub Desktop.
Listing primes

Listing primes

An efficient way to calculate all primes less than a certain threshold is to use a prime number sieve. This method can also be used to calculate the nth prime, as shown below:

import numpy as np

def prime_number_sieve(n_elements=100):
    sieve = np.ones(n_elements, dtype=int)
    sieve[0] = sieve[1] = sieve[4::2] = 0
    for i in range(3, int(n_elements / 2) + 1, 2):
        if sieve[i]:
            sieve[2*i::i] = 0
    return sieve

def nth_prime(n):
    n_elements = 10
    while True:
        sieve = prime_number_sieve(n_elements)
        if sieve.sum() >= n:
            break
        else:
            n_elements *= 10

    return np.argwhere(sieve).ravel()[n]

print(nth_prime(10001))

Output:

104759

Prime factorisation

This prime number sieve can also be used to calculate prime factorisations, as shown below:

class PrimeFactorisor:
    def __init__(self, prime_sieve_len_init=10):
        self.prime_sieve_len = prime_sieve_len_init
        sieve = prime_number_sieve(self.prime_sieve_len)
        self.prime_list = np.argwhere(sieve).ravel()
    
    def expand_prime_list_len(self, max_prime):
        while self.prime_sieve_len <= max_prime:
            self.prime_sieve_len *= 10

        sieve = prime_number_sieve(self.prime_sieve_len)
        self.prime_list = np.argwhere(sieve).ravel()

    def factorise(self, x):
        if self.prime_sieve_len <= x:
            self.expand_prime_list_len(x)
        
        factor_dict = dict()
        for p in self.prime_list:
            if x <= 1:
                break

            while x % p == 0:
                if p in factor_dict:
                    factor_dict[p] += 1
                else:
                    factor_dict[p] = 1
                x //= p
        
        return factor_dict

pf = PrimeFactorisor()
prime_factorise = lambda x: pf.factorise(x)

for i in [12, 31, 99, 12345]:
    print(i, prime_factorise(i))

Output:

12 {2: 2, 3: 1}
31 {31: 1}
99 {3: 2, 11: 1}
12345 {3: 1, 5: 1, 823: 1}

Comparing the prime number sieve with trial division

Recently I had a coding challenge as part of a job interview, and a sub-problem of the coding challenge required creating a list of prime numbers. My approach at the time was to make the list of primes using trial division, however a more efficient approach (as shown below) is to use a prime number sieve:

import numpy as np
from time import perf_counter

def prime_sieve_lt(x):
    """ Return a list of primes less than x, using a prime number sieve """
    prime_bools = np.full(x, True, np.bool)
    prime_bools[[0, 1]] = prime_bools[4::2] = False
    for trial_prime in range(3, x, 2):
        if prime_bools[trial_prime]:
            prime_bools[2*trial_prime::trial_prime] = False
    
    return np.arange(x)[prime_bools]


def is_prime(x, prime_list):
    """
    Given a sufficiently large ordered list of prime numbers, return True if x
    is prime; otherwise return False. 
    """
    sqrtx = int(np.ceil(np.sqrt(x)))
    for prime in prime_list:
        if x % prime == 0: return False
        if prime >= sqrtx: return True

    return True

def prime_trial_lt(x):
    """ Return a list of primes less than x, using trial division """
    prime_list = [2]
    for trial_prime in range(3, x, 2):
        if is_prime(trial_prime, prime_list):
            prime_list.append(trial_prime)

    return prime_list


if __name__ == "__main__":
    # Print list of first 100 primes
    print(prime_sieve_lt(100), prime_trial_lt(100), sep="\n")
    # Compare time complexity for first 1 million primes
    x = 1000000
    t0 = perf_counter()
    p1 = prime_sieve_lt(x)
    t1 = perf_counter()
    p2 = prime_trial_lt(x)
    t2 = perf_counter()
    assert np.array_equal(p1, p2)
    print("Time (s)\nSieve: {:.4f}\nTrial: {:.4f}".format(t1 - t0, t2 - t1))

Output:

[ 2  3  5  7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 
 97]
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 
71, 73, 79, 83, 89, 97]
Time (s)
Sieve: 0.0706
Trial: 1.9497
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment