Skip to content

Instantly share code, notes, and snippets.

@johnhw
Last active January 6, 2024 16:09
Show Gist options
  • Save johnhw/dfc7b8b8519aac530ac97da226c17bd5 to your computer and use it in GitHub Desktop.
Save johnhw/dfc7b8b8519aac530ac97da226c17bd5 to your computer and use it in GitHub Desktop.
1 million prime UMAP layout
### JHW 2018
import numpy as np
import umap
# This code from the excellent module at:
# https://stackoverflow.com/questions/4643647/fast-prime-factorization-module
import random
_known_factors = {}
totients = {}
def primesbelow(N):
# http://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188
#""" Input N>=6, Returns a list of primes, 2 <= p < N """
correction = N % 6 > 1
N = {0:N, 1:N-1, 2:N+4, 3:N+3, 4:N+2, 5:N+1}[N%6]
sieve = [True] * (N // 3)
sieve[0] = False
for i in range(int(N ** .5) // 3 + 1):
if sieve[i]:
k = (3 * i + 1) | 1
sieve[k*k // 3::2*k] = [False] * ((N//6 - (k*k)//6 - 1)//k + 1)
sieve[(k*k + 4*k - 2*k*(i%2)) // 3::2*k] = [False] * ((N // 6 - (k*k + 4*k - 2*k*(i%2))//6 - 1) // k + 1)
return [2, 3] + [(3 * i + 1) | 1 for i in range(1, N//3 - correction) if sieve[i]]
smallprimeset = set(primesbelow(1000000))
_smallprimeset = 1000000
smallprimes = primesbelow(10000000)
prime_ix = {p:i for i,p in enumerate(smallprimes)}
def isprime(n, precision=7):
# http://en.wikipedia.org/wiki/Miller-Rabin_primality_test#Algorithm_and_running_time
if n < 1:
raise ValueError("Out of bounds, first argument must be > 0")
elif n <= 3:
return n >= 2
elif n % 2 == 0:
return False
elif n < _smallprimeset:
return n in smallprimeset
d = n - 1
s = 0
while d % 2 == 0:
d //= 2
s += 1
for repeat in range(precision):
a = random.randrange(2, n - 2)
x = pow(a, d, n)
if x == 1 or x == n - 1: continue
for r in range(s - 1):
x = pow(x, 2, n)
if x == 1: return False
if x == n - 1: break
else: return False
return True
# https://comeoncodeon.wordpress.com/2010/09/18/pollard-rho-brent-integer-factorization/
def pollard_brent(n):
if n % 2 == 0: return 2
if n % 3 == 0: return 3
y, c, m = random.randint(1, n-1), random.randint(1, n-1), random.randint(1, n-1)
g, r, q = 1, 1, 1
while g == 1:
x = y
for i in range(r):
y = (pow(y, 2, n) + c) % n
k = 0
while k < r and g==1:
ys = y
for i in range(min(m, r-k)):
y = (pow(y, 2, n) + c) % n
q = q * abs(x-y) % n
g = gcd(q, n)
k += m
r *= 2
if g == n:
while True:
ys = (pow(ys, 2, n) + c) % n
g = gcd(abs(x - ys), n)
if g > 1:
break
return g
def _primefactors(n, sort=False):
factors = []
for checker in smallprimes:
while n % checker == 0:
factors.append(checker)
n //= checker
# early exit memoization
if n in _known_factors:
return factors + _known_factors[n]
if checker > n: break
if n < 2: return factors
while n > 1:
if isprime(n):
factors.append(n)
break
factor = pollard_brent(n) # trial division did not fully factor, switch to pollard-brent
factors.extend(primefactors(factor)) # recurse to factor the not necessarily prime factor returned by pollard-brent
n //= factor
if sort: factors.sort()
return factors
def primefactors(n, sort=False):
if n in _known_factors:
return _known_factors[n]
result = _primefactors(n)
_known_factors[n] = result
return result
from collections import defaultdict
def factorization(n):
factors = defaultdict(int)
for p1 in primefactors(n):
factors[p1] += 1
return factors
def unique_factorise(n):
return set(primefactors(n))
def totient(n):
if n == 0: return 1
try: return totients[n]
except KeyError: pass
tot = 1
for p, exp in factorization(n).items():
tot *= (p - 1) * p ** (exp - 1)
totients[n] = tot
return tot
def gcd(a, b):
if a == b: return a
while b > 0: a, b = b, a % b
return a
def lcm(a, b):
return abs((a // gcd(a, b)) * b)
### end
## Create sparse binary factor vectors for any number, and assemble into a matrix
## One column for each unique prime factor
## One row for each number, 0=does not have this factor, 1=does have this factor (might be repeated)
from scipy.special import expi
import scipy.sparse
def factor_vector_lil(n):
## approximate prime counting function (upper bound for the values we are interested in)
## gives us the number of rows (dimension of our space)
d = int(np.ceil(expi(np.log(n))))
x = scipy.sparse.lil_matrix((n,d))
for i in range(2,n):
for k,v in factorization(i).items():
x[i,prime_ix[k]] = 1
if i%100000==0: # just check it is still alive...
print(i)
return x
### Generate the matrix for 1 million integers
n = 1_000
X = factor_vector_lil(n)
# embed with UMAP
embedding = umap.UMAP(metric='cosine', n_epochs=500).fit_transform(X)
# save for later
np.savez('1e6_pts.npz', embedding=embedding)
# and save the image
from matplotlib import pyplot as plt
fig = plt.figure(figsize=(8,8))
fig.patch.set_facecolor('black')
plt.scatter(embedding[:,0], embedding[:,1], marker='o', s=0.005, edgecolor='',
c=np.arange(n), cmap="magma")
plt.axis("off")
plt.savefig("primes_umap_1e6_16k_smaller_pts.png", dpi=2000, facecolor='black')
@mcnemesis
Copy link

Hi. Been trying to reproduce this via ubuntu (in windows 10), but heck, am stuck getting past this:

image

my virtualenv looks like this:

blessings==1.7
bpython==0.17.1
curtsies==0.3.0
Django==1.10.5
django-appconf==1.0.2
django-compressor==2.1.1
django-leaflet-storage==0.8.2
greenlet==0.4.14
numpy==1.15.1
oauthlib==2.1.0
olefile==0.45.1
Pillow==4.0.0
pkg-resources==0.0.0
psycopg2==2.6.2
Pygments==2.2.0
PyJWT==1.6.4
python-openid==2.2.5
rcssmin==1.0.6
requests==2.13.0
requests-oauthlib==1.0.0
rjsmin==1.0.12
scipy==1.1.0
six==1.11.0
social-auth-app-django==1.1.0
social-auth-core==1.7.0
typing==3.6.4
umap-project==0.8.3
wcwidth==0.1.7

(( note: there are some bpython deps in there ))

What's the right package/provider for umap.UMAP ?
How do I resolve this?

@Miffyli
Copy link

Miffyli commented Aug 22, 2018

@mcnemesis

pip install umap-learn rather than pip install umap

@mcnemesis
Copy link

Finally! Here's one interesting cluster I've identified...

image

Not yet sure what this is, but am glad to have gotten to this - this exercise's helped to up my *nix environment on windows :-)

@dovinmu
Copy link

dovinmu commented Aug 24, 2018

This is so great! I wanted to animate it through the integers so I wrote this (rewrite the f-strings if you have Python < 3.6):

step = 500
frame_num = int(n/step)
fname_prefix = 'primes_umap_2k_smaller_pts_'

print("rendering", frame_num, "frames with", step, "integers per frame")
for frame_n in range(1, frame_num+1):
    _n = frame_n * step

    fig = plt.figure(figsize=(8,8))
    fig.patch.set_facecolor('black')
    plt.scatter(embedding[0:_n,0], embedding[0:_n,1], s=0.005, c=np.arange(_n), cmap='magma', marker='o')

    plt.axis("off")
    plt.savefig(f"frames/{fname_prefix}{frame_n}.png", dpi=250, facecolor='black')
    plt.close(fig)
    print(f"rendered frame {frame_n}/{frame_num}", end='\r')

And used ffmpeg to make it into a video:
ffmpeg -ss 1 -t 200 -i frames\primes_umap_2k_smaller_pts_%d.png -c:v libx264 -vf fps=25 -pix_fmt yuv420p out.mp4

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment