Skip to content

Instantly share code, notes, and snippets.

@cycyyy
Created November 28, 2021 04:51
Show Gist options
  • Save cycyyy/242ba0cd755e353ae27c7287a488126d to your computer and use it in GitHub Desktop.
Save cycyyy/242ba0cd755e353ae27c7287a488126d to your computer and use it in GitHub Desktop.
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy import spatial
import math
from multiprocessing.pool import Pool
import multiprocessing
def main(X, Y, k, alpha):
nbrs = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(X)
tree = spatial.KDTree(Y)
distances, indices = nbrs.kneighbors(X)
N = X.shape[0]
M = Y.shape[0]
res = 0.0
b = B(k, alpha)
#def get_upsilon(x):
# return tree.query(x, k)[0][-1]
#pool = Pool(processes=40)
#upsilons = pool.map(get_upsilon, X)
#print(len(upsilons))
for i in range(0, N):
rho = distances[i][-1]
upsilon = tree.query(X[i], k)[0][-1]
if rho == 0 and upsilon == 0:
res += 1
continue
print(i)
res += first(M, N, rho, upsilon, alpha) * b
return res / N
def first(M, N, rho, upsilon, alpha):
return math.pow(((N - 1) * rho / (M * upsilon)), 1 / alpha)
import scipy.special as sc
def B(k, alpha):
return gammadiv(sc.gammaln(k), sc.gammaln(k -alpha + 1)) * gammadiv(sc.gammaln(k), sc.gammaln(k + alpha - 1))
def gammadiv(a, b):
return math.exp(a - b)
import gc
import sys
import os
def mutli(k, alpha, start_index, end_index, N):
try:
sys.setrecursionlimit(1000000)
X = np.load(str(start_index) + "x.npy")
Y = np.load("y.npy")
distances = np.load("dis.npy")
# nbrs = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(X)
tree = spatial.KDTree(Y)
# distances, indices = nbrs.kneighbors(X)
M = Y.shape[0]
# N = X.shape[0]
res = 0.0
b = B(k, alpha)
for i in range(0, end_index - start_index):
# if i % 10 == 0:
# print(i, end_index - start_index)
rho = distances[i][-1]
upsilon = tree.query(X[i], k)[0][-1]
if rho == 0 and upsilon == 0:
res += 1
continue
t = k
while upsilon == 0:
t *= 2
upsilon = tree.query(X[i], t)[0][-1]
res += first(M, N, rho, upsilon, alpha) * b
except:
print("waht?")
# print("finish")
return res
import faiss
def mutli_f(k, alpha, start_index, end_index, N):
try:
sys.setrecursionlimit(1000000)
X = np.load(str(start_index) + "x.npy").astype('float32')
Y = np.load("y.npy").astype('float32')
distances = np.load("dis.npy").astype('float32')
tree = faiss.IndexFlatL2(Y.shape[1])
tree.add(Y)
M = Y.shape[0]
res = 0.0
b = B(k, alpha)
for i in range(0, end_index - start_index):
# if i % 10 == 0:
# print(i, end_index - start_index)
rho = distances[i][-1]
upsilon = tree.search(X[i], k)[0][-1]
if rho == 0 and upsilon == 0:
res += 1
continue
t = k
while upsilon == 0:
t *= 2
upsilon = tree.query(X[i], t)[0][-1]
res += first(M, N, rho, upsilon, alpha) * b
except Exception as e:
print("waht?", e)
# print("finish")
return res
def main_mutli(X, Y, k, alpha):
core = 40
np.save("y.npy", Y)
nbrs = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(X)
distances, indices = nbrs.kneighbors(X)
np.save("dis.npy", distances)
result = []
pool = multiprocessing.Pool(processes=core)
N = X.shape[0]
start_index = 0
step = N // (core - 1)
for i in range(0, core):
end_index = start_index + step
if end_index > N:
end_index = N
np.save(str(start_index) + "x.npy", X[start_index:end_index])
result.append(pool.apply_async(mutli_f, (k, alpha, start_index, end_index, N)))
# pool.apply_async(mutli, (k, alpha, start_index, end_index, N))
start_index += step
s = 0.0
pool.close()
for r in result:
s += r.get()
return s / N
def main_f(X, Y, k, alphas):
nbrs = NearestNeighbors(n_neighbors=k, algorithm='kd_tree').fit(X)
tree = faiss.IndexFlatL2(Y.shape[1])
tree.add(Y)
distances, indices = nbrs.kneighbors(X)
N = X.shape[0]
M = Y.shape[0]
res = 0.0
#def get_upsilon(x):
# return tree.query(x, k)[0][-1]
#pool = Pool(processes=40)
#upsilons = pool.map(get_upsilon, X)
#print(len(upsilons))
gpu = faiss.StandardGpuResources()
tree = faiss.index_cpu_to_gpu(gpu, 0, tree)
tmp_query = np.random.rand(1, Y.shape[1]).astype("float32")
upsilons = tree.search(X, k)[0]
reses = [0] * len(alphas)
bs = []
for alpha in alphas:
bs.append(B(k, alpha))
for i in range(0, N):
rho = distances[i][-1]
upsilon = upsilons[i][-1]
if rho == 0 and upsilon == 0:
res += 1
continue
t = k
tmp_query[0] = X[i]
while upsilon < 0.0001 and t <= 900:
t += 100
tmp_res = tree.search(tmp_query, t)[0][0]
# print(tmp_res, t)
'''
for p in tmp_res:
upsilon = np.linalg.norm(X[i] - Y[p])
if upsilon != 0:
break
if t > 400:
print(X[i])
print(Y[tree.search(tmp_query, t)[1][0][-1]])
print(np.sum(X[i] - Y[tree.search(tmp_query, t)[1][0][-1]]))
'''
for y in tmp_res:
if y > 0.0001:
upsilon = y
break
if upsilon < 0.0001:
res += 1
continue
#upsilon = tree.search(tmp_query, t)[0][0][-1]
# print(upsilon, flush=True)
for l in range(0, len(reses)):
reses[l] += first(M, N, rho, upsilon, alphas[l]) * bs[l]
for l in range(0, len(reses)):
reses[l] /= N
return reses
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment