@jxcodetw
Created February 23, 2020 17:55
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from umap.umap_ import fuzzy_simplicial_set, find_ab_params
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import SpectralEmbedding
from scipy.sparse import save_npz, load_npz
import random
from functools import partial
MIN_DIST=0.1
SPREAD=1.0
EPS = 1e-12
N_EPOCHS = 50
NEG_RATE = 5
BATCH_SIZE = 4096 * NEG_RATE
D_GRAD_CLIP = 19006880743424
DATA_NPZ_PATH = 'mnist_70000.npz'
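# Hyperparameter notes: MIN_DIST and SPREAD are only used to fit the a/b curve
# parameters below; NEG_RATE is the number of negative samples drawn per
# positive edge; D_GRAD_CLIP is so large that the gradient clamp hook on the
# pairwise distances is effectively inactive at this magnitude.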
def get_activation(act):
    if act == 'lrelu':
        return nn.LeakyReLU(0.2, inplace=True)
    elif act == 'relu':
        return nn.ReLU(inplace=True)
    raise Exception('unsupported activation function')
class FCEncoder(nn.Module):
    def __init__(self, dim, num_layers=3, act='lrelu'):
        super(FCEncoder, self).__init__()
        self.dim = dim
        self.num_layers = num_layers
        self.act = partial(get_activation, act=act)
        hidden_dim = 256
        layers = [
            nn.Linear(dim, hidden_dim*2),
            self.act(),
            nn.Linear(hidden_dim*2, hidden_dim),
            self.act(),
        ]
        # Note: list multiplication repeats the same nn.Linear instance, so the
        # num_layers hidden blocks below share a single set of weights.
        layers += [
            nn.Linear(hidden_dim, hidden_dim),
            self.act(),
        ] * num_layers
        layers += [
            nn.Linear(hidden_dim, 2),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, X):
        return self.net(X)
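# make_graph mirrors the preprocessing in umap-learn's simplicial_set_embedding:
# edges whose membership strength is too small to be sampled even once over
# n_epochs are dropped from the graph.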
def make_graph(P, n_epochs=-1):
    graph = P.tocoo()
    graph.sum_duplicates()
    n_vertices = graph.shape[1]
    if n_epochs <= 0:
        # For smaller datasets we can use more epochs
        if graph.shape[0] <= 10000:
            n_epochs = 500
        else:
            n_epochs = 200
    graph.data[graph.data < (graph.data.max() / float(n_epochs))] = 0.0
    graph.eliminate_zeros()
    return graph
def make_epochs_per_sample(weights, n_epochs):
    result = -1.0 * np.ones(weights.shape[0], dtype=np.float64)
    n_samples = n_epochs * (weights / weights.max())
    result[n_samples > 0] = float(n_epochs) / n_samples[n_samples > 0]
    return result
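# make_epochs_per_sample follows the reference UMAP implementation:
# result[i] = weights.max() / weights[i], i.e. the strongest edge is sampled
# every epoch and weaker edges proportionally less often.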
def neg_squared_euc_dists(X):
    sum_X = X.pow(2).sum(dim=1)
    D = (-2 * X @ X.transpose(1, 0) + sum_X).transpose(1, 0) + sum_X
    return -D

def w_tsne(Y, a, b):
    distances = neg_squared_euc_dists(Y)
    inv_distances = 1. / (1. - a * (distances))  # 1 / (1 + a*d^2), i.e. b fixed at 1
    return inv_distances

def KLD(P, Q):
    return P * torch.log((P + EPS) / Q)

def CE(V, W):
    return -V * torch.log(W + EPS) - (1 - V) * torch.log(1 - W + EPS)

def MXLK(P, w, gamma=7.0):
    return P * torch.log(w + EPS) + gamma * (1 - P) * torch.log(1 - w + EPS)
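# The loss used below is UMAP's fuzzy-set cross entropy: with low-dimensional
# similarity w(d) = 1 / (1 + a * d^(2b)), positive edges contribute -log(w) and
# negative samples contribute -log(1 - w). The KLD / CE / MXLK / w_tsne helpers
# above are alternative formulations that are not called in the training loop.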
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)
print('load data')
mnist = np.load(DATA_NPZ_PATH)
data = mnist['data']
print('estimate a, b')
ua, ub = find_ab_params(SPREAD, MIN_DIST)
# ua, ub = 1.0, 1.0
# ub = 1.0
print('a:', ua, 'b:', ub)
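# find_ab_params fits a and b so that 1 / (1 + a * d^(2b)) approximates the
# desired membership curve for the given spread/min_dist (roughly a ~ 1.58,
# b ~ 0.9 for the defaults used here).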
print('calc V')
try:
    V_csc = load_npz('V_csc.npz')
    print('Use V cache')
except Exception:
    print('Use new V')
    # With umap-learn 0.3.x (the version noted in the comment at the end),
    # fuzzy_simplicial_set returns the sparse affinity matrix directly;
    # newer releases return a tuple instead.
    V_csc = fuzzy_simplicial_set(data, n_neighbors=15,
                                 random_state=np.random.RandomState(42), metric='euclidean')
    save_npz('V_csc', V_csc)
# V = torch.Tensor(V_csc.toarray())
print('make_graph')
graph = make_graph(V_csc, N_EPOCHS)
print('make_epochs_per_sample')
epochs_per_sample = make_epochs_per_sample(graph.data, N_EPOCHS)
print('Trying to put X into GPU')
X = torch.from_numpy(data).float()
X = X.to(device)
# X = X.float()
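# The whole dataset is kept on the GPU: 70,000 x 784 float32 (as the filename
# suggests) is only about 220 MB, so minibatches can be gathered by indexing X.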
print('Constructing NN')
encoder = FCEncoder(784, num_layers=5)
encoder = encoder.to(device)
encoder = encoder.float()
init_lr = 1e-3
optimizer = optim.SGD(encoder.parameters(), lr=init_lr, weight_decay=0)
epochs_per_negative_sample = epochs_per_sample / NEG_RATE
epoch_of_next_negative_sample = epochs_per_negative_sample.copy()
epoch_of_next_sample = epochs_per_sample.copy()
head = graph.row
tail = graph.col
rnd_max_idx = X.shape[0]
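# Bookkeeping for edge sampling, as in umap-learn's optimize_layout: head/tail
# are the edge endpoints of the COO graph, and each edge tracks the epoch at
# which it (and its negative samples) should next be sampled.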
print('optimizing...')
for epoch in range(1, N_EPOCHS):
    # Collect the positive edges that are due this epoch, plus the matching
    # random negative samples, following the per-edge sampling schedule.
    batch_i = []
    batch_j = []
    batch_neg_i = []
    for i in range(epochs_per_sample.shape[0]):
        if epoch_of_next_sample[i] <= epoch:
            i_idx, j_idx = head[i], tail[i]
            batch_i.append(i_idx)
            batch_j.append(j_idx)
            epoch_of_next_sample[i] += epochs_per_sample[i]
            n_neg_samples = int(
                (epoch - epoch_of_next_negative_sample[i])
                / epochs_per_negative_sample[i]
            )
            for _ in range(n_neg_samples):
                batch_neg_i.append(i_idx)
            epoch_of_next_negative_sample[i] += (
                n_neg_samples * epochs_per_negative_sample[i]
            )
    batch_neg_j = torch.randint(0, rnd_max_idx, (len(batch_neg_i),)).tolist()
    batch_r = torch.zeros(len(batch_i), dtype=torch.long).tolist() + torch.ones(len(batch_neg_i), dtype=torch.long).tolist()
    batch_i += batch_neg_i
    batch_j += batch_neg_j
    rnd_perm = torch.randperm(len(batch_i))
    batch_i = torch.Tensor(batch_i).long()[rnd_perm]
    batch_j = torch.Tensor(batch_j).long()[rnd_perm]
    batch_r = torch.Tensor(batch_r).long()[rnd_perm]
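    # batch_r marks each pair: 0 = positive (graph edge), 1 = negative (random
    # pair); the shuffle above mixes both kinds within every minibatch.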
    for i in range(0, len(batch_i), BATCH_SIZE):
        bi = batch_i[i:i+BATCH_SIZE]
        bj = batch_j[i:i+BATCH_SIZE]
        br = batch_r[i:i+BATCH_SIZE]
        optimizer.zero_grad()
        Y_bi = encoder(X[bi])
        Y_bj = encoder(X[bj])
        # Detach the tail point of negative pairs so only the head is repelled.
        Y_bj[br==1] = Y_bj[br==1].detach()
        d = (Y_bi - Y_bj).pow(2).sum(dim=1)
        d.register_hook(lambda grad: grad.clamp(min=-D_GRAD_CLIP, max=D_GRAD_CLIP))
        dp = d.pow(ub)
        # Low-dimensional membership strength w = 1 / (1 + a * dist^(2b)).
        w = (1/(1+ua*(dp))).clamp(min=0, max=1)
        pw = w[br==0]
        rw = w[br==1]
        # Cross-entropy: attract positive pairs, repel negative pairs.
        loss = - (torch.log(pw + EPS)).sum()
        loss += - (torch.log(1 - rw + EPS)).sum()
        loss.backward()
        torch.nn.utils.clip_grad_value_(encoder.parameters(), 4)
        optimizer.step()
    with torch.no_grad():
        Y = encoder(X)
        # w = w_tsne(Y, ua, ub).clamp(min=0, max=1)
        # loss = CE(V, w).sum()
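    # Decay the learning rate linearly to zero over training, matching the
    # alpha schedule used by the reference UMAP optimizer.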
    new_lr = (1 - epoch / N_EPOCHS) * init_lr
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr
    # Snapshot the full embedding (the per-epoch save expects an existing
    # 'umap_nn' directory); the printed loss is that of the last minibatch.
    np.savez_compressed('umap_fast_nn_Y', Y=Y.detach().cpu().numpy())
    np.savez_compressed('umap_nn/{:04d}'.format(epoch), Y=Y.detach().cpu().numpy())
    print("{:04d}".format(epoch), "{:.7f}".format(new_lr), "{:.2f}".format(loss.mean().item()))

print('Done.')
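# To inspect the result, the final embedding can be loaded and plotted, e.g.:
# emb = np.load('umap_fast_nn_Y.npz')['Y']
# plt.scatter(emb[:, 0], emb[:, 1], s=1)
# plt.show()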
jxcodetw commented Mar 6, 2020

UMAP version: '0.3.10'
