fastRP
import numpy as np
from scipy import sparse
def fastRP(net, dim, window_size, beta=-1, s=3.0, edge_direction=False):
    """
    .. function:: fastRP(net: scipy.sparse matrix, dim: int, window_size: int, beta: float = -1, s: float = 3.0, edge_direction: bool = False) -> numpy.ndarray

    Fast Random Projection embedding

    See https://arxiv.org/abs/1908.11512.

    **Examples**

    Getting an embedding of a network::

        emb = fastRP(net, dim=1024, window_size=10)

    If the network is directed, one can construct two embeddings: one based on
    the neighbors reached by out-going edges and one based on the neighbors
    connected by in-coming edges. Set `edge_direction=True` to get both::

        emb_out, emb_in = fastRP(net, dim=1024, window_size=10, edge_direction=True)

    where `emb_out` is the embedding of nodes based on the edges going out of
    the nodes, and `emb_in` is based on the in-coming edges.

    **Note**

    This code is based on random projection. Random projection methods usually
    require many embedding dimensions to perform well. To get a sense of how
    many dimensions are needed, run::

        from sklearn.random_projection import johnson_lindenstrauss_min_dim
        johnson_lindenstrauss_min_dim(n_samples, eps)

    where `eps` is the amount of distortion allowed by the projection.

    :param net: Adjacency matrix
    :type net: scipy.sparse matrix
    :param dim: Embedding dimension
    :type dim: int
    :param window_size: Window size
    :type window_size: int
    :param beta: Degree normalization exponent, defaults to -1. Ranges in [-1, 0]. beta = -1 is the strongest normalization; beta = 0 means no normalization.
    :type beta: float, optional
    :param s: Inverse density of the random matrix, defaults to 3.0
    :type s: float, optional
    :param edge_direction: Set True to also return the embedding generated from the transpose of the adjacency matrix (i.e., based on in-coming edges), defaults to False
    :type edge_direction: bool, optional
    :return: Embedding matrix, or a tuple (emb_out, emb_in) if edge_direction is True
    :rtype: numpy.ndarray of shape (number of nodes, dimension)
    """
    n_nodes = net.shape[0]

    # Generate the random matrix for the random projection
    if np.isclose(s, 1):
        X = np.random.randn(n_nodes, dim)
    else:
        X = sparse.random(
            n_nodes,
            dim,
            density=1 / s,
            data_rvs=lambda x: np.sqrt(s) * (2 * np.random.randint(2, size=x) - 1),
        ).toarray()

    emb = _fastRP(net, dim, window_size, beta=beta, X=X.copy())
    if edge_direction:
        emb_cnt = _fastRP(
            net=sparse.csr_matrix(net.T),
            dim=dim,
            window_size=window_size,
            beta=beta,
            X=X.copy(),
        )
        return emb, emb_cnt
    return emb
def _fastRP(net, dim, window_size, X, beta=-1):
    # Get basic stats
    n_nodes = net.shape[0]
    outdeg = np.array(net.sum(axis=1)).reshape(-1)
    indeg = np.array(net.sum(axis=0)).reshape(-1)

    # Construct the transition matrix and the degree-normalization matrix
    P = sparse.diags(1 / np.maximum(1, outdeg)) @ net  # Transition matrix
    L = sparse.diags(np.power(np.maximum(indeg.astype(float), 1.0), beta))

    # First random projection
    X0 = (P @ L) @ X.copy()  # to include the self-loops

    # h is an array for normalization
    h = np.ones((n_nodes, 1))
    h0 = h.copy()

    # Iterative projection
    for _ in range(window_size):
        X = P @ X + X0
        h = P @ h + h0

    # Normalization
    X = sparse.diags(1.0 / np.maximum(np.array(h).reshape(-1), 1e-8)) @ X
    return X
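
A minimal usage sketch (not part of the original gist), assuming `networkx` is installed and `fastRP` is importable; the karate-club and random directed graphs below are only illustrations:

import networkx as nx
from scipy import sparse

# Small undirected example graph (34 nodes), converted to a sparse adjacency matrix.
G = nx.karate_club_graph()
A = sparse.csr_matrix(nx.to_scipy_sparse_array(G))

# 128-dimensional FastRP embedding aggregating up to 5 steps of the random walk.
emb = fastRP(A, dim=128, window_size=5)
print(emb.shape)  # (34, 128)

# For a directed network, edge_direction=True returns separate out- and in-edge embeddings.
D = sparse.csr_matrix(nx.to_scipy_sparse_array(nx.gnp_random_graph(200, 0.05, directed=True)))
emb_out, emb_in = fastRP(D, dim=128, window_size=5, edge_direction=True)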