Skip to content

Instantly share code, notes, and snippets.

@skojaku
Last active April 20, 2023 01:41
Show Gist options
  • Save skojaku/8b4c75edbb61db3c9fbe914eb4bf585a to your computer and use it in GitHub Desktop.
Save skojaku/8b4c75edbb61db3c9fbe914eb4bf585a to your computer and use it in GitHub Desktop.
fastRP
import pandas as pd
from scipy import sparse
import numpy as np
def fastRP(net, dim, window_size, beta=-1, s=3.0, edge_direction=False):
    """Fast Random Projection (FastRP) network embedding.

    See https://arxiv.org/abs/1908.11512.

    **Examples**

    Getting an embedding of a network::

        emb = fastRP(net, dim=1024, window_size=10)

    If the network is directed, one can construct two types of embedding:
    one based on the neighbors connected by in-coming edges, and one based
    on out-going edges. Get both by setting ``edge_direction=True``::

        emb_out, emb_in = fastRP(net, dim=1024, window_size=10, edge_direction=True)

    where ``emb_out`` is an embedding of nodes based on the edges going
    from the nodes, and ``emb_in`` is based on the in-coming edges.

    **Note**

    This code is based on random projection, which usually requires many
    embedding dimensions to perform well. To get a sense of how many
    dimensions are needed, run::

        from sklearn.random_projection import johnson_lindenstrauss_min_dim
        johnson_lindenstrauss_min_dim(n_samples, eps)

    where ``eps`` is the amount of distortion by the projection.

    :param net: Adjacency matrix
    :type net: scipy.sparse matrix
    :param dim: Embedding dimension
    :type dim: int
    :param window_size: Window size
    :type window_size: int
    :param beta: Degree normalization, defaults to -1. beta = -1 is the
        strongest normalization; beta = 0 means no normalization.
    :type beta: float, optional
    :param s: Inverse density of the random projection matrix, defaults to
        3.0. When s is close to 1, a dense Gaussian projection is used.
    :type s: float, optional
    :param edge_direction: Set True to also get the embedding generated by
        the transpose of the adjacency matrix (in-coming edges),
        defaults to False.
    :type edge_direction: bool, optional
    :return: Embedding matrix of shape (number of nodes, dim), or the tuple
        (emb_out, emb_in) when ``edge_direction`` is True.
    :rtype: numpy.ndarray, or tuple of two numpy.ndarray
    """
    n_nodes = net.shape[0]

    # Random projection matrix: dense Gaussian when s ~ 1; otherwise a
    # sparse sign matrix with entries +/- sqrt(s) at density 1/s
    # (the very sparse projection of the FastRP paper).
    if np.isclose(s, 1):
        X = np.random.randn(n_nodes, dim)
    else:
        X = sparse.random(
            n_nodes,
            dim,
            density=1 / s,
            data_rvs=lambda x: np.sqrt(s) * (2 * np.random.randint(2, size=x) - 1),
        ).toarray()

    emb = _fastRP(net, dim, window_size, beta=beta, X=X.copy())
    if edge_direction:
        # In-coming-edge embedding: same projection matrix, transposed net,
        # so the two embeddings live in a comparable space.
        emb_cnt = _fastRP(
            net=sparse.csr_matrix(net.T),
            dim=dim,
            window_size=window_size,
            beta=beta,
            X=X.copy(),
        )
        return emb, emb_cnt
    return emb
def _fastRP(net, dim, window_size, X, beta=-1):
# Get stats
n_nodes = net.shape[0]
outdeg = np.array(net.sum(axis=1)).reshape(-1)
indeg = np.array(net.sum(axis=0)).reshape(-1)
# Construct the transition matrix
P = sparse.diags(1 / np.maximum(1, outdeg)) @ net # Transition matrix
L = sparse.diags(np.power(np.maximum(indeg.astype(float), 1.0), beta))
# First random projection
X0 = (P @ L) @ X.copy() # to include the self-loops
# h is an array for normalization
h = np.ones((n_nodes, 1))
h0 = h.copy()
# Iterative projection
for _ in range(window_size):
X = P @ X + X0
h = P @ h + h0
# Normalization
X = sparse.diags(1.0 / np.maximum(np.array(h).reshape(-1), 1e-8)) @ X
return X
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment