fastRP
import numpy as np
from scipy import sparse
def fastRP(net, dim, window_size, beta=-1, s=3.0, edge_direction=False):
    """
    .. function:: fastRP(net: scipy.sparse matrix, dim: int, window_size: int, beta: float = -1, s: float = 3.0, edge_direction: bool = False) -> numpy.ndarray

    Fast Random Projection embedding

    See https://arxiv.org/abs/1908.11512.

    **Examples**

    Getting an embedding of a network::

        emb = fastRP(net, dim=1024, window_size=10)

    If the network is directed, one can construct two embeddings: one based on
    the neighbors reached by out-going edges and one based on the neighbors
    connected by in-coming edges. Set `edge_direction=True` to get both::

        emb_out, emb_in = fastRP(net, dim=1024, window_size=10, edge_direction=True)

    where `emb_out` is the embedding of nodes based on the edges going out of
    the nodes, and `emb_in` is based on the in-coming edges.

    **Note**

    This code is based on random projection. Random projection methods usually
    require many embedding dimensions to perform well. To get a sense of how
    many dimensions are needed, run::

        from sklearn.random_projection import johnson_lindenstrauss_min_dim
        johnson_lindenstrauss_min_dim(n_samples, eps)

    where `eps` is the amount of distortion allowed by the projection.

    :param net: Adjacency matrix
    :type net: scipy.sparse matrix
    :param dim: Embedding dimension
    :type dim: int
    :param window_size: Window size
    :type window_size: int
    :param beta: Degree normalization exponent, defaults to -1. Ranges in [-1, 0]. beta = -1 is the strongest normalization; beta = 0 means no normalization.
    :type beta: float, optional
    :param s: Inverse density of the random matrix, defaults to 3.0
    :type s: float, optional
    :param edge_direction: Set True to also return the embedding generated from the transpose of the adjacency matrix (i.e., based on in-coming edges), defaults to False
    :type edge_direction: bool, optional
    :return: Embedding matrix, or a tuple (emb_out, emb_in) if edge_direction is True
    :rtype: numpy.ndarray of shape (number of nodes, dimension)
    """
    n_nodes = net.shape[0]

    # Generate the random matrix for the random projection
    if np.isclose(s, 1):
        X = np.random.randn(n_nodes, dim)
    else:
        X = sparse.random(
            n_nodes,
            dim,
            density=1 / s,
            data_rvs=lambda x: np.sqrt(s) * (2 * np.random.randint(2, size=x) - 1),
        ).toarray()

    emb = _fastRP(net, dim, window_size, beta=beta, X=X.copy())
    if edge_direction:
        emb_cnt = _fastRP(
            net=sparse.csr_matrix(net.T),
            dim=dim,
            window_size=window_size,
            beta=beta,
            X=X.copy(),
        )
        return emb, emb_cnt
    return emb
def _fastRP(net, dim, window_size, X, beta=-1):
    # Get basic stats
    n_nodes = net.shape[0]
    outdeg = np.array(net.sum(axis=1)).reshape(-1)
    indeg = np.array(net.sum(axis=0)).reshape(-1)

    # Construct the transition matrix and the degree-normalization matrix
    P = sparse.diags(1 / np.maximum(1, outdeg)) @ net  # Transition matrix
    L = sparse.diags(np.power(np.maximum(indeg.astype(float), 1.0), beta))

    # First random projection
    X0 = (P @ L) @ X.copy()  # to include the self-loops

    # h is an array for normalization
    h = np.ones((n_nodes, 1))
    h0 = h.copy()

    # Iterative projection
    for _ in range(window_size):
        X = P @ X + X0
        h = P @ h + h0

    # Normalization
    X = sparse.diags(1.0 / np.maximum(np.array(h).reshape(-1), 1e-8)) @ X
    return X
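
A minimal usage sketch (not part of the original gist), assuming `networkx` is installed and `fastRP` is importable; the karate-club and random directed graphs below are only illustrations:

import networkx as nx
from scipy import sparse

# Small undirected example graph (34 nodes), converted to a sparse adjacency matrix.
G = nx.karate_club_graph()
A = sparse.csr_matrix(nx.to_scipy_sparse_array(G))

# 128-dimensional FastRP embedding aggregating up to 5 steps of the random walk.
emb = fastRP(A, dim=128, window_size=5)
print(emb.shape)  # (34, 128)

# For a directed network, edge_direction=True returns separate out- and in-edge embeddings.
D = sparse.csr_matrix(nx.to_scipy_sparse_array(nx.gnp_random_graph(200, 0.05, directed=True)))
emb_out, emb_in = fastRP(D, dim=128, window_size=5, edge_direction=True)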