-
-
Save skojaku/cee26755645b133a69d6630c79307cde to your computer and use it in GitHub Desktop.
Fast Python code for calculating the disruption index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from scipy import sparse | |
from tqdm.auto import tqdm | |
def calc_disruption_index(net, batch_size=None):
    """Calculate the disruption index of every paper in a citation network.

    The disruption index of a focal paper is

        DI = (NF - NB) / (NR + NB + NF),

    where
    - NF: Number of papers citing the focal paper BUT NOT citing any of the
      references of the focal paper
    - NB: Number of papers citing the focal paper AND citing at least one
      reference of the focal paper
    - NR: Number of papers not citing the focal paper but citing at least one
      reference of the focal paper

    When ``batch_size`` is given, the focal papers are processed in chunks of
    at most ``batch_size`` papers. For each chunk, the sub-network made of the
    focal papers, the papers they cite, the papers citing them, and the papers
    sharing at least one reference with them is extracted, and the index is
    computed on that sub-network only.

    Parameters
    ----------
    net : sparse scipy matrix (anything accepted by ``sparse.csr_matrix``)
        Square citation network. net[i, j] = 1 if i cites j.
    batch_size : None or int, default None
        Maximum number of focal papers per chunk. Setting a larger batch_size
        makes computation faster, at the expense of memory. If None, the whole
        network is processed in a single pass.

    Returns
    -------
    numpy.ndarray
        One-dimensional array; entry i is the disruption index of paper i.

    Raises
    ------
    ValueError
        If ``batch_size`` is not None and is not strictly positive.

    Reference:
    - Funk, R. J. & Owen-Smith, J. A dynamic network measure of technological
      change. Manage. Sci. 63, 791–817 (2017).
      http://russellfunk.org/cdindex/static/funk_ms_2016.pdf
    - Wu, L., Wang, D. & Evans, J.A. Large teams develop and small teams
      disrupt science and technology. Nature 566, 378–382 (2019).
      https://doi.org/10.1038/s41586-019-0941-9
    """
    if batch_size is None:
        return _calc_disruption_index(net)
    if batch_size <= 0:
        raise ValueError(
            "batch_size must be a positive integer or None, got {!r}".format(
                batch_size
            )
        )

    # Homogenize the input data type
    net = sparse.csr_matrix(net)
    n_nodes = net.shape[0]
    if n_nodes == 0:
        return np.zeros(0)

    # Round up so that no chunk exceeds batch_size, and keep at least one
    # chunk: np.array_split rejects zero sections, which is what a plain
    # floor division produces whenever batch_size > n_nodes.
    n_chunks = max(1, int(np.ceil(n_nodes / batch_size)))
    chunks = np.array_split(np.arange(n_nodes), n_chunks)

    DI = np.zeros(n_nodes)
    # CSR transpose: gives cheap row slicing for "who cites the focal papers".
    netT = sparse.csr_matrix(net.T)
    for focal_node_ids in tqdm(chunks):
        references = net[focal_node_ids, :]  # focal paper -> cited paper
        citations = netT[focal_node_ids, :]  # focal paper <- citing paper

        # A node is relevant to this chunk if it is cited by a focal paper,
        # cites a focal paper, or shares a reference with a focal paper.
        # NOTE(review): relevance is detected by summing stored values, so
        # this assumes the entries of `net` are non-negative.
        is_relevant = (
            np.asarray(references.sum(axis=0)).reshape(-1)
            + np.asarray(citations.sum(axis=0)).reshape(-1)
            + np.asarray((references @ netT).sum(axis=0)).reshape(-1)
        )
        # Focal papers are placed first explicitly; exclude them here so they
        # do not appear twice in the sub-network.
        is_relevant[focal_node_ids] = -1
        supp_node_ids = np.where(is_relevant > 0)[0]

        node_ids = np.concatenate([focal_node_ids, supp_node_ids])
        subnet = net[node_ids, :][:, node_ids].copy()
        subnet.sort_indices()

        dindex = _calc_disruption_index(subnet)
        # Only the leading entries belong to the focal papers of this chunk.
        DI[focal_node_ids] = dindex[: len(focal_node_ids)]
    return DI
def _calc_disruption_index(net):
    """Calculate the disruption index of every paper in a citation network.

    The disruption index of a focal paper is

        DI = (NF - NB) / (NR + NB + NF),

    where
    - NF: Number of papers citing the focal paper BUT NOT citing any of the
      references of the focal paper
    - NB: Number of papers citing the focal paper AND citing at least one
      reference of the focal paper
    - NR: Number of papers not citing the focal paper but citing at least one
      reference of the focal paper

    The denominator is clipped to at least 1, so a paper with
    NF = NB = NR = 0 gets an index of 0.

    Parameters
    ----------
    net : sparse scipy matrix (anything accepted by ``sparse.csr_matrix``)
        Square citation network. Any stored non-zero entry net[i, j] means
        that i cites j; the magnitude of the entry is ignored. Explicitly
        stored zeros are NOT treated as citations. The input is not modified.

    Returns
    -------
    numpy.ndarray
        One-dimensional float array; entry i is the disruption index of
        paper i.

    Raises
    ------
    ValueError
        If ``net`` is not square.

    Reference:
    - Funk, R. J. & Owen-Smith, J. A dynamic network measure of technological
      change. Manage. Sci. 63, 791–817 (2017).
      http://russellfunk.org/cdindex/static/funk_ms_2016.pdf
    - Wu, L., Wang, D. & Evans, J.A. Large teams develop and small teams
      disrupt science and technology. Nature 566, 378–382 (2019).
      https://doi.org/10.1038/s41586-019-0941-9
    """
    # Homogenize the input data type. Work on a private copy because the
    # clean-up below (eliminate_zeros) operates in place.
    net = sparse.csr_matrix(net, copy=True)
    n_rows, n_cols = net.shape
    if n_rows != n_cols:
        raise ValueError(
            "net must be a square matrix, got shape {}".format(net.shape)
        )
    if n_rows == 0:
        return np.zeros(0)

    # Binarize: drop explicitly stored zeros first so that they are not
    # promoted to citations, then set every remaining entry to 1. An integer
    # dtype keeps the counts below exact regardless of the input dtype.
    net.eliminate_zeros()
    net.data = np.ones(net.data.shape, dtype=np.int64)

    #
    # Calculate quantities for calculating the disruption index
    #
    # co_citing[i, j] = 1 if papers i and j (i != j) cite at least one paper
    # in common. The diagonal is removed while the matrix is in COO form,
    # which avoids inserting new entries into a CSR matrix.
    shared = (net @ net.T).tocoo()
    off_diagonal = shared.row != shared.col
    co_citing = sparse.csr_matrix(
        (
            np.ones(int(off_diagonal.sum()), dtype=np.int64),
            (shared.row[off_diagonal], shared.col[off_diagonal]),
        ),
        shape=shared.shape,
    )

    # cited_by[i, j] = 1 if paper i is cited by paper j.
    cited_by = sparse.csr_matrix(net.T)

    # both[i, j] = 1 if i is cited by j, and i and j co-cite at least one
    # paper.
    both = cited_by.multiply(co_citing)

    n_cited_by = np.asarray(cited_by.sum(axis=1)).ravel()
    n_co_citing = np.asarray(co_citing.sum(axis=1)).ravel()
    NB = np.asarray(both.sum(axis=1)).ravel()

    # `both` is an entry-wise subset of both `cited_by` and `co_citing`, so
    # the remaining counts follow from row sums without building the
    # difference matrices:
    # NF: papers citing the focal paper but none of its references.
    # NR: papers citing a reference of the focal paper but not the focal paper.
    NF = n_cited_by - NB
    NR = n_co_citing - NB

    # Calculate the disruption
    DI = (NF - NB) / np.maximum(NR + NB + NF, 1)
    return DI
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment