Last active
June 10, 2025 20:32
-
-
Save skojaku/cee26755645b133a69d6630c79307cde to your computer and use it in GitHub Desktop.
Fast Python code for calculating the disruption index
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| from scipy import sparse | |
| from tqdm.auto import tqdm | |
| from numba import njit, prange | |
def calc_disruption_index(net):
    """
    Compute disruption indices for all nodes in a citation network.

    The disruption index measures how much a paper disrupts or consolidates
    existing knowledge by analyzing citation patterns of papers that cite it.

    Parameters
    ----------
    net : scipy.sparse matrix or array-like
        Square citation network of shape (n_papers, n_papers) in which a
        stored non-zero entry net[i, j] means paper i cites paper j.
        Anything accepted by ``scipy.sparse.csr_matrix`` works; the input is
        converted to CSR internally and the caller's object is not modified.

    Returns
    -------
    numpy.ndarray
        Disruption index for each node, computed as
        (NF - NB) / max(NF + NB + NR, 1), where for node i
        NF = papers that cite i but none of i's references,
        NB = papers that cite i and at least one of i's references,
        NR = papers that cite i's references but not i.
        Values range from -1 (consolidating) to +1 (disrupting); a node
        whose three counts are all zero gets 0.

    Raises
    ------
    ValueError
        If ``net`` is not square.
    """
    # CSC (and other compressed formats) also expose indptr/indices, but with
    # a different meaning, so coerce to CSR instead of trusting the caller.
    csr = sparse.csr_matrix(net)
    n_rows, n_cols = csr.shape
    if n_rows != n_cols:
        raise ValueError(
            f"net must be a square matrix, got shape {csr.shape}"
        )

    # The kernel intersects index slices with assume_unique=True and treats
    # every stored entry as a citation, so duplicate entries and explicit
    # zeros would corrupt the counts. Clean up on a copy so the caller's
    # matrix is left untouched.
    if not csr.has_canonical_format or (csr.data == 0).any():
        csr = csr.copy()
        csr.sum_duplicates()
        csr.eliminate_zeros()

    # Row i of the transpose lists the papers that cite i.
    csr_T = csr.T.tocsr()
    return _calc_disruption_index(
        csr.indptr, csr.indices, csr_T.indptr, csr_T.indices, csr.shape[0]
    )
@njit(parallel=True)
def _calc_disruption_index(indptr, indices, indptr_T, indices_T, n_nodes):
    """
    Numba-compiled driver that evaluates every node's disruption index.

    Each node is handed to ``compute_disruption_for_node`` inside a
    ``prange`` loop; the three per-node counts are stored in int32 arrays
    and combined into the final ratio after the loop.

    Parameters
    ----------
    indptr : numpy.ndarray
        CSR indptr array of the citation matrix.
    indices : numpy.ndarray
        CSR indices array of the citation matrix.
    indptr_T : numpy.ndarray
        CSR indptr array of the transposed citation matrix.
    indices_T : numpy.ndarray
        CSR indices array of the transposed citation matrix.
    n_nodes : int
        Number of nodes (papers) in the network.

    Returns
    -------
    numpy.ndarray
        (NF - NB) / max(NR + NB + NF, 1) for each node.
    """
    forward = np.zeros(n_nodes, dtype=np.int32)
    backward = np.zeros(n_nodes, dtype=np.int32)
    reference = np.zeros(n_nodes, dtype=np.int32)

    for node in prange(n_nodes):
        nf, nb, nr = compute_disruption_for_node(
            node, indptr, indices, indptr_T, indices_T
        )
        forward[node] = nf
        backward[node] = nb
        reference[node] = nr

    # Clamp the denominator to 1 so nodes with all-zero counts yield 0
    # instead of dividing by zero.
    denominator = np.maximum(reference + backward + forward, 1)
    return (forward - backward) / denominator
@njit(nogil=True)
def compute_disruption_for_node(
    i,
    indptr, indices,  # CSR arrays of the citation network
    indptr_T, indices_T  # CSR arrays of its transpose (who cites whom)
):
    """
    Count the three disruption components (NF, NB, NR) of one node.

    - NF: papers that cite node i and share no reference with i
    - NB: papers that cite node i and share at least one reference with i
    - NR: papers (other than i) that cite one of i's references but do
      not cite i

    The index slices are intersected with ``assume_unique=True``, i.e. each
    row of both matrices is presumed to hold no duplicate column indices.

    Parameters
    ----------
    i : int
        Index of the node (paper) to evaluate.
    indptr : numpy.ndarray
        CSR indptr array of the citation matrix.
    indices : numpy.ndarray
        CSR indices array of the citation matrix.
    indptr_T : numpy.ndarray
        CSR indptr array of the transposed citation matrix.
    indices_T : numpy.ndarray
        CSR indices array of the transposed citation matrix.

    Returns
    -------
    tuple of int
        (NF_i, NB_i, NR_i).
    """
    # Row i of the matrix: the papers i cites.
    refs_of_i = indices[indptr[i]:indptr[i + 1]]
    # Row i of the transpose: the papers citing i.
    citers_of_i = indices_T[indptr_T[i]:indptr_T[i + 1]]

    NF_i = 0
    NB_i = 0
    # Classify every citer j by whether its reference list overlaps i's.
    for j in citers_of_i:
        refs_of_j = indices[indptr[j]:indptr[j + 1]]
        shared = np.intersect1d(refs_of_j, refs_of_i, assume_unique=True)
        if len(shared) == 0:
            NF_i += 1
        else:
            NB_i += 1

    # Gather everyone (except i itself) citing any of i's references; a
    # paper citing several of them appears several times until np.unique.
    ref_citers = []
    for ell in refs_of_i:
        for k in indices_T[indptr_T[ell]:indptr_T[ell + 1]]:
            if k == i:
                continue
            ref_citers.append(k)
    unique_ref_citers = np.unique(np.array(ref_citers))

    # Drop those that also cite i; what remains is NR.
    also_cite_i = np.intersect1d(
        unique_ref_citers, citers_of_i, assume_unique=True
    )
    NR_i = len(unique_ref_citers) - len(also_cite_i)
    return NF_i, NB_i, NR_i
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A Python implementation of the disruption index
A fast implementation of the disruption index for citation networks using Numba.
Overview
The disruption index is a bibliometric measure that quantifies how much a scientific paper disrupts or consolidates existing knowledge. It analyzes the citation patterns of papers that reference a given work to determine whether it represents a paradigm shift or builds incrementally on existing research.
Formula
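The disruption index is calculated as:
$$D_i = \frac{N_F - N_B}{N_F + N_B + N_R}$$
Where:
- $N_F$: number of papers that cite $i$ but don't cite any of $i$'s references
- $N_B$: number of papers that cite $i$ and at least one of $i$'s references
- $N_R$: number of papers that cite $i$'s references but don't cite $i$
Interpretation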
Values range from -1 (maximally consolidating) to +1 (maximally disrupting).
Usage
Requirements
- numpy
- scipy
- numba
Input Format
The citation network should be provided as a SciPy sparse CSR matrix where:
- `net[i,j] = 1` indicates paper `i` cites paper `j`
References
Funk, R. J., & Owen-Smith, J. (2017). A dynamic network measure of technological change. Management Science, 63(3), 791-817.