Skip to content

Instantly share code, notes, and snippets.

@gungorbudak
Created November 19, 2019 14:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gungorbudak/08c7a74c5f0e3692c3cdd2c74e86383a to your computer and use it in GitHub Desktop.
Save gungorbudak/08c7a74c5f0e3692c3cdd2c74e86383a to your computer and use it in GitHub Desktop.
import os
import networkx as nx
# Directory that holds the input data. The empty string makes the joined
# path below a bare file name, i.e. relative to the current directory.
WORKDIR = ''

# Location of the iRefIndex MITAB file that the script loads.
IREFINDEX_FILE = os.path.join(
    WORKDIR,
    '9606.mitab.01-22-2018.txt',
)
def parse_irefindex_alias(alias):
    """Extract the HGNC value from a ``|``-separated alias field.

    The field is split on ``|`` and scanned left to right; the first entry
    that starts with ``hgnc:`` determines the result.

    Args:
        alias: Raw alias text, e.g. ``"hgnc:TP53|crogid:abc"``.

    Returns:
        The text after ``hgnc:`` in the first matching entry (an empty
        string if that entry is exactly ``hgnc:``), or ``None`` when no
        entry carries the prefix.
    """
    prefix = 'hgnc:'
    for entry in alias.split('|'):
        if entry.startswith(prefix):
            # Drop the namespace prefix and keep only the identifier.
            return entry[len(prefix):]
    # NOTE: a fallback that returned the whole ``icrogid:`` entry was once
    # sketched here but was never enabled; only HGNC entries are recognised.
    return None
def parse_irefindex_file(irefindex_file):
    """Load a tab-separated iRefIndex file and return its largest component.

    The first line is treated as a header and skipped. For every other
    line, the fifth and sixth tab-separated columns (indices 4 and 5) are
    passed to ``parse_irefindex_alias``; when both yield a non-empty value
    an edge between the two values is added to an ``nx.Graph``.

    Args:
        irefindex_file: Path of the file to read.

    Returns:
        A new ``nx.Graph`` that is a copy of the subgraph induced by the
        connected component with the most nodes. If no edge could be
        built (empty file, header only, or no row with two HGNC aliases)
        an empty graph is returned.

    Raises:
        OSError: If the file cannot be opened.
    """
    graph = nx.Graph()
    with open(irefindex_file) as rows:
        # Skip the header line; the default keeps an empty file from
        # raising StopIteration.
        next(rows, None)
        for row in rows:
            # Remove only the line terminator so that empty leading or
            # trailing fields cannot shift the column positions.
            cols = row.rstrip('\r\n').split('\t')
            if len(cols) < 6:
                # Blank or truncated line: the alias columns are missing.
                continue
            gene_a = parse_irefindex_alias(cols[4])
            gene_b = parse_irefindex_alias(cols[5])
            if gene_a and gene_b:
                graph.add_edge(gene_a, gene_b)
    if graph.number_of_nodes() == 0:
        # max() over zero components would raise ValueError.
        return graph
    largest_cc = max(nx.connected_components(graph), key=len)
    return graph.subgraph(largest_cc).copy()
# Build the interaction graph from the configured file and report its size
# as "<node count> <edge count>".
irefindex = parse_irefindex_file(IREFINDEX_FILE)
node_count = irefindex.number_of_nodes()
edge_count = irefindex.number_of_edges()
print(node_count, edge_count)  # 24711 443720
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment