Skip to content

Instantly share code, notes, and snippets.

@flaviovdf
Last active May 10, 2018 18:51
Show Gist options
  • Save flaviovdf/c74f597cdfd9db787cd670d55bbecf26 to your computer and use it in GitHub Desktop.
Save flaviovdf/c74f597cdfd9db787cd670d55bbecf26 to your computer and use it in GitHub Desktop.
# -*- coding: utf8
from collections import defaultdict
import gzip
import sys

import scipy.sparse as sp
def _split_fields(line):
    """Split one raw edge line (bytes) into its fields.

    Lines containing a comma are split on commas and every field is
    stripped, so b'a, b, 1.0\\n' yields the same node ids as b'a b 1.0\\n'.
    Any other line is split on runs of whitespace. A blank line yields an
    empty list.
    """
    if b',' in line:
        return [field.strip() for field in line.split(b',')]
    return line.split()


def get_graph_stamps(path, top=None):
    """Read a gzip-compressed edge list and build per-node event data.

    Each non-blank line of the file describes one edge. Its first two
    fields are the source and destination node, and its last field is
    parsed as a float timestamp. Fields are separated by commas when the
    line contains a comma, otherwise by whitespace. Node identifiers are
    kept as ``bytes`` exactly as read from the file.

    The file is read twice: the first pass counts how often each node
    appears as a destination and collects every source node; the second
    pass keeps only the edges whose two endpoints are both "valid" nodes.

    Parameters
    ----------
    path : str
        Path of the gzip file to read.
    top : int or None
        When ``None``, every node that appears as a source is valid.
        Otherwise the valid nodes are the first ``top`` source nodes when
        nodes are ranked by decreasing destination count (ties broken by
        decreasing node id).

    Returns
    -------
    timestamps : list of list of float
        ``timestamps[ids[node]]`` holds, in file order, the timestamps of
        the kept edges whose destination is ``node``.
    graph : dict
        ``graph[src][dst]`` is the number of kept edges from ``src`` to
        ``dst``. Its keys are exactly the keys of ``ids``; a node without
        kept outgoing edges maps to an empty dict.
    ids : dict
        Maps each kept node to its index in ``timestamps``. Indices are
        assigned in order of first appearance as a destination.

    Raises
    ------
    ValueError
        If a non-blank line has fewer than two fields, or if its last
        field cannot be parsed as a float.
    """
    # First pass: destination counts and the set of all source nodes.
    count = defaultdict(int)
    srcs = set()
    with gzip.open(path, 'rb') as in_file:
        for line in in_file:
            fields = _split_fields(line)
            if not fields:
                continue  # blank line, e.g. a trailing newline
            src, dst = fields[:2]
            count[dst] += 1
            srcs.add(src)

    if top is None:
        valid = srcs
    else:
        valid = set()
        ranked = sorted(((n, node) for node, n in count.items()),
                        reverse=True)
        for _, node in ranked:
            # Only nodes that also act as a source are eligible.
            if node in srcs:
                valid.add(node)
                if len(valid) == top:
                    break

    # Second pass: keep edges between valid nodes and collect timestamps.
    graph = {}
    ids = {}
    timestamps = []
    with gzip.open(path, 'rb') as in_file:
        for line in in_file:
            fields = _split_fields(line)
            if not fields:
                continue
            src, dst = fields[:2]
            stamp = float(fields[-1])
            if src not in valid or dst not in valid:
                continue

            out_edges = graph.setdefault(src, {})
            out_edges[dst] = out_edges.get(dst, 0) + 1

            if dst in ids:
                timestamps[ids[dst]].append(stamp)
            else:
                ids[dst] = len(timestamps)
                timestamps.append([stamp])

    # Make the graph's key set equal to the id set: drop sources that
    # never received an id (they were never a kept destination) ...
    for node in list(graph):
        if node not in ids:
            del graph[node]

    # ... and give every node with an id an (possibly empty) entry.
    for node in ids:
        if node not in graph:
            graph[node] = {}

    return timestamps, graph, ids
if __name__ == '__main__':
    # Script entry point: read the gzip edge list named by the first
    # command-line argument and print summary statistics, one per line.
    path = sys.argv[1]
    timestamps, graph, ids = get_graph_stamps(path)

    # Flatten the nested {src: {dst: count}} dict into parallel
    # (row, col, value) lists for the sparse-matrix constructor.
    vals = []
    rows = []
    cols = []
    for src in graph:
        for dst in graph[src]:
            if src not in ids or dst not in ids:
                continue
            rows.append(ids[src])
            cols.append(ids[dst])
            vals.append(graph[src][dst])

    # Pass the shape explicitly. Without it the dimensions are inferred
    # from the largest index present, so nodes without edges would be cut
    # off (skewing the ratios below) and an edge-free graph would raise.
    n_nodes = len(ids)
    GT = sp.csr_matrix((vals, (rows, cols)), shape=(n_nodes, n_nodes),
                       dtype='d')

    n = sum(len(t) for t in timestamps)
    print(len(timestamps))  # number of nodes that have timestamps
    print(len(graph))       # number of nodes in the graph
    print(len(ids))         # number of node ids
    print(n)                # total number of timestamps
    if n_nodes:
        # Fraction of non-zero entries in the matrix.
        print(GT.nnz / (GT.shape[0] * GT.shape[1]))
        # Fraction of rows whose entries sum to zero.
        print((GT.sum(axis=1) == 0).sum() / (GT.shape[1]))
    else:
        # Empty matrix: report both ratios as 0.0 rather than divide by 0.
        print(0.0)
        print(0.0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment