Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
grizzly_test.py
import time
import pandas as pd
import numpy as np
import grizzly.grizzly as gr
# Simplified PageRank-style benchmark: runs a fixed number of rank-propagation
# iterations over a small directed graph using Grizzly (Weld-backed) dataframes
# and prints the elapsed time. No damping factor is applied; each iteration
# simply redistributes rank/out_degree along every edge and sums per node.

# Passed as `verbose=` to every evaluate() call below.
verbosity = False

# Alternative input: load the edge list from a tab-separated file instead of
# using the inline sample graph below.
#data_path = "/data/0.tsv"
#edges = pd.read_csv(data_path, delimiter='\t', encoding='utf-8', header=None, dtype={'0': np.int64, '1': np.int64}, index_col=None)
#links = list(zip(edges[0], edges[1]))

# Sample directed graph as (src, dest) pairs.
links = [(1, 2), (1, 3), (1, 4), (2, 1), (2, 3), (3, 2)]

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# wall-clock adjustments. Timing covers setup as well as the iterations.
start = time.perf_counter()

pd_links = pd.DataFrame(links, columns=['src', 'dest'])
gr_links = gr.DataFrameWeld(pd_links)

# Number of outgoing links per source node, as a column named 'out_degree'.
out_degree = (
    gr_links.groupby(['src'])
    .size()
    .evaluate(verbose=verbosity)
    .reset_index(name='out_degree')
)
gr_out_degree = gr.DataFrameWeld(out_degree)

# Every distinct source node starts with rank 1.0.
srcs = gr_links['src'].unique().evaluate(verbose=verbosity)
# Materialize the zip: a bare iterator is not accepted by every pandas
# version's DataFrame constructor.
initial_ranks = pd.DataFrame(
    list(zip(srcs, np.ones(len(srcs)))),
    columns=['src', 'rank'],
)
gr_ranks = gr.DataFrameWeld(initial_ranks)

# Attach each edge's source out-degree once, outside the loop.
# NOTE(review): relies on gr.merge's default join keys (presumably the shared
# 'src' column) -- confirm against the Grizzly API.
gr_tmp_link_counts = gr.merge(gr_links, gr_out_degree)
gr_link_counts = gr_tmp_link_counts.evaluate(verbose=verbosity)

n_iter = 10
for _ in range(n_iter):
    # Join edges (with out-degree) against the current ranks, then drop to
    # pandas to compute each edge's contribution.
    joined = (
        gr.merge(gr_link_counts, gr_ranks)
        .evaluate(verbose=verbosity)
        .to_pandas()
    )
    contribs = joined.eval('contrib = rank/out_degree')[['dest', 'contrib']]
    # The destination of an edge becomes the node that receives the rank.
    tmp_rank = contribs.rename(columns={'dest': 'src', 'contrib': 'rank'})
    # New rank of a node = sum of contributions arriving at it.
    gr_ranks = (
        gr.DataFrameWeld(tmp_rank)
        .groupby('src')
        .sum()
        .evaluate(verbose=verbosity)
    )

print("Runtime for {} iterations= {} secs".format(n_iter, time.perf_counter() - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.