Skip to content

Instantly share code, notes, and snippets.

@maxidl
Created September 7, 2019 19:31
Show Gist options
  • Save maxidl/0bc8891231d7e6fa1c8fc51aeffbf618 to your computer and use it in GitHub Desktop.
import cudf
import cugraph
from numba import cuda
from numba.cuda.random import create_xoroshiro128p_states, xoroshiro128p_uniform_float32
import numpy as np
# Read a space-separated, header-less edge list of int32 (src, dest) pairs.
# Alternative input with the same layout:
# gdf = cudf.read_csv('zachary.ssv', header=None, sep=' ', dtype=['int32', 'int32'])
gdf = cudf.read_csv(
    'Flickr-labelled.edgelist',
    header=None,
    sep=' ',
    dtype=['int32', 'int32'],
)
gdf.columns = ['src', 'dest']
gdf = gdf.sort_values(by='src')

# Build the graph from the edge list and fetch its adjacency-list view.
G = cugraph.Graph()
G.add_edge_list(gdf['src'], gdf['dest'])
adj_list = G.view_adj_list()

# Hand both adjacency columns to Numba as device arrays; the kernel reads
# the neighbours of node v from indices[offsets[v]:offsets[v + 1]].
offsets = cuda.to_device(adj_list[0])
indices = cuda.to_device(adj_list[1])

# Every distinct source node becomes a walk starting point.
nodes = gdf['src'].unique().values
@cuda.jit
def generate_walks(start_nodes, out, offsets, indices, rng_states):
    """CUDA kernel: one thread writes one random walk into one row of ``out``.

    Thread ``t`` (for ``t < start_nodes.size``) starts at ``start_nodes[t]``
    and fills ``out[t, 0 .. out.shape[1] - 1]``. At every step the successor
    is drawn from ``indices[offsets[v]:offsets[v + 1]]``, the neighbour range
    of the current node ``v``. Once a node with an empty neighbour range is
    reached, that step and all remaining steps of the row are written as -1.

    Args:
        start_nodes: 1-D array of starting node ids, one per walk.
        out: 2-D array with one row per walk; its second dimension sets the
            walk length.
        offsets: per-node start positions into ``indices``; entry ``v + 1``
            is read as the end of node ``v``'s range.
        indices: concatenated neighbour ids for all nodes.
        rng_states: xoroshiro128p states, indexed by global thread id.
    """
    tid = cuda.grid(1)
    if tid >= start_nodes.size:
        # Threads beyond the number of walks have nothing to do.
        return

    node = start_nodes[tid]
    out[tid, 0] = node

    for step in range(1, out.shape[1]):
        # -1 is the dead-end marker; it is kept unless a neighbour is drawn.
        successor = -1
        if node != -1:
            adjacent = indices[offsets[node]:offsets[node + 1]]
            degree = adjacent.size
            if degree > 0:
                # Scale the uniform sample to a neighbour position and
                # truncate to an integer index.
                sample = xoroshiro128p_uniform_float32(rng_states, tid)
                successor = adjacent[int(sample * degree)]
        out[tid, step] = successor
        node = successor
# Walk configuration.
walk_length = 80
walks_per_node = 1

# One start entry per requested walk: the node list repeated walks_per_node times.
start_nodes = np.hstack([nodes] * walks_per_node)

# Result buffer of shape (number of walks, walk_length), pre-filled with -2.
out = np.full((start_nodes.shape[0], walk_length), -2, dtype=start_nodes.dtype)
# Size of the result buffer in gigabytes.
print(out.nbytes / 1e9)

# Launch configuration: round the block count up so every walk gets a thread.
threads_per_block = 64
blocks_per_grid = (start_nodes.size + (threads_per_block - 1)) // threads_per_block

# One RNG state per launched thread, with a fixed seed.
rng_states = create_xoroshiro128p_states(threads_per_block * blocks_per_grid, seed=1)

generate_walks[blocks_per_grid, threads_per_block](
    start_nodes, out, offsets, indices, rng_states
)
# walks are stored in "out", with -1 denoting end of walk
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment