Skip to content

Instantly share code, notes, and snippets.

@sklam
Created August 19, 2015 20:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sklam/f62f1f48bb0be78f9ceb to your computer and use it in GitHub Desktop.
Save sklam/f62f1f48bb0be78f9ceb to your computer and use it in GitHub Desktop.
Test script for checking the NVVM bug
"""
Run with NUMBAPRO_NVVM set to the path of a different libnvvm library,
e.g. (on OSX)
NUMBAPRO_NVVM=/Developer/NVIDIA/CUDA-7.5/nvvm/lib/libnvvm.dylib python testnvvm.py
"""
from llvmlite import ir #, binding as llvm
from numba.cuda.cudadrv.nvvm import (llvm_to_ptx, set_cuda_kernel,
fix_data_layout)
# Build an LLVM module containing a single kernel: foo(i32* ptr, i32 ip_max).
m = ir.Module()
i32 = ir.IntType(32)
fn = ir.Function(m, ir.FunctionType(ir.VoidType(), [i32.as_pointer(), i32]),
                 "foo")
builder = ir.IRBuilder(fn.append_basic_block('entry'))
ptr, ip_max = fn.args
ptr.name = "ptr"
ip_max.name = "ip_max"

# Loop counter lives in a stack slot, initialised to zero.
ip_var = builder.alloca(i32)
builder.store(ir.Constant(i32, 0), ip_var)

# Loop head: increment the counter; continue while the pre-increment
# value is still below ip_max.
head_label = builder.append_basic_block('head')
builder.branch(head_label)
builder.position_at_end(head_label)
ip = builder.load(ip_var)
builder.store(builder.add(ip, ir.Constant(ip.type, 1)), ip_var)
pred_continue = builder.icmp_signed('<', ip, ip_max)
loop_label = builder.append_basic_block('loop')
end_label = builder.append_basic_block('end')
builder.cbranch(pred_continue, loop_label, end_label)

# Loop body:
builder.position_at_end(loop_label)
def insert_branch(expect):
    """Emit ``if ip == expect: ptr[ip] = ip; goto head`` and leave the
    builder positioned at the fall-through (endif) block.

    Closes over the module-level ``builder``, ``ip``, ``ptr`` and
    ``head_label``.
    """
    taken = builder.append_basic_block('if')
    fallthrough = builder.append_basic_block('endif')
    builder.cbranch(builder.icmp_signed('==', ip, expect), taken, fallthrough)
    # Then-branch: store the counter through the pointer, jump to loop head.
    builder.position_at_end(taken)
    builder.store(ip, builder.gep(ptr, [ip]))
    builder.branch(head_label)
    # Continue emitting after the conditional.
    builder.position_at_end(fallthrough)
# Emit 2000 sequential compare-and-store conditionals in the loop body;
# the large number of basic blocks is what exercises the NVVM bug.
for i in range(2000):
    insert_branch(ir.Constant(ir.IntType(32), i))
builder.branch(head_label)

# End block: the kernel simply returns.
builder.position_at_end(end_label)
builder.ret_void()

# llvm.view_function_cfg(fn)  # debug helper: visualise the CFG
set_cuda_kernel(fn)
fix_data_layout(m)
llvmir = str(m)
# splitlines() already returns a list — no need to wrap it in list().
print('line ct', len(llvmir.splitlines()))
ptx = llvm_to_ptx(llvmir)
print(ptx.decode('ascii'))
@gmarkall
Copy link

Just a note: to run this, I had to change m to be an llvmlite.llvmpy.core.Module, because only it has the get_or_insert_named_metadata function used by set_cuda_kernel. Is it likely that I've got something mismatched somewhere?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment