Skip to content

Instantly share code, notes, and snippets.

@mitmul
Last active April 18, 2018 11:50
Show Gist options
  • Save mitmul/880df6826a746cacacf0c9ca69244b79 to your computer and use it in GitHub Desktop.
Save mitmul/880df6826a746cacacf0c9ca69244b79 to your computer and use it in GitHub Desktop.
(gdb) bt
#0 0x0000000000000007 in ?? ()
#1 0x00007fffccf8c116 in google::protobuf::MessageLite::AppendToString(std::string*) const ()
from /home/ubuntu/miniconda/lib/libprotobuf.so.14
#2 0x00007fffccf8c472 in google::protobuf::MessageLite::SerializeAsString() const ()
from /home/ubuntu/miniconda/lib/libprotobuf.so.14
#3 0x00007fffcd5d613c in tc::ExecutionEngine::getHandle(std::string const&, std::vector<DLTensor const*, std::allocator<DLTensor const*> > const&, tc::MappingOptions const&)::{lambda(std::unique_ptr<tc::ExecutionEngine::ExecutorInfo, std::default_delete<tc::ExecutionEngine::ExecutorInfo> > const&)#1}::operator()(std::unique_ptr<tc::ExecutionEngine::ExecutorInfo, std::default_delete<tc::ExecutionEngine::ExecutorInfo> > const&) const () from /home/ubuntu/miniconda/lib/libtc_core.so
#4 0x00007fffcd5d63d7 in tc::ExecutionEngine::getHandle(std::string const&, std::vector<DLTensor const*, std::allocator<DLTensor const*> > const&, tc::MappingOptions const&) ()
from /home/ubuntu/miniconda/lib/libtc_core.so
#5 0x00007fffcd5d73bb in tc::ExecutionEngine::compile(std::string const&, std::vector<DLTensor const*, std::allocator<DLTensor const*> > const&, tc::MappingOptions const&) ()
from /home/ubuntu/miniconda/lib/libtc_core.so
#6 0x00007fffcdc3705d in __pyx_pf_4cupy_4core_2tc_8TCKernel_8autotune (
__pyx_v_tuner_min_launch_total_threads=<optimized out>, __pyx_v_log_generations=<optimized out>,
__pyx_v_restore_number=<optimized out>, __pyx_v_restore_from_proto=<optimized out>,
__pyx_v_proto=<optimized out>, __pyx_v_gpus=<optimized out>, __pyx_v_threads=<optimized out>,
__pyx_v_number_elites=<optimized out>, __pyx_v_generations=<optimized out>,
__pyx_v_mutation_rate=<optimized out>, __pyx_v_crossover_rate=<optimized out>,
__pyx_v_pop_size=<optimized out>, __pyx_v_starting_points=<optimized out>,
__pyx_v_base_mapping=<optimized out>, __pyx_v_cache_file=<optimized out>,
__pyx_v_inputs=<optimized out>, __pyx_v_self=0x7ffff6c6dd50) at cupy/core/tc.cpp:5796
#7 __pyx_pw_4cupy_4core_2tc_8TCKernel_9autotune (__pyx_v_self=0x7ffff6c6dd50,
__pyx_args=<optimized out>, __pyx_kwds=<optimized out>) at cupy/core/tc.cpp:4854
#8 0x0000555555662fd4 in _PyCFunction_FastCallDict ()
#9 0x0000555555690f24 in _PyCFunction_FastCallKeywords ()
#10 0x00005555556f0bec in call_function ()
#11 0x0000555555715eb1 in _PyEval_EvalFrameDefault ()
#12 0x00005555556eb529 in PyEval_EvalCodeEx ()
#13 0x00005555556ec2cc in PyEval_EvalCode ()
#14 0x0000555555768af4 in run_mod ()
#15 0x0000555555768ef1 in PyRun_FileExFlags ()
#16 0x00005555557690f4 in PyRun_SimpleFileExFlags ()
#17 0x000055555576cc28 in Py_Main ()
#18 0x000055555563471e in main ()
# Reproduction script: define a Tensor Comprehensions matmul kernel,
# autotune it through CuPy's TCKernel wrapper, run it, and compare the
# result against cupy's built-in dot product.
import cupy as cp
import tensor_comprehensions as tc

# TC language source: output(i, j) accumulates A(i, kk) * B(kk, j) over kk,
# i.e. a plain matrix multiply of an (M, N) matrix by an (N, K) matrix.
lang = """
def matmul1(float(M,N) A, float(N,K) B) -> (output) {
output(i, j) +=! A(i, kk) * B(kk, j)
}
"""

# Random float32 operands: (100, 400) x (400, 500) -> (100, 500).
a = cp.random.randn(100, 400).astype(cp.float32)
b = cp.random.randn(400, 500).astype(cp.float32)
input_tensors = [a, b]

# Build the kernel from the TC source, autotune it (persisting the tuned
# mapping options to 'matmul1_cache'), then execute it on the inputs.
kernel = cp.TCKernel(lang, 'matmul1', input_tensors)
kernel.autotune(input_tensors, cache_file='matmul1_cache', base_mapping='mlp')

result = kernel(a, b)
print(result)
# Reference result computed with cupy's own matrix multiply for comparison.
print(a.dot(b))
@mitmul
Copy link
Author

mitmul commented Apr 18, 2018

Running tc_test.py results in the error below:

$ python ../test.py
Generation 0 Jobs(Compiled, GPU)/total (10, 10)/10 (best/median/worst)us: 484/773/1708
Generation 1 Jobs(Compiled, GPU)/total (10, 10)/10 (best/median/worst)us: 204/487/1709
Dumping cache to matmul1_cache.cuda/options
[1] 6136 segmentation fault (core dumped) python ../test.py

The code used here: https://github.com/cupy/cupy/pull/1122/files#diff-b049f56ccadf7e9f004c20b3fa09c1ceR251

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment