Created
April 27, 2017 11:39
-
-
Save maleadt/c544b76ce742673ab61f9c4ff0ba465e to your computer and use it in GitHub Desktop.
CUDAnative.jl/#58
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" | |
target triple = "nvptx64-nvidia-cuda" | |
; 32-bit variant of the kernel: takes an i32 value (%0) and an i32 pointer (%1).
; Computes (%0 << 1) + 524288 and stores the result through %1.
; Note that the store is annotated `align 8` although the stored element is an i32.
define void @kernel(i32, i32* nocapture) { | |
top: | |
%2 = shl i32 %0, 1 | |
%3 = add i32 %2, 524288 | |
store i32 %3, i32* %1, align 8 | |
ret void | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.version 3.2 | |
.target sm_35 | |
.address_size 64 | |
// Entry point `kernel` with a 32-bit parameter (param_0) and a 64-bit parameter (param_1).
// Loads both parameters, converts param_1 to a global-space address, computes
// (param_0 << 1) + 524288 and stores the 32-bit result at that address.
.visible .entry kernel( | |
.param .u32 param_0, | |
.param .u64 param_1 | |
) | |
{ | |
.reg .s32 %r<4>; | |
.reg .s64 %rd<3>; | |
ld.param.u32 %r1, [param_0]; | |
ld.param.u64 %rd1, [param_1]; | |
cvta.to.global.u64 %rd2, %rd1; | |
shl.b32 %r2, %r1, 1; | |
add.s32 %r3, %r2, 524288; | |
st.global.u32 [%rd2], %r3; | |
ret; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" | |
target triple = "nvptx64-nvidia-cuda" | |
; 64-bit variant of the kernel: takes an i64 value (%0) and an i64 pointer (%1).
; Computes (%0 << 1) + 524288 and stores the result through %1 with `align 8`.
define void @kernel(i64, i64* nocapture) { | |
top: | |
%2 = shl i64 %0, 1 | |
%3 = add i64 %2, 524288 | |
store i64 %3, i64* %1, align 8 | |
ret void | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.version 3.2 | |
.target sm_35 | |
.address_size 64 | |
// Entry point `kernel` with two 64-bit parameters (param_0: value, param_1: address).
// Loads both parameters, converts param_1 to a global-space address, computes
// (param_0 << 1) + 524288 and stores the 64-bit result at that address.
.visible .entry kernel( | |
.param .u64 param_0, | |
.param .u64 param_1 | |
) | |
{ | |
.reg .s64 %rd<6>; | |
ld.param.u64 %rd1, [param_0]; | |
ld.param.u64 %rd2, [param_1]; | |
cvta.to.global.u64 %rd3, %rd2; | |
shl.b64 %rd4, %rd1, 1; | |
add.s64 %rd5, %rd4, 524288; | |
st.global.u64 [%rd3], %rd5; | |
ret; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using CUDAdrv, CUDAnative | |
# Device kernel: writes `T(524288) + T(2) * one` to the first element behind `ptr`.
# The store goes through `Base.pointerset` with an explicit alignment argument of 8,
# independent of the size of `T`.
function kernel{T}(one::T, ptr::Ptr{T})
    offset = T(524288)
    doubled = T(2) * one
    Base.pointerset(ptr, offset + doubled, 1, 8)
    return nothing
end
dev = CuDevice(0) | |
ctx = CuContext(dev) | |
# Launch `kernel` once for element type `T`, print the value it stored, and dump the
# kernel's LLVM IR to "<name>.ll" unless that file already exists.
# Relies on the global `dev` for the compute capability passed to `code_llvm`.
function test(name, T)
    out = CuArray{T}(1)
    @cuda (1,1) kernel(T(1), pointer(out))
    result = Array(out)[1]
    println("$name: ", result)

    ir_path = "$name.ll"
    if !isfile(ir_path)
        open(ir_path, "w") do io
            CUDAnative.code_llvm(io, kernel, Tuple{T, Ptr{T}};
                                 dump_module=true, cap=capability(dev))
        end
    end
end
test("32bit", Int32) | |
test("64bit", Int64) | |
destroy(ctx) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using CUDAdrv, CUDAnative, LLVM | |
dev = CuDevice(0) | |
ctx = CuContext(dev) | |
# Compile the LLVM IR in "<name>.ll" to PTX, launch its `kernel` entry point once with
# element type `T`, print the stored value, and save the PTX to "<name>.ptx" unless that
# file already exists.
#
# `T` must match the element width of the IR being loaded: the kernel's value parameter
# and the buffer it writes to are both of that type. Relies on the global `dev` for the
# compute capability.
function test(name, T)
    ir = readstring("$name.ll")
    # Named `ir_mod` rather than `mod` to avoid shadowing `Base.mod`.
    ir_mod = parse(LLVM.Module, ir)
    entry = get(functions(ir_mod), "kernel")
    ptx = CUDAnative.mcgen(ir_mod, entry, capability(dev))

    cuda_mod = CuModule(ptx)
    cuda_fun = CuFunction(cuda_mod, "kernel")

    # Use `T` for the buffer, the launch signature and the argument so that the call
    # matches the kernel's parameter layout (i32 or i64) instead of a fixed Int32/Int64 mix.
    ref = CuArray{T}(1)
    cudacall(cuda_fun, 1, 1, Tuple{T, Ptr{T}}, T(1), pointer(ref))
    println("$name: ", Array(ref)[1])

    if !isfile("$name.ptx")
        open("$name.ptx", "w") do io
            write(io, ptx)
        end
    end
end
test("32bit", Int32) | |
test("64bit", Int64) | |
destroy(ctx) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the `ptx` executable from ptx.cpp with clang++, adding /opt/cuda/include to the
# include search path and linking against libcuda.
ptx: ptx.cpp | |
clang++ -o $@ $< -I/opt/cuda/include -lcuda |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env julia | |
using CUDAdrv, CUDAnative | |
# Kernel that reads 16 values from `reference` and writes them into a 16x16 Int32
# static shared-memory buffer, one column entry per loop iteration.
# NOTE(review): the name suggests this is an out-of-bounds reproducer — confirm the
# intent before changing any of the index arithmetic.
function oob(reference) | |
# Base linear index, derived from the x-components of the block and thread indices.
index = 4130784 - 32784 * blockIdx().x + 16 * blockIdx().x + threadIdx().x + 2051 | |
ref = @cuStaticSharedMem(Int32, (16, 16)) | |
# Consecutive iterations read linear indices that are 2049 elements apart.
for ty = 0:15 | |
i = index + 2049 * ty + 1 | |
# `@inbounds` is applied to the whole assignment statement.
@inbounds ref[threadIdx().x, ty + 1] = reference[i] | |
end | |
return nothing | |
end | |
# Entry point: allocate a 2049x2049 device array of Int32 and launch `oob` on it with a
# (1,1) launch configuration. `args` is accepted but not used.
function main(args)
    side = 2049
    input = CuArray{Int32}(side, side)
    @cuda (1,1) oob(input)
end
dev = CuDevice(0) | |
ctx = CuContext(dev, CUDAdrv.SCHED_BLOCKING_SYNC) | |
main(ARGS) | |
destroy(ctx) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <cuda.h> | |
#define CHECK(err) __check(err, __FILE__, __LINE__) | |
// Abort the process with a diagnostic on stderr when `err` is not CUDA_SUCCESS.
//
// err:  result code returned by a CUDA driver API call.
// file: source file of the failing call (supplied by the CHECK macro).
// line: source line of the failing call (supplied by the CHECK macro).
//
// Does nothing when `err` is CUDA_SUCCESS; otherwise never returns.
static void __check(CUresult err, const char *file, const int line) {
    if (err == CUDA_SUCCESS)
        return;

    // The lookup functions can fail for unrecognized codes and leave the output
    // pointer NULL; streaming a NULL `const char *` is undefined behaviour, so
    // substitute fixed fallback text in that case.
    const char *name = NULL;
    const char *descr = NULL;
    if (cuGetErrorName(err, &name) != CUDA_SUCCESS || name == NULL)
        name = "CUDA_ERROR_UNRECOGNIZED";
    if (cuGetErrorString(err, &descr) != CUDA_SUCCESS || descr == NULL)
        descr = "unrecognized error code";

    std::cerr << name << ": " << descr << ", at " << file << ":" << line << std::endl;
    abort();
}
template <typename T> | |
void test(const std::string name) | |
{ | |
CUmodule mod; | |
std::string path = name + ".ptx"; | |
CHECK(cuModuleLoad(&mod, path.c_str())); | |
CUfunction fun; | |
CHECK(cuModuleGetFunction(&fun, mod, "kernel")); | |
T *gpu_ref; | |
CHECK(cuMemAlloc((CUdeviceptr*) &gpu_ref, sizeof(T))); | |
T one = 1; | |
void *args[2] = {&one, &gpu_ref}; | |
cuLaunchKernel(fun, 1, 1, 1, 1, 1, 1, 0, NULL, args, NULL); | |
T cpu_ref; | |
CHECK(cuMemcpyDtoH(&cpu_ref, (CUdeviceptr) gpu_ref, sizeof(T))); | |
std::cout << name << ": " << cpu_ref << std::endl; | |
CHECK(cuModuleUnload(mod)); | |
} | |
int main() { | |
CHECK(cuInit(0)); | |
CUdevice dev; | |
CHECK(cuDeviceGet(&dev, 0)); | |
CUcontext ctx; | |
CHECK(cuCtxCreate(&ctx, 0, dev)); | |
test<int>("32bit"); | |
test<long long int>("64bit"); | |
CHECK(cuCtxDestroy(ctx)); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using CUDAdrv | |
dev = CuDevice(0) | |
ctx = CuContext(dev) | |
# Load "<name>.ptx", launch its `kernel` entry point once with element type `T`, print
# the stored value, and write the disassembled SASS to "<name>.sass" unless that file
# already exists.
#
# The SASS is produced by assembling the PTX with `ptxas` into a temporary
# "<name>.cuobj" and disassembling it with `cuobjdump`. Relies on the global `dev` for
# the compute capability.
function test(name, T)
    ptx = readstring("$name.ptx")
    cuda_mod = CuModule(ptx)
    cuda_fun = CuFunction(cuda_mod, "kernel")

    ref = CuArray{T}(1)
    cudacall(cuda_fun, 1, 1, Tuple{T, Ptr{T}}, T(1), pointer(ref))
    println("$name: ", Array(ref)[1])

    if !isfile("$name.sass")
        cap = capability(dev)
        gpu = "sm_$(cap.major)$(cap.minor)"
        cuobj = "$name.cuobj"
        try
            run(`ptxas --gpu-name $gpu --output-file $cuobj --input-as-string $ptx`)
            # Disassemble before opening the output file: if `cuobjdump` fails, no
            # empty "<name>.sass" is left behind to suppress regeneration on later runs.
            sass = readstring(`cuobjdump --dump-sass $cuobj`)
            open("$name.sass", "w") do io
                print(io, sass)
            end
        finally
            # Remove the temporary object file even when one of the tools failed.
            isfile(cuobj) && rm(cuobj)
        end
    end
end
test("32bit", Int32) | |
test("64bit", Int64) | |
destroy(ctx) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment