Last active
February 1, 2017 11:41
-
-
Save suminb/6c24473a10938cdea4f6069162d6d0b6 to your computer and use it in GitHub Desktop.
Run OpenCL code with Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
C:\users\suminb\Downloads\oclDeviceQuery.exe Starting... | |
OpenCL SW Info: | |
CL_PLATFORM_NAME: NVIDIA CUDA | |
CL_PLATFORM_VERSION: OpenCL 1.1 CUDA 6.5.14 | |
OpenCL SDK Revision: 7027912 | |
OpenCL Device Info: | |
1 devices found supporting OpenCL: | |
--------------------------------- | |
Device GeForce GT 740 | |
--------------------------------- | |
CL_DEVICE_NAME: GeForce GT 740 | |
CL_DEVICE_VENDOR: NVIDIA Corporation | |
CL_DRIVER_VERSION: 340.62 | |
CL_DEVICE_VERSION: OpenCL 1.1 CUDA | |
CL_DEVICE_OPENCL_C_VERSION: OpenCL C 1.1 | |
CL_DEVICE_TYPE: CL_DEVICE_TYPE_GPU | |
CL_DEVICE_MAX_COMPUTE_UNITS: 2 | |
CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: 3 | |
CL_DEVICE_MAX_WORK_ITEM_SIZES: 1024 / 1024 / 64 | |
CL_DEVICE_MAX_WORK_GROUP_SIZE: 1024 | |
CL_DEVICE_MAX_CLOCK_FREQUENCY: 1071 MHz | |
CL_DEVICE_ADDRESS_BITS: 32 | |
CL_DEVICE_MAX_MEM_ALLOC_SIZE: 256 MByte | |
CL_DEVICE_GLOBAL_MEM_SIZE: 1024 MByte | |
CL_DEVICE_ERROR_CORRECTION_SUPPORT: no | |
CL_DEVICE_LOCAL_MEM_TYPE: local | |
CL_DEVICE_LOCAL_MEM_SIZE: 47 KByte | |
CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: 64 KByte | |
CL_DEVICE_QUEUE_PROPERTIES: CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | |
CL_DEVICE_QUEUE_PROPERTIES: CL_QUEUE_PROFILING_ENABLE | |
CL_DEVICE_IMAGE_SUPPORT: 1 | |
CL_DEVICE_MAX_READ_IMAGE_ARGS: 256 | |
CL_DEVICE_MAX_WRITE_IMAGE_ARGS: 16 | |
CL_DEVICE_SINGLE_FP_CONFIG: denorms INF-quietNaNs round-to-nearest round-to-zero round-to-inf fma | |
CL_DEVICE_IMAGE <dim> 2D_MAX_WIDTH 32768 | |
2D_MAX_HEIGHT 32768 | |
3D_MAX_WIDTH 4096 | |
3D_MAX_HEIGHT 4096 | |
3D_MAX_DEPTH 4096 | |
CL_DEVICE_EXTENSIONS: cl_khr_byte_addressable_store | |
cl_khr_icd | |
cl_khr_gl_sharing | |
cl_nv_d3d9_sharing | |
cl_nv_d3d10_sharing | |
cl_khr_d3d10_sharing | |
cl_nv_d3d11_sharing | |
cl_nv_compiler_options | |
cl_nv_device_attribute_query | |
cl_nv_pragma_unroll | |
cl_khr_global_int32_base_atomics | |
cl_khr_global_int32_extended_atomics | |
cl_khr_local_int32_base_atomics | |
cl_khr_local_int32_extended_atomics | |
cl_khr_fp64 | |
CL_DEVICE_COMPUTE_CAPABILITY_NV: 3.0 | |
NUMBER OF MULTIPROCESSORS: 2 | |
NUMBER OF CUDA CORES: 384 | |
CL_DEVICE_REGISTERS_PER_BLOCK_NV: 65536 | |
CL_DEVICE_WARP_SIZE_NV: 32 | |
CL_DEVICE_GPU_OVERLAP_NV: CL_TRUE | |
CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV: CL_TRUE | |
CL_DEVICE_INTEGRATED_MEMORY_NV: CL_FALSE | |
CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t> CHAR 1, SHORT 1, INT 1, LONG 1, FLOAT 1, DOUBLE 1 | |
--------------------------------- | |
2D Image Formats Supported (71) | |
--------------------------------- | |
# Channel Order Channel Type | |
1 CL_R CL_FLOAT | |
2 CL_R CL_HALF_FLOAT | |
3 CL_R CL_UNORM_INT8 | |
4 CL_R CL_UNORM_INT16 | |
5 CL_R CL_SNORM_INT16 | |
6 CL_R CL_SIGNED_INT8 | |
7 CL_R CL_SIGNED_INT16 | |
8 CL_R CL_SIGNED_INT32 | |
9 CL_R CL_UNSIGNED_INT8 | |
10 CL_R CL_UNSIGNED_INT16 | |
11 CL_R CL_UNSIGNED_INT32 | |
12 CL_A CL_FLOAT | |
13 CL_A CL_HALF_FLOAT | |
14 CL_A CL_UNORM_INT8 | |
15 CL_A CL_UNORM_INT16 | |
16 CL_A CL_SNORM_INT16 | |
17 CL_A CL_SIGNED_INT8 | |
18 CL_A CL_SIGNED_INT16 | |
19 CL_A CL_SIGNED_INT32 | |
20 CL_A CL_UNSIGNED_INT8 | |
21 CL_A CL_UNSIGNED_INT16 | |
22 CL_A CL_UNSIGNED_INT32 | |
23 CL_RG CL_FLOAT | |
24 CL_RG CL_HALF_FLOAT | |
25 CL_RG CL_UNORM_INT8 | |
26 CL_RG CL_UNORM_INT16 | |
27 CL_RG CL_SNORM_INT16 | |
28 CL_RG CL_SIGNED_INT8 | |
29 CL_RG CL_SIGNED_INT16 | |
30 CL_RG CL_SIGNED_INT32 | |
31 CL_RG CL_UNSIGNED_INT8 | |
32 CL_RG CL_UNSIGNED_INT16 | |
33 CL_RG CL_UNSIGNED_INT32 | |
34 CL_RA CL_FLOAT | |
35 CL_RA CL_HALF_FLOAT | |
36 CL_RA CL_UNORM_INT8 | |
37 CL_RA CL_UNORM_INT16 | |
38 CL_RA CL_SNORM_INT16 | |
39 CL_RA CL_SIGNED_INT8 | |
40 CL_RA CL_SIGNED_INT16 | |
41 CL_RA CL_SIGNED_INT32 | |
42 CL_RA CL_UNSIGNED_INT8 | |
43 CL_RA CL_UNSIGNED_INT16 | |
44 CL_RA CL_UNSIGNED_INT32 | |
45 CL_RGBA CL_FLOAT | |
46 CL_RGBA CL_HALF_FLOAT | |
47 CL_RGBA CL_UNORM_INT8 | |
48 CL_RGBA CL_UNORM_INT16 | |
49 CL_RGBA CL_SNORM_INT16 | |
50 CL_RGBA CL_SIGNED_INT8 | |
51 CL_RGBA CL_SIGNED_INT16 | |
52 CL_RGBA CL_SIGNED_INT32 | |
53 CL_RGBA CL_UNSIGNED_INT8 | |
54 CL_RGBA CL_UNSIGNED_INT16 | |
55 CL_RGBA CL_UNSIGNED_INT32 | |
56 CL_BGRA CL_UNORM_INT8 | |
57 CL_BGRA CL_SIGNED_INT8 | |
58 CL_BGRA CL_UNSIGNED_INT8 | |
59 CL_ARGB CL_UNORM_INT8 | |
60 CL_ARGB CL_SIGNED_INT8 | |
61 CL_ARGB CL_UNSIGNED_INT8 | |
62 CL_INTENSITY CL_FLOAT | |
63 CL_INTENSITY CL_HALF_FLOAT | |
64 CL_INTENSITY CL_UNORM_INT8 | |
65 CL_INTENSITY CL_UNORM_INT16 | |
66 CL_INTENSITY CL_SNORM_INT16 | |
67 CL_LUMINANCE CL_FLOAT | |
68 CL_LUMINANCE CL_HALF_FLOAT | |
69 CL_LUMINANCE CL_UNORM_INT8 | |
70 CL_LUMINANCE CL_UNORM_INT16 | |
71 CL_LUMINANCE CL_SNORM_INT16 | |
--------------------------------- | |
3D Image Formats Supported (71) | |
--------------------------------- | |
# Channel Order Channel Type | |
1 CL_R CL_FLOAT | |
2 CL_R CL_HALF_FLOAT | |
3 CL_R CL_UNORM_INT8 | |
4 CL_R CL_UNORM_INT16 | |
5 CL_R CL_SNORM_INT16 | |
6 CL_R CL_SIGNED_INT8 | |
7 CL_R CL_SIGNED_INT16 | |
8 CL_R CL_SIGNED_INT32 | |
9 CL_R CL_UNSIGNED_INT8 | |
10 CL_R CL_UNSIGNED_INT16 | |
11 CL_R CL_UNSIGNED_INT32 | |
12 CL_A CL_FLOAT | |
13 CL_A CL_HALF_FLOAT | |
14 CL_A CL_UNORM_INT8 | |
15 CL_A CL_UNORM_INT16 | |
16 CL_A CL_SNORM_INT16 | |
17 CL_A CL_SIGNED_INT8 | |
18 CL_A CL_SIGNED_INT16 | |
19 CL_A CL_SIGNED_INT32 | |
20 CL_A CL_UNSIGNED_INT8 | |
21 CL_A CL_UNSIGNED_INT16 | |
22 CL_A CL_UNSIGNED_INT32 | |
23 CL_RG CL_FLOAT | |
24 CL_RG CL_HALF_FLOAT | |
25 CL_RG CL_UNORM_INT8 | |
26 CL_RG CL_UNORM_INT16 | |
27 CL_RG CL_SNORM_INT16 | |
28 CL_RG CL_SIGNED_INT8 | |
29 CL_RG CL_SIGNED_INT16 | |
30 CL_RG CL_SIGNED_INT32 | |
31 CL_RG CL_UNSIGNED_INT8 | |
32 CL_RG CL_UNSIGNED_INT16 | |
33 CL_RG CL_UNSIGNED_INT32 | |
34 CL_RA CL_FLOAT | |
35 CL_RA CL_HALF_FLOAT | |
36 CL_RA CL_UNORM_INT8 | |
37 CL_RA CL_UNORM_INT16 | |
38 CL_RA CL_SNORM_INT16 | |
39 CL_RA CL_SIGNED_INT8 | |
40 CL_RA CL_SIGNED_INT16 | |
41 CL_RA CL_SIGNED_INT32 | |
42 CL_RA CL_UNSIGNED_INT8 | |
43 CL_RA CL_UNSIGNED_INT16 | |
44 CL_RA CL_UNSIGNED_INT32 | |
45 CL_RGBA CL_FLOAT | |
46 CL_RGBA CL_HALF_FLOAT | |
47 CL_RGBA CL_UNORM_INT8 | |
48 CL_RGBA CL_UNORM_INT16 | |
49 CL_RGBA CL_SNORM_INT16 | |
50 CL_RGBA CL_SIGNED_INT8 | |
51 CL_RGBA CL_SIGNED_INT16 | |
52 CL_RGBA CL_SIGNED_INT32 | |
53 CL_RGBA CL_UNSIGNED_INT8 | |
54 CL_RGBA CL_UNSIGNED_INT16 | |
55 CL_RGBA CL_UNSIGNED_INT32 | |
56 CL_BGRA CL_UNORM_INT8 | |
57 CL_BGRA CL_SIGNED_INT8 | |
58 CL_BGRA CL_UNSIGNED_INT8 | |
59 CL_ARGB CL_UNORM_INT8 | |
60 CL_ARGB CL_SIGNED_INT8 | |
61 CL_ARGB CL_UNSIGNED_INT8 | |
62 CL_INTENSITY CL_FLOAT | |
63 CL_INTENSITY CL_HALF_FLOAT | |
64 CL_INTENSITY CL_UNORM_INT8 | |
65 CL_INTENSITY CL_UNORM_INT16 | |
66 CL_INTENSITY CL_SNORM_INT16 | |
67 CL_LUMINANCE CL_FLOAT | |
68 CL_LUMINANCE CL_HALF_FLOAT | |
69 CL_LUMINANCE CL_UNORM_INT8 | |
70 CL_LUMINANCE CL_UNORM_INT16 | |
71 CL_LUMINANCE CL_SNORM_INT16 | |
oclDeviceQuery, Platform Name = NVIDIA CUDA, Platform Version = OpenCL 1.1 CUDA 6.5.14, SDK Revision = 7027912, NumDevs = 1, Device = GeForce GT 740 | |
System Info: | |
Local Time/Date = 13:45:17, 1/6/2015 | |
CPU Arch: 9 | |
CPU Level: 6 | |
# of CPU processors: 4 | |
Windows Build: 7601 | |
Windows Ver: 6.1 (Windows Vista / Windows 7) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import numpy as np | |
import pyopencl as cl | |
def run(size=2048, iterations=5 * 15):
    """Multiply two random square matrices with OpenCL and compare to NumPy.

    Two ``size`` x ``size`` float32 matrices are generated on the host,
    copied to the first device of the first OpenCL platform, multiplied by
    a naive one-work-item-per-output-element kernel, and the result is
    copied back and compared with ``np.dot`` on the CPU.

    Args:
        size: Number of rows and columns of each input matrix.
        iterations: How many times the kernel is enqueued. Every launch
            computes the same product; repeating it only adds device work.

    Raises:
        ValueError: If ``size`` or ``iterations`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be at least 1")
    if iterations < 1:
        # With zero launches the result buffer would be read back uninitialised.
        raise ValueError("iterations must be at least 1")

    a_np = np.random.rand(size, size).astype(np.float32)
    b_np = np.random.rand(size, size).astype(np.float32)

    # Pick the first device explicitly instead of cl.create_some_context(),
    # which may prompt interactively for a platform/device choice.
    platform = cl.get_platforms()[0]
    device = platform.get_devices(device_type=cl.device_type.ALL)[0]
    ctx = cl.Context([device])
    queue = cl.CommandQueue(ctx)

    rows_a, cols_a = a_np.shape
    cols_b = b_np.shape[1]
    res_np = np.empty((rows_a, cols_b), dtype=np.float32)

    mf = cl.mem_flags
    a_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a_np)
    b_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b_np)
    res_g = cl.Buffer(ctx, mf.WRITE_ONLY, res_np.nbytes)

    prg = cl.Program(ctx, """
    __kernel void
    mul(__global float* C,
        __global const float* A,
        __global const float* B,
        int wA, int wB)
    {
        // Global 2D work-item ID: tx is the output column, ty the output row.
        // (get_local_id would only give the position inside one work-group.)
        int tx = get_global_id(0);
        int ty = get_global_id(1);

        // value accumulates the single output element owned by this work-item
        float value = 0;
        for (int k = 0; k < wA; ++k)
        {
            float elementA = A[ty * wA + k];
            float elementB = B[k * wB + tx];
            value += elementA * elementB;
        }

        // C has wB columns, so its row stride is wB (not wA).
        C[ty * wB + tx] = value;
    }
    """).build()

    # Look the kernel up once; each attribute access on the program builds a
    # fresh kernel object.
    mul = prg.mul
    # Dimension 0 of the global size maps to tx (columns of C), dimension 1
    # to ty (rows of C).
    global_size = (cols_b, rows_a)
    for _ in range(iterations):
        # Argument order must match the kernel signature: C, A, B, wA, wB.
        mul(queue, global_size, None,
            res_g, a_g, b_g, np.int32(cols_a), np.int32(cols_b))

    cl.enqueue_copy(queue, res_np, res_g)

    # Check on CPU with NumPy. The kernel computes a matrix product, so the
    # reference is np.dot; a small non-zero norm is expected because the
    # float32 accumulation order differs between device and host.
    expected = np.dot(a_np, b_np)
    print(res_np - expected)
    print(np.linalg.norm(res_np - expected))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment