Created
August 26, 2015 02:27
-
-
Save inferrna/922a38d34c06561dd77b to your computer and use it in GitHub Desktop.
leak test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// File: hello.c
//
// Abstract: A simple "Hello World" compute example showing basic usage of OpenCL which
// calculates the mathematical square (X[i] = pow(X[i],2)) for a buffer of
// floating point values.
//
//
// Version: <1.0>
//
// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") | |
// in consideration of your agreement to the following terms, and your use, | |
// installation, modification or redistribution of this Apple software | |
// constitutes acceptance of these terms. If you do not agree with these | |
// terms, please do not use, install, modify or redistribute this Apple | |
// software. | |
// | |
// In consideration of your agreement to abide by the following terms, and | |
// subject to these terms, Apple grants you a personal, non - exclusive | |
// license, under Apple's copyrights in this original Apple software ( the | |
// "Apple Software" ), to use, reproduce, modify and redistribute the Apple | |
// Software, with or without modifications, in source and / or binary forms; | |
// provided that if you redistribute the Apple Software in its entirety and | |
// without modifications, you must retain this notice and the following text | |
// and disclaimers in all such redistributions of the Apple Software. Neither | |
// the name, trademarks, service marks or logos of Apple Inc. may be used to | |
// endorse or promote products derived from the Apple Software without specific | |
// prior written permission from Apple. Except as expressly stated in this | |
// notice, no other rights or licenses, express or implied, are granted by | |
// Apple herein, including but not limited to any patent rights that may be | |
// infringed by your derivative works or by other works in which the Apple | |
// Software may be incorporated. | |
// | |
// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO | |
// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED | |
// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A | |
// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION | |
// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. | |
// | |
// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR | |
// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION | |
// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER | |
// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR | |
// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
// | |
// Copyright ( C ) 2008 Apple Inc. All Rights Reserved. | |
// | |
//////////////////////////////////////////////////////////////////////////////// | |
#include <fcntl.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <math.h> | |
#include <unistd.h> | |
#include <sys/types.h> | |
#include <sys/stat.h> | |
#include <CL/opencl.h> | |
//////////////////////////////////////////////////////////////////////////////// | |
// Use a static data size for simplicity
//
// Number of float elements in the host and device buffers.
#define DATA_SIZE (1024*128)
////////////////////////////////////////////////////////////////////////////////
// Simple compute kernel which computes the square of an input array
//
// OpenCL C source handed to clCreateProgramWithSource(). Adjacent string
// literals are concatenated by the compiler, so no line-continuation
// backslashes are needed. The `i < count` guard lets the host launch more
// work-items than there are elements.
const char *KernelSource =
    "\n"
    "__kernel void square( \n"
    " __global float* input, \n"
    " __global float* output, \n"
    " const unsigned int count) \n"
    "{ \n"
    " int i = get_global_id(0); \n"
    " if(i < count) \n"
    " output[i] = input[i] * input[i]; \n"
    "} \n"
    "\n";
//////////////////////////////////////////////////////////////////////////////// | |
int main() | |
{ | |
int err; // error code returned from api calls | |
float data[DATA_SIZE]; // original data set given to device | |
float results[DATA_SIZE]; // results returned from device | |
unsigned int correct; // number of correct results returned | |
size_t global; // global domain size for our calculation | |
size_t local; // local domain size for our calculation | |
int k; | |
int i; | |
cl_device_id device_id; // compute device id | |
cl_context context; // compute context | |
cl_command_queue commands; // compute command queue | |
cl_program program; // compute program | |
cl_kernel kernel; // compute kernel | |
cl_mem input; // device memory used for the input array | |
cl_mem output; // device memory used for the output array | |
cl_platform_id platforms[32]; //an array to hold the IDs of all the platforms, hopefuly there won't be more than 32 | |
cl_uint num_platforms; //this number will hold the number of platforms on this machine | |
char vendor[1024]; //this strirng will hold a platforms vendor | |
cl_device_id devices[32]; //this variable holds the number of devices for each platform, hopefully it won't be more than 32 per platform | |
cl_uint num_devices; //this number will hold the number of devices on this machine | |
char deviceName[1024]; //this string will hold the devices name | |
cl_uint numberOfCores; //this variable holds the number of cores of on a device | |
cl_long amountOfMemory; //this variable holds the amount of memory on a device | |
cl_uint clockFreq; //this variable holds the clock frequency of a device | |
cl_ulong maxAlocatableMem; //this variable holds the maximum allocatable memory | |
cl_ulong localMem; //this variable holds local memory for a device | |
cl_bool available; //this variable holds if the device is available | |
// Fill our data set with random float values | |
// | |
int count = DATA_SIZE; | |
for(i = 0; i < DATA_SIZE; i++) | |
data[i] = rand() / (float)RAND_MAX; | |
// Connect to a compute device | |
// | |
clGetPlatformIDs(2, platforms, &num_platforms); | |
int pid = 0; | |
err = clGetDeviceIDs(platforms[pid], CL_DEVICE_TYPE_ALL, 1, &device_id, NULL); | |
if (err != CL_SUCCESS) | |
{ | |
printf("Error: Failed to create a device group!\n"); | |
return EXIT_FAILURE; | |
} | |
//scan in device information | |
clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); | |
clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor), vendor, NULL); | |
clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numberOfCores), &numberOfCores, NULL); | |
clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(amountOfMemory), &amountOfMemory, NULL); | |
clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clockFreq), &clockFreq, NULL); | |
clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(maxAlocatableMem), &maxAlocatableMem, NULL); | |
clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(localMem), &localMem, NULL); | |
clGetDeviceInfo(device_id, CL_DEVICE_AVAILABLE, sizeof(available), &available, NULL); | |
//print out device information | |
printf("\tDevice: %u\n", device_id); | |
printf("\t\tName:\t\t\t\t%s\n", deviceName); | |
printf("\t\tVendor:\t\t\t\t%s\n", vendor); | |
printf("\t\tAvailable:\t\t\t%s\n", available ? "Yes" : "No"); | |
printf("\t\tCompute Units:\t\t\t%u\n", numberOfCores); | |
printf("\t\tClock Frequency:\t\t%u mHz\n", clockFreq); | |
printf("\t\tGlobal Memory:\t\t\t%0.00f mb\n", (double)amountOfMemory/1048576); | |
printf("\t\tMax Allocateable Memory:\t%0.00f mb\n", (double)maxAlocatableMem/1048576); | |
printf("\t\tLocal Memory:\t\t\t%u kb\n\n", (unsigned int)localMem); | |
// Create a compute context | |
// | |
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); | |
if (!context) | |
{ | |
printf("Error: Failed to create a compute context!\n"); | |
return EXIT_FAILURE; | |
} | |
// Create a command commands | |
// | |
commands = clCreateCommandQueue(context, device_id, 0, &err); | |
if (!commands) | |
{ | |
printf("Error: Failed to create a command commands!\n"); | |
return EXIT_FAILURE; | |
} | |
// Create the compute program from the source buffer | |
// | |
program = clCreateProgramWithSource(context, 1, (const char **) & KernelSource, NULL, &err); | |
if (!program) | |
{ | |
printf("Error: Failed to create compute program!\n"); | |
return EXIT_FAILURE; | |
} | |
// Build the program executable | |
// | |
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); | |
if (err != CL_SUCCESS) | |
{ | |
size_t len; | |
char buffer[2048]; | |
printf("Error: Failed to build program executable!\n"); | |
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); | |
printf("%s\n", buffer); | |
exit(1); | |
} | |
// Create the compute kernel in the program we wish to run | |
// | |
kernel = clCreateKernel(program, "square", &err); | |
if (!kernel || err != CL_SUCCESS) | |
{ | |
printf("Error: Failed to create compute kernel!\n"); | |
exit(1); | |
} | |
for(k = 0; k<9999; k++){ | |
// Create the input and output arrays in device memory for our calculation | |
// | |
input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * DATA_SIZE, NULL, NULL); | |
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * DATA_SIZE, NULL, NULL); | |
if (!input || !output) | |
{ | |
printf("Error: Failed to allocate device memory!\n"); | |
exit(1); | |
} | |
// Write our data set into the input array in device memory | |
// | |
err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * DATA_SIZE, data, 0, NULL, NULL); | |
if (err != CL_SUCCESS) | |
{ | |
printf("Error: Failed to write to source array!\n"); | |
exit(1); | |
} | |
// Set the arguments to our compute kernel | |
// | |
err = 0; | |
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input); | |
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output); | |
err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count); | |
if (err != CL_SUCCESS) | |
{ | |
printf("Error: Failed to set kernel arguments! %d\n", err); | |
exit(1); | |
} | |
// Get the maximum work group size for executing the kernel on the device | |
// | |
err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); | |
if (err != CL_SUCCESS) | |
{ | |
printf("Error: Failed to retrieve kernel work group info! %d\n", err); | |
exit(1); | |
} | |
// Execute the kernel over the entire range of our 1d input data set | |
// using the maximum number of work group items for this device | |
// | |
global = count; | |
err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL); | |
if (err) | |
{ | |
printf("Error: Failed to execute kernel!\n"); | |
return EXIT_FAILURE; | |
} | |
// Wait for the command commands to get serviced before reading back results | |
// | |
clFinish(commands); | |
// Read back the results from the device to verify the output | |
// | |
err = clEnqueueReadBuffer( commands, output, CL_TRUE, 0, sizeof(float) * DATA_SIZE, results, 0, NULL, NULL ); | |
if (err != CL_SUCCESS) | |
{ | |
printf("Error: Failed to read output array! %d\n", err); | |
exit(1); | |
} | |
// Validate our results | |
// | |
correct = 0; | |
for(i = 0; i < DATA_SIZE; i++) | |
{ | |
if(results[i] == data[i] * data[i]) | |
correct++; | |
} | |
// Print a brief summary detailing the results | |
// | |
//printf("Computed '%d/%d' correct values!\n", correct, count); | |
// Shutdown and cleanup | |
// | |
clReleaseMemObject(input); | |
clReleaseMemObject(output); | |
} | |
clReleaseProgram(program); | |
clReleaseKernel(kernel); | |
clReleaseCommandQueue(commands); | |
clReleaseContext(context); | |
printf("Normal exit atfer %u iterations over %u bytes array\n", k, count); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyopencl as cl
from pyopencl import array
from pyopencl import clrandom
import numpy as np
from time import sleep

# Leak test: repeatedly allocate two device buffers, run the `square` kernel,
# copy the result back and compare it with the host computation.

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
dtype = np.float32
DATA_SIZE = 128*1024

KernelSource = """
__kernel void square(
    __global float* input,
    __global float* output,
    const unsigned int count)
{
    int i = get_global_id(0);
    if(i < count)
        output[i] = input[i] * input[i];
}
"""

prg = cl.Program(ctx, KernelSource).build()

hostdata = np.random.randn(DATA_SIZE).astype(dtype)
hostres = np.empty(shape=hostdata.shape, dtype=hostdata.dtype)

for i in range(9999):
    # Fresh buffers every iteration; the previous pair is dropped when the
    # names are rebound.
    #clresult = array.Array(queue, (DATA_SIZE,), dtype)
    clresult = cl.Buffer(ctx, mf.READ_WRITE, hostdata.dtype.itemsize*DATA_SIZE)
    cldata = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=hostdata)
    prg.square(queue, (DATA_SIZE,), None, cldata, clresult, np.uint32(DATA_SIZE))
    cl.enqueue_copy(queue, hostres, clresult)
    #assert (clresult.get() == hostdata*hostdata).all(), "Got bad result on {0} iteration".format(i)
    assert (hostres == hostdata*hostdata).all(), "Got bad result on {0} iteration".format(i)
    # Progress heartbeat every 99 iterations.
    if i%99 == 0:
        print("Iteration {0}. Still alive".format(i))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# By JanKanis
# http://unix.stackexchange.com/questions/134414/how-to-limit-the-total-resources-memory-of-a-process-and-its-children
#
# Usage: <script> <limit> <command>...
# Runs <command> inside a freshly created memory cgroup (managed through
# `cgm`) limited to <limit>, prints the peak memory usage reported by the
# cgroup, removes the cgroup and exits with the command's exit status.
set -eu

if [ "$#" -lt 2 ]
then
    echo "Usage: $(basename "$0") <limit> <command>..."
    exit 1
fi

limit="$1"
shift

# Use the script's PID so the cgroup name is unique per invocation.
cgname="limitmem_$$"
echo "limiting memory to $limit (cgroup $cgname) for command $*"

cgm create memory "$cgname" >/dev/null
cgm setvalue memory "$cgname" memory.limit_in_bytes "$limit" >/dev/null
# try also limiting swap usage, but this fails if the system has no swap
cgm setvalue memory "$cgname" memsw.limit_in_bytes "$limit" >/dev/null 2>&1 || true

# spawn subshell to run in the cgroup
# (errexit is switched off so a failing command does not abort this script
# before the cgroup is inspected and removed)
set +e
(
    set -e
    # `sh -c 'echo $PPID'` prints the PID of the process that spawned it,
    # which is what gets moved into the cgroup before the exec.
    cgm movepid memory "$cgname" "$(sh -c 'echo $PPID')" >/dev/null
    exec "$@"
)
# grab exit code
exitcode=$?
set -e

# printf instead of `echo -n`, whose behaviour is not portable under /bin/sh
printf 'peak memory used: '
cgm getvalue memory "$cgname" memory.max_usage_in_bytes | tail -1 | cut -f2 -d\"
cgm remove memory "$cgname" >/dev/null
exit "$exitcode"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment