Skip to content

Instantly share code, notes, and snippets.

@mikejs
Created April 19, 2010 02:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikejs/370705 to your computer and use it in GitHub Desktop.
Save mikejs/370705 to your computer and use it in GitHub Desktop.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <OpenCL/opencl.h>
const char *KernelSource = "\n" \
"#define DELTA 0x9e3779b9 \n" \
"#define ENC_ROUND(sum)" \
"{ " \
" v.s0 += ((v.s1 << 4) + key.s0) ^ (v.s1 + sum) ^ ((v.s1 >> 5) + key.s1); " \
" v.s1 += ((v.s0 << 4) + key.s2) ^ (v.s0 + sum) ^ ((v.s0 >> 5) + key.s3); " \
"} \n" \
"__kernel void encrypt(__global uint* input, \n" \
" const uint4 key, \n" \
" const uint blocks) \n" \
"{ \n" \
" int i; int id = get_global_id(0); \n" \
" if (id > blocks) return; \n" \
" uint2 v = {input[id * 2], input[(id * 2) + 1]}; \n" \
" // no unroll pragma? \n" \
" ENC_ROUND(DELTA); \n" \
" ENC_ROUND(DELTA * 2); ENC_ROUND(DELTA * 3); \n" \
" ENC_ROUND(DELTA * 4); ENC_ROUND(DELTA * 5); \n" \
" ENC_ROUND(DELTA * 6); ENC_ROUND(DELTA * 7); \n" \
" ENC_ROUND(DELTA * 8); ENC_ROUND(DELTA * 9); \n" \
" ENC_ROUND(DELTA * 10); ENC_ROUND(DELTA * 11); \n" \
" ENC_ROUND(DELTA * 12); ENC_ROUND(DELTA * 13); \n" \
" ENC_ROUND(DELTA * 14); ENC_ROUND(DELTA * 15); \n" \
" ENC_ROUND(DELTA * 16); ENC_ROUND(DELTA * 17); \n" \
" ENC_ROUND(DELTA * 18); ENC_ROUND(DELTA * 19); \n" \
" ENC_ROUND(DELTA * 20); ENC_ROUND(DELTA * 21); \n" \
" ENC_ROUND(DELTA * 22); ENC_ROUND(DELTA * 23); \n" \
" ENC_ROUND(DELTA * 24); ENC_ROUND(DELTA * 25); \n" \
" ENC_ROUND(DELTA * 26); ENC_ROUND(DELTA * 27); \n" \
" ENC_ROUND(DELTA * 28); ENC_ROUND(DELTA * 29); \n" \
" ENC_ROUND(DELTA * 30); ENC_ROUND(DELTA * 31); \n" \
" ENC_ROUND(DELTA * 32); \n" \
" input[id * 2] = v.s0; input[(id * 2) + 1] = v.s1; \n" \
"} \n" \
"#define DEC_SUM 0xC6EF3720 \n" \
"#define DEC_ROUND(sum)" \
"{ " \
" v.s1 -= ((v.s0 << 4) + key.s2) ^ (v.s0 + sum) ^ ((v.s0 >> 5) + key.s3); " \
" v.s0 -= ((v.s1 << 4) + key.s0) ^ (v.s1 + sum) ^ ((v.s1 >> 5) + key.s1); " \
"} \n" \
"__kernel void decrypt(__global uint* input, \n" \
" const uint4 key, \n" \
" const uint blocks) \n" \
"{ \n" \
" int i; \n"
" int id = get_global_id(0); \n" \
" if (id > blocks) return; \n" \
" uint2 v = {input[id * 2], input[(id * 2) + 1]}; \n" \
" DEC_ROUND(DEC_SUM); \n" \
" DEC_ROUND(DEC_SUM - DELTA * 1); DEC_ROUND(DEC_SUM - DELTA * 2); " \
" DEC_ROUND(DEC_SUM - DELTA * 3); DEC_ROUND(DEC_SUM - DELTA * 4); " \
" DEC_ROUND(DEC_SUM - DELTA * 5); DEC_ROUND(DEC_SUM - DELTA * 6); " \
" DEC_ROUND(DEC_SUM - DELTA * 7); DEC_ROUND(DEC_SUM - DELTA * 8); " \
" DEC_ROUND(DEC_SUM - DELTA * 9); DEC_ROUND(DEC_SUM - DELTA * 10); " \
" DEC_ROUND(DEC_SUM - DELTA * 11); DEC_ROUND(DEC_SUM - DELTA * 12); " \
" DEC_ROUND(DEC_SUM - DELTA * 13); DEC_ROUND(DEC_SUM - DELTA * 14); " \
" DEC_ROUND(DEC_SUM - DELTA * 15); DEC_ROUND(DEC_SUM - DELTA * 16); " \
" DEC_ROUND(DEC_SUM - DELTA * 17); DEC_ROUND(DEC_SUM - DELTA * 18); " \
" DEC_ROUND(DEC_SUM - DELTA * 19); DEC_ROUND(DEC_SUM - DELTA * 20); " \
" DEC_ROUND(DEC_SUM - DELTA * 21); DEC_ROUND(DEC_SUM - DELTA * 22); " \
" DEC_ROUND(DEC_SUM - DELTA * 23); DEC_ROUND(DEC_SUM - DELTA * 24); " \
" DEC_ROUND(DEC_SUM - DELTA * 25); DEC_ROUND(DEC_SUM - DELTA * 26); " \
" DEC_ROUND(DEC_SUM - DELTA * 27); DEC_ROUND(DEC_SUM - DELTA * 28); " \
" DEC_ROUND(DEC_SUM - DELTA * 29); DEC_ROUND(DEC_SUM - DELTA * 30); " \
" DEC_ROUND(DEC_SUM - DELTA * 31); " \
" input[id * 2] = v.s0; input[(id * 2) + 1] = v.s1; \n" \
"} \n" \
"\n";
int main() {
cl_device_id device_id;
cl_context context;
cl_command_queue commands;
cl_program program;
cl_kernel enc_kernel, dec_kernel;
cl_int err;
cl_mem data_buf;
const unsigned int BLOCKS = pow(2, 24);
size_t global = BLOCKS;
cl_uint *data = calloc(BLOCKS * 2, sizeof(cl_uint));
cl_uint4 key = {1, 2, 3, 4};
err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
if (err != CL_SUCCESS) {
printf("Error getting device ID.\n");
exit(1);
}
context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
if (!context) {
printf("Error creating context.\n");
exit(1);
}
commands = clCreateCommandQueue(context, device_id, 0, &err);
if (!commands) {
printf("Error creating command queue.\n");
exit(1);
}
program = clCreateProgramWithSource(context, 1,
(const char **)&KernelSource,
NULL, &err);
if (!program) {
printf("Error creating program.\n");
exit(1);
}
err = clBuildProgram(program, 0, NULL, "-Werror", NULL, NULL);
if (err != CL_SUCCESS) {
size_t len;
char buffer[2048];
printf("Error: Failed to build program executable!\n");
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
sizeof(buffer), buffer, &len);
printf("%s\n", buffer);
exit(1);
}
enc_kernel = clCreateKernel(program, "encrypt", &err);
if (!enc_kernel || err != CL_SUCCESS) {
printf("Error creating kernel.\n");
exit(1);
}
dec_kernel = clCreateKernel(program, "decrypt", &err);
if (!dec_kernel || err != CL_SUCCESS) {
printf("Error creating decrypt kernel.\n");
exit(1);
}
data_buf = clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_uint) * BLOCKS * 2, NULL, NULL);
if (!data_buf) {
printf("Error creating buffer.\n");
exit(1);
}
err = clEnqueueWriteBuffer(commands, data_buf, CL_TRUE, 0,
sizeof(cl_uint) * BLOCKS * 2, data,
0, NULL, NULL);
if (err != CL_SUCCESS) {
printf("Error writing data buffer.\n");
exit(1);
}
err = 0;
err = clSetKernelArg(enc_kernel, 0, sizeof(cl_mem), &data_buf);
err |= clSetKernelArg(enc_kernel, 1, sizeof(cl_uint4), &key);
err |= clSetKernelArg(enc_kernel, 2, sizeof(unsigned int), &BLOCKS);
if (err != CL_SUCCESS) {
printf("Error setting args.\n");
exit(1);
}
err = clEnqueueNDRangeKernel(commands, enc_kernel, 1, NULL, &global,
NULL, 0, NULL, NULL);
if (err) {
printf("Error executing encrypt kernel: %d.\n", err);
exit(1);
}
clEnqueueBarrier(commands);
err = 0;
err = clSetKernelArg(dec_kernel, 0, sizeof(cl_mem), &data_buf);
err |= clSetKernelArg(dec_kernel, 1, sizeof(cl_uint4), &key);
err |= clSetKernelArg(dec_kernel, 2, sizeof(unsigned int), &BLOCKS);
if (err != CL_SUCCESS) {
printf("Error setting args.\n");
exit(1);
}
err = clEnqueueNDRangeKernel(commands, dec_kernel, 1, NULL, &global,
NULL, 0, NULL, NULL);
if (err) {
printf("Error executing decrypt kernel: %d\n", err);
exit(1);
}
clFinish(commands);
err = clEnqueueReadBuffer(commands, data_buf, CL_TRUE, 0,
sizeof(cl_uint) * BLOCKS * 2,
data, 0, NULL, NULL);
if (err != CL_SUCCESS) {
printf("Error reading buffer.\n");
exit(1);
}
printf("%u %u\n", data[0], data[1]);
clReleaseMemObject(data_buf);
clReleaseProgram(program);
clReleaseKernel(enc_kernel);
clReleaseKernel(dec_kernel);
clReleaseCommandQueue(commands);
clReleaseContext(context);
free(data);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment