public
Last active

C & OpenCL solution for Daily Programmer [08/13/13] Challenge #137 [Easy] String Transposition: http://www.reddit.com/r/dailyprogrammer/comments/1m1jam/081313_challenge_137_easy_string_transposition/. My post in the thread has the OpenCL error handling edited out to make it shorter; this is the full code.

  • Download Gist
sample_run.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
.\dp_137.exe .\input2.txt
Pre-Transpose
K e r n e l
M i c r o c o n t r o l l e r
R e g i s t e r
M e m o r y
O p e r a t o r
Using device: GeForce GTX 570
Transposed matrix:
K M R M O
e i e e p
r c g m e
n r i o r
e o s r a
l c t y t
o e o
n r r
t
r
o
l
l
e
r
 
As row strings:
KMRMO
eieep
rcgme
nrior
eosra
lctyt
oe o
nr r
t
r
o
l
l
e
r
with_checks.c
C

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <CL/cl.h>
 
/*
* This version performs error checking after just about every OpenCL command.
* For brevity, a version without all this checking is also provided.
*/

//Size in bytes of the buffer used for every line read from the input file
//(passed to fgets, so a line may hold at most MAX_LINE - 1 characters)
#define MAX_LINE 256
 
//OpenCL C source of the transpose kernel, handed to clCreateProgramWithSource at run time.
//The kernel treats in_mat as rows of row_len characters. Each work-item takes its
//1-D global id, splits it into a column x = id % row_len and a row y = (id - x) / row_len,
//and copies in_mat[x + y * row_len] to out_mat[y + x * col_len], i.e. it performs a
//transpose on MxN size matrices with one work-item per element.
const char *matrix_transpose =
"__kernel void matrix_transpose(__global char *in_mat, __global char *out_mat, int row_len, int col_len){\n\
int id = get_global_id(0);\n\
int x = id % row_len;\n\
int y = (id - x) / row_len;\n\
out_mat[y + x * col_len] = in_mat[x + y * row_len];\n\
}";
 
//Read all strings from a file and return them in a single buffer (NULL on failure).
//Also gives back how many lines were read (n_lines) and the length of the longest
//line as measured by lines_from_file (max_str_len), which is used as the row width.
char* lines_from_file(FILE *fp, int *n_lines, int *max_str_len);
//Print out a matrix of characters, row_len characters per output line
void print_matrix(char *matrix, int row_len, int col_len);
//Retrieve one row of the matrix as a string; the caller is responsible for freeing it
char* get_row_string(char *matrix, int row_len, int row);
 
int main(int argc, char **argv){
if (argc < 2){
printf("Usage: ./prog input.txt\n");
return 1;
}
FILE *fp = fopen(argv[1], "r");
if (!fp){
printf("Failed to open file: %s\n", argv[1]);
return 1;
}
int row_len = 0, col_len = 0;
char *matrix = lines_from_file(fp, &col_len, &row_len);
if (!matrix){
printf("lines_from_file failed\n");
fclose(fp);
return 2;
}
fclose(fp);
 
printf("Pre-Transpose\n");
print_matrix(matrix, row_len, col_len);
 
cl_platform_id platform;
cl_uint num;
int err = clGetPlatformIDs(1, &platform, &num);
if (err < 0){
printf("clGetPlatformIDs error: %d\n", err);
free(matrix);
return err;
}
 
cl_device_id device;
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
if (err < 0){
printf("clGetDeviceIDs error: %d\n", err);
free(matrix);
return err;
}
char name[256];
err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(name), name, NULL);
if (err < 0){
printf("clGetDeviceInfo error: %d\n", err);
free(matrix);
return err;
}
name[255] = '\0';
printf("Using device: %s\n", name);
 
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if (err < 0){
printf("clCreateContext error: %d\n", err);
free(matrix);
return err;
}
 
size_t prog_size = strlen(matrix_transpose);
cl_program program = clCreateProgramWithSource(context, 1, &matrix_transpose, &prog_size, &err);
if (err < 0){
printf("clCreateProgramWithSource error: %d\n", err);
clReleaseContext(context);
free(matrix);
return err;
}
 
err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
if (err < 0){
printf("clBuildProgram err: %d\n", err);
 
size_t log_size;
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
char *prog_log = (char*)calloc(log_size + 1, sizeof(char));
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, prog_log, NULL);
printf("%s\n", prog_log);
free(prog_log);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
cl_kernel kernel = clCreateKernel(program, "matrix_transpose", &err);
if (err < 0){
printf("clCreateKernel err: %d\n", err);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err);
if (err < 0){
printf("clCreateCommandQueue err: %d\n", err);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
int matrix_buf_size = row_len * col_len * sizeof(char);
cl_mem in_mat = clCreateBuffer(context, CL_MEM_READ_ONLY, matrix_buf_size, NULL, &err);
if (err < 0){
printf("clCreateBuffer err: %d\n", err);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
err = clEnqueueWriteBuffer(queue, in_mat, CL_FALSE, 0, matrix_buf_size, matrix, 0, NULL, NULL);
if (err < 0){
printf("clEnqueueWriteBuffer err: %d\n", err);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
cl_mem out_mat = clCreateBuffer(context, CL_MEM_WRITE_ONLY, matrix_buf_size, NULL, &err);
if (err < 0){
printf("clCreateBuffer err: %d\n", err);
clReleaseMemObject(in_mat);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &in_mat);
if (err < 0){
printf("clSetKernelArg err: %d\n", err);
clReleaseMemObject(out_mat);
clReleaseMemObject(in_mat);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &out_mat);
if (err < 0){
printf("clSetKernelArg err: %d\n", err);
clReleaseMemObject(out_mat);
clReleaseMemObject(in_mat);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
clSetKernelArg(kernel, 2, sizeof(int), &row_len);
clSetKernelArg(kernel, 3, sizeof(int), &col_len);
 
size_t global_size = row_len * col_len;
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
if (err < 0){
printf("clEnqueueNDRangeKernel err: %d\n", err);
clReleaseMemObject(out_mat);
clReleaseMemObject(in_mat);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
char *result = (char*)malloc(matrix_buf_size);
err = clEnqueueReadBuffer(queue, out_mat, CL_TRUE, 0, matrix_buf_size, result, 0, NULL, NULL);
if (err < 0){
printf("clEnqueueReadBuffer err: %d\n", err);
clReleaseMemObject(out_mat);
clReleaseMemObject(in_mat);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(result);
free(matrix);
return err;
}
 
printf("Transposed matrix:\n");
print_matrix(result, col_len, row_len);
 
//Print each row
printf("As row strings:\n");
for (int i = 0; i < row_len - 1; ++i){
char *str = get_row_string(result, col_len, i);
printf("%s\n", str);
free(str);
}
free(result);
free(matrix);
 
clReleaseMemObject(out_mat);
clReleaseMemObject(in_mat);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
 
return 0;
}
/*
 * Print a character matrix stored as one flat, row-major buffer.
 *
 * matrix  - buffer holding row_len * col_len characters
 * row_len - number of characters printed per output line
 * col_len - number of output lines
 *
 * Every character is followed by a space; a line break is emitted before the
 * first character of each row except the first, and once more at the very end.
 */
void print_matrix(char *matrix, int row_len, int col_len){
    const int total = row_len * col_len;
    int idx;
    for (idx = 0; idx < total; ++idx){
        //A row boundary is any non-zero index that is a multiple of the row width
        const int starts_new_row = (idx != 0) && (idx % row_len == 0);
        printf(starts_new_row ? "\n%c " : "%c ", matrix[idx]);
    }
    printf("\n");
}
/*
 * Copy one row of a flat, row-major character matrix into a new C string.
 *
 * matrix  - buffer holding rows of row_len characters each
 * row_len - number of characters per row
 * row     - zero-based index of the row to extract
 *
 * NUL characters inside the row are replaced with spaces so the row prints as
 * a single string; the final character of the row is deliberately left as-is,
 * so a row that ends in a NUL does not gain a trailing space.
 *
 * Returns a malloc'd, NUL-terminated string of at most row_len characters that
 * the caller must free, or NULL if matrix is NULL, row_len or row is negative,
 * or the allocation fails.
 */
char* get_row_string(char *matrix, int row_len, int row){
    if (!matrix || row_len < 0 || row < 0){
        return NULL;
    }
    //size_t arithmetic so row * row_len cannot overflow int on large matrices
    size_t width = (size_t)row_len;
    char *str = malloc(width + 1);
    if (!str){
        return NULL;
    }
    memcpy(str, matrix + (size_t)row * width, width);
    //Replace all but the final null with spaces
    for (size_t i = 0; i + 1 < width; ++i){
        if (str[i] == '\0'){
            str[i] = ' ';
        }
    }
    str[width] = '\0';
    return str;
}
/*
 * Read the strings listed in fp into one dense, row-major character matrix.
 *
 * The first line of the file holds the number of strings that follow, one per
 * line. Each string becomes one row. Rows are NUL-padded to a common width of
 * (longest string + 1), so every row keeps at least one '\0' column; this is
 * the same width the function reports for newline-terminated input when the
 * length is measured with the newline included.
 *
 * fp          - open text stream positioned at the count line
 * n_lines     - out: number of rows in the matrix
 * max_str_len - out: row width in bytes (longest string plus one NUL column)
 *
 * Returns a buffer of *n_lines * *max_str_len bytes that the caller must free,
 * or NULL (after printing a message) when the count is missing or not
 * positive, a line cannot be read, a line does not fit in MAX_LINE - 1
 * characters, or memory runs out. The out parameters are written on success only.
 */
char* lines_from_file(FILE *fp, int *n_lines, int *max_str_len){
    char line[MAX_LINE];
    if (!fgets(line, MAX_LINE, fp)){
        printf("File read error\n");
        return NULL;
    }
    //strtol reports "no digits" through end; (~0u >> 1) is the largest int value
    char *end = NULL;
    long parsed = strtol(line, &end, 10);
    if (end == line || parsed <= 0 || parsed > (long)(~0u >> 1)){
        printf("Invalid line count\n");
        return NULL;
    }
    size_t count = (size_t)parsed;

    //Buffer big enough to contain all the lines, and will replace \n with \0
    char *content = calloc(count, MAX_LINE);
    if (!content){
        printf("Failed to allocate file content buffer\n");
        return NULL;
    }
    size_t longest = 0;
    for (size_t i = 0; i < count; ++i){
        char *row = content + i * MAX_LINE;
        if (!fgets(row, MAX_LINE, fp)){
            printf("Error reading from file\n");
            free(content);
            return NULL;
        }
        //Strip the newline first so the measured length is the text only
        size_t len = strcspn(row, "\n");
        if (row[len] == '\n'){
            row[len] = '\0';
        }
        else if (len == (size_t)(MAX_LINE - 1)){
            //fgets filled the buffer: accept only if the line really ends here,
            //otherwise the remainder would be misread as the next row
            int next = fgetc(fp);
            if (next != '\n' && next != EOF){
                printf("Line too long (max %d characters)\n", MAX_LINE - 1);
                free(content);
                return NULL;
            }
        }
        if (len > longest){
            longest = len;
        }
    }
    //Now trim the buffer down to a dense count x width matrix; calloc supplies
    //the NUL padding after each string
    size_t width = longest + 1;
    char *matrix = calloc(count, width);
    if (!matrix){
        printf("Failed to allocate matrix buffer\n");
        free(content);
        return NULL;
    }
    for (size_t i = 0; i < count; ++i){
        const char *row = content + i * MAX_LINE;
        memcpy(matrix + i * width, row, strlen(row));
    }
    free(content);
    *n_lines = (int)count;
    *max_str_len = (int)width;
    return matrix;
}
without_checks.c
C

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <CL/cl.h>
 
/*
* To show a shorter version with more focus on the method itself, this version
* has no OpenCL error checking.
*/

//Size in bytes of the buffer used for every line read from the input file
//(passed to fgets, so a line may hold at most MAX_LINE - 1 characters)
#define MAX_LINE 256
 
//OpenCL C source of the transpose kernel, handed to clCreateProgramWithSource at run time.
//The kernel treats in_mat as rows of row_len characters. Each work-item takes its
//1-D global id, splits it into a column x = id % row_len and a row y = (id - x) / row_len,
//and copies in_mat[x + y * row_len] to out_mat[y + x * col_len], i.e. it performs a
//transpose on MxN size matrices with one work-item per element.
const char *matrix_transpose =
"__kernel void matrix_transpose(__global char *in_mat, __global char *out_mat, int row_len, int col_len){\n\
int id = get_global_id(0);\n\
int x = id % row_len;\n\
int y = (id - x) / row_len;\n\
out_mat[y + x * col_len] = in_mat[x + y * row_len];\n\
}";
 
//Read all strings from a file and return them in a single buffer (NULL on failure).
//Also gives back how many lines were read (n_lines) and the length of the longest
//line as measured by lines_from_file (max_str_len), which is used as the row width.
char* lines_from_file(FILE *fp, int *n_lines, int *max_str_len);
//Print out a matrix of characters, row_len characters per output line
void print_matrix(char *matrix, int row_len, int col_len);
//Retrieve one row of the matrix as a string; the caller is responsible for freeing it
char* get_row_string(char *matrix, int row_len, int row);
 
/*
 * Shortened driver: load the strings named by argv[1], transpose them with the
 * matrix_transpose OpenCL kernel and print the result.
 *
 * Only the command line, the file open and lines_from_file are checked; the
 * return codes of the OpenCL calls are intentionally not inspected here.
 *
 * Returns 1 for a usage or file-open error, 2 if the input could not be
 * loaded, and 0 otherwise.
 */
int main(int argc, char **argv){
    if (argc < 2){
        printf("Usage: ./prog input.txt\n");
        return 1;
    }

    FILE *input = fopen(argv[1], "r");
    if (input == NULL){
        printf("Failed to open file: %s\n", argv[1]);
        return 1;
    }

    //width = characters per input row, height = number of input rows
    int width = 0;
    int height = 0;
    char *source = lines_from_file(input, &height, &width);
    if (source == NULL){
        printf("lines_from_file failed\n");
        fclose(input);
        return 2;
    }
    fclose(input);

    printf("Pre-Transpose\n");
    print_matrix(source, width, height);

    //Pick the first platform and its first GPU device
    cl_platform_id platform;
    cl_uint platform_count;
    clGetPlatformIDs(1, &platform, &platform_count);

    cl_device_id gpu;
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &gpu, NULL);

    char device_name[256];
    clGetDeviceInfo(gpu, CL_DEVICE_NAME, sizeof(device_name), device_name, NULL);
    device_name[255] = '\0';
    printf("Using device: %s\n", device_name);

    //Compile the kernel source and set up a queue on the chosen device
    cl_context ctx = clCreateContext(NULL, 1, &gpu, NULL, NULL, NULL);
    size_t source_len = strlen(matrix_transpose);
    cl_program prog = clCreateProgramWithSource(ctx, 1, &matrix_transpose, &source_len, NULL);
    clBuildProgram(prog, 1, &gpu, NULL, NULL, NULL);
    cl_kernel transpose = clCreateKernel(prog, "matrix_transpose", NULL);
    cl_command_queue cmd_queue = clCreateCommandQueue(ctx, gpu, 0, NULL);

    //Device buffers: the input is uploaded, the output is filled in by the kernel
    int byte_count = width * height * sizeof(char);
    cl_mem device_in = clCreateBuffer(ctx, CL_MEM_READ_ONLY, byte_count, NULL, NULL);
    clEnqueueWriteBuffer(cmd_queue, device_in, CL_FALSE, 0, byte_count, source, 0, NULL, NULL);
    cl_mem device_out = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, byte_count, NULL, NULL);

    clSetKernelArg(transpose, 0, sizeof(cl_mem), &device_in);
    clSetKernelArg(transpose, 1, sizeof(cl_mem), &device_out);
    clSetKernelArg(transpose, 2, sizeof(int), &width);
    clSetKernelArg(transpose, 3, sizeof(int), &height);

    //One work-item per matrix element, then a blocking read of the result
    size_t work_items = width * height;
    clEnqueueNDRangeKernel(cmd_queue, transpose, 1, NULL, &work_items, NULL, 0, NULL, NULL);
    char *transposed = (char*)malloc(byte_count);
    clEnqueueReadBuffer(cmd_queue, device_out, CL_TRUE, 0, byte_count, transposed, 0, NULL, NULL);

    printf("Transposed matrix:\n");
    print_matrix(transposed, height, width);

    //Print every transposed row except the last one as a string
    printf("As row strings:\n");
    int row = 0;
    while (row < width - 1){
        char *text = get_row_string(transposed, height, row);
        printf("%s\n", text);
        free(text);
        ++row;
    }
    free(transposed);
    free(source);

    clReleaseMemObject(device_out);
    clReleaseMemObject(device_in);
    clReleaseCommandQueue(cmd_queue);
    clReleaseKernel(transpose);
    clReleaseProgram(prog);
    clReleaseContext(ctx);

    return 0;
}
/*
 * Write a flat, row-major character matrix to stdout.
 *
 * matrix  - buffer of row_len * col_len characters
 * row_len - characters per printed line
 * col_len - number of printed lines
 *
 * Each character is printed followed by a space. A newline precedes every row
 * after the first, and a final newline closes the output.
 */
void print_matrix(char *matrix, int row_len, int col_len){
    int pos = 0;
    int count = row_len * col_len;
    while (pos < count){
        if (pos == 0 || pos % row_len != 0){
            //Still inside the current row
            printf("%c ", matrix[pos]);
        }
        else {
            //First character of a later row: break the line first
            printf("\n%c ", matrix[pos]);
        }
        pos++;
    }
    printf("\n");
}
/*
 * Build a freshly allocated C string from one row of a flat character matrix.
 *
 * matrix  - buffer holding rows of row_len characters each
 * row_len - number of characters per row
 * row     - zero-based index of the wanted row
 *
 * NULs found before the last character of the row become spaces; the last
 * character is copied unchanged. The result is NUL-terminated and must be
 * freed by the caller.
 */
char* get_row_string(char *matrix, int row_len, int row){
    char *out = malloc((row_len + 1) * sizeof(char));
    const char *src = matrix + row * row_len;
    memcpy(out, src, row_len);
    out[row_len] = '\0';

    //Patch embedded NULs, leaving the final character of the row alone
    int pos = 0;
    while (pos < row_len - 1){
        if (!out[pos]){
            out[pos] = ' ';
        }
        ++pos;
    }
    return out;
}
/*
 * Read the strings listed in fp into one dense, row-major character matrix.
 *
 * The first line of the file holds the number of strings that follow, one per
 * line. Each string becomes one row. Rows are NUL-padded to a common width of
 * (longest string + 1), so every row keeps at least one '\0' column; this is
 * the same width the function reports for newline-terminated input when the
 * length is measured with the newline included.
 *
 * fp          - open text stream positioned at the count line
 * n_lines     - out: number of rows in the matrix
 * max_str_len - out: row width in bytes (longest string plus one NUL column)
 *
 * Returns a buffer of *n_lines * *max_str_len bytes that the caller must free,
 * or NULL (after printing a message) when the count is missing or not
 * positive, a line cannot be read, a line does not fit in MAX_LINE - 1
 * characters, or memory runs out. The out parameters are written on success only.
 */
char* lines_from_file(FILE *fp, int *n_lines, int *max_str_len){
    char line[MAX_LINE];
    if (!fgets(line, MAX_LINE, fp)){
        printf("File read error\n");
        return NULL;
    }
    //strtol reports "no digits" through end; (~0u >> 1) is the largest int value
    char *end = NULL;
    long parsed = strtol(line, &end, 10);
    if (end == line || parsed <= 0 || parsed > (long)(~0u >> 1)){
        printf("Invalid line count\n");
        return NULL;
    }
    size_t count = (size_t)parsed;

    //Buffer big enough to contain all the lines, and will replace \n with \0
    char *content = calloc(count, MAX_LINE);
    if (!content){
        printf("Failed to allocate file content buffer\n");
        return NULL;
    }
    size_t longest = 0;
    for (size_t i = 0; i < count; ++i){
        char *row = content + i * MAX_LINE;
        if (!fgets(row, MAX_LINE, fp)){
            printf("Error reading from file\n");
            free(content);
            return NULL;
        }
        //Strip the newline first so the measured length is the text only
        size_t len = strcspn(row, "\n");
        if (row[len] == '\n'){
            row[len] = '\0';
        }
        else if (len == (size_t)(MAX_LINE - 1)){
            //fgets filled the buffer: accept only if the line really ends here,
            //otherwise the remainder would be misread as the next row
            int next = fgetc(fp);
            if (next != '\n' && next != EOF){
                printf("Line too long (max %d characters)\n", MAX_LINE - 1);
                free(content);
                return NULL;
            }
        }
        if (len > longest){
            longest = len;
        }
    }
    //Now trim the buffer down to a dense count x width matrix; calloc supplies
    //the NUL padding after each string
    size_t width = longest + 1;
    char *matrix = calloc(count, width);
    if (!matrix){
        printf("Failed to allocate matrix buffer\n");
        free(content);
        return NULL;
    }
    for (size_t i = 0; i < count; ++i){
        const char *row = content + i * MAX_LINE;
        memcpy(matrix + i * width, row, strlen(row));
    }
    free(content);
    *n_lines = (int)count;
    *max_str_len = (int)width;
    return matrix;
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.