public
Last active

C & OpenCL Solution for Daily Programmer [08/13/13] Challenge #137 [Easy] String Transposition http://www.reddit.com/r/dailyprogrammer/comments/1m1jam/081313_challenge_137_easy_string_transposition/. My post in the thread has the OpenCL error handling edited out to make it shorter; this is the full code.

  • Download Gist
sample_run.txt
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
.\dp_137.exe .\input2.txt
Pre-Transpose
K e r n e l
M i c r o c o n t r o l l e r
R e g i s t e r
M e m o r y
O p e r a t o r
Using device: GeForce GTX 570
Transposed matrix:
K M R M O
e i e e p
r c g m e
n r i o r
e o s r a
l c t y t
o e o
n r r
t
r
o
l
l
e
r
 
As row strings:
KMRMO
eieep
rcgme
nrior
eosra
lctyt
oe o
nr r
t
r
o
l
l
e
r
with_checks.c
C
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <CL/cl.h>
 
/*
* This version performs error checking after just about every OpenCL command;
* for brevity, a version without all this checking is also provided.
*/
 
#define MAX_LINE 256
 
//OpenCL C source of the transpose kernel; main compiles it at run time.
//Each work-item handles one element: with id = get_global_id(0),
//x = id % row_len is the element's column and y = (id - x) / row_len its row in
//in_mat, which is indexed with rows of row_len elements. The element is stored
//at column y of row x in out_mat, which is indexed with rows of col_len elements.
//The kernel does not bounds-check id; main enqueues row_len * col_len work-items.
const char *matrix_transpose =
"__kernel void matrix_transpose(__global char *in_mat, __global char *out_mat, int row_len, int col_len){\n\
int id = get_global_id(0);\n\
int x = id % row_len;\n\
int y = (id - x) / row_len;\n\
out_mat[y + x * col_len] = in_mat[x + y * row_len];\n\
}";
 
//Read all strings from a file and return them packed into a single buffer, one
//fixed-width row per string. Also gives back how many lines were read (n_lines)
//and the row width of the returned buffer (max_str_len).
//Returns NULL on failure; on success the caller frees the buffer.
char* lines_from_file(FILE *fp, int *n_lines, int *max_str_len);
//Print a matrix of characters: row_len characters per output line, col_len lines
void print_matrix(char *matrix, int row_len, int col_len);
//Retrieve a row string, caller is responsible for freeing the string
char* get_row_string(char *matrix, int row_len, int row);
 
int main(int argc, char **argv){
if (argc < 2){
printf("Usage: ./prog input.txt\n");
return 1;
}
FILE *fp = fopen(argv[1], "r");
if (!fp){
printf("Failed to open file: %s\n", argv[1]);
return 1;
}
int row_len = 0, col_len = 0;
char *matrix = lines_from_file(fp, &col_len, &row_len);
if (!matrix){
printf("lines_from_file failed\n");
fclose(fp);
return 2;
}
fclose(fp);
 
printf("Pre-Transpose\n");
print_matrix(matrix, row_len, col_len);
 
cl_platform_id platform;
cl_uint num;
int err = clGetPlatformIDs(1, &platform, &num);
if (err < 0){
printf("clGetPlatformIDs error: %d\n", err);
free(matrix);
return err;
}
 
cl_device_id device;
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
if (err < 0){
printf("clGetDeviceIDs error: %d\n", err);
free(matrix);
return err;
}
char name[256];
err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(name), name, NULL);
if (err < 0){
printf("clGetDeviceInfo error: %d\n", err);
free(matrix);
return err;
}
name[255] = '\0';
printf("Using device: %s\n", name);
 
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
if (err < 0){
printf("clCreateContext error: %d\n", err);
free(matrix);
return err;
}
 
size_t prog_size = strlen(matrix_transpose);
cl_program program = clCreateProgramWithSource(context, 1, &matrix_transpose, &prog_size, &err);
if (err < 0){
printf("clCreateProgramWithSource error: %d\n", err);
clReleaseContext(context);
free(matrix);
return err;
}
 
err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
if (err < 0){
printf("clBuildProgram err: %d\n", err);
 
size_t log_size;
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
char *prog_log = (char*)calloc(log_size + 1, sizeof(char));
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, prog_log, NULL);
printf("%s\n", prog_log);
free(prog_log);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
cl_kernel kernel = clCreateKernel(program, "matrix_transpose", &err);
if (err < 0){
printf("clCreateKernel err: %d\n", err);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err);
if (err < 0){
printf("clCreateCommandQueue err: %d\n", err);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
int matrix_buf_size = row_len * col_len * sizeof(char);
cl_mem in_mat = clCreateBuffer(context, CL_MEM_READ_ONLY, matrix_buf_size, NULL, &err);
if (err < 0){
printf("clCreateBuffer err: %d\n", err);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
err = clEnqueueWriteBuffer(queue, in_mat, CL_FALSE, 0, matrix_buf_size, matrix, 0, NULL, NULL);
if (err < 0){
printf("clEnqueueWriteBuffer err: %d\n", err);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
cl_mem out_mat = clCreateBuffer(context, CL_MEM_WRITE_ONLY, matrix_buf_size, NULL, &err);
if (err < 0){
printf("clCreateBuffer err: %d\n", err);
clReleaseMemObject(in_mat);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &in_mat);
if (err < 0){
printf("clSetKernelArg err: %d\n", err);
clReleaseMemObject(out_mat);
clReleaseMemObject(in_mat);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &out_mat);
if (err < 0){
printf("clSetKernelArg err: %d\n", err);
clReleaseMemObject(out_mat);
clReleaseMemObject(in_mat);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
clSetKernelArg(kernel, 2, sizeof(int), &row_len);
clSetKernelArg(kernel, 3, sizeof(int), &col_len);
 
size_t global_size = row_len * col_len;
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
if (err < 0){
printf("clEnqueueNDRangeKernel err: %d\n", err);
clReleaseMemObject(out_mat);
clReleaseMemObject(in_mat);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(matrix);
return err;
}
 
char *result = (char*)malloc(matrix_buf_size);
err = clEnqueueReadBuffer(queue, out_mat, CL_TRUE, 0, matrix_buf_size, result, 0, NULL, NULL);
if (err < 0){
printf("clEnqueueReadBuffer err: %d\n", err);
clReleaseMemObject(out_mat);
clReleaseMemObject(in_mat);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
free(result);
free(matrix);
return err;
}
 
printf("Transposed matrix:\n");
print_matrix(result, col_len, row_len);
 
//Print each row
printf("As row strings:\n");
for (int i = 0; i < row_len - 1; ++i){
char *str = get_row_string(result, col_len, i);
printf("%s\n", str);
free(str);
}
free(result);
free(matrix);
 
clReleaseMemObject(out_mat);
clReleaseMemObject(in_mat);
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseContext(context);
 
return 0;
}
/*
 * Print a character matrix stored as one flat buffer.
 *
 * matrix:  buffer of row_len * col_len characters
 * row_len: number of characters printed per output line
 * col_len: number of output lines
 *
 * Every character is followed by a single space and the output ends with a
 * newline.
 */
void print_matrix(char *matrix, int row_len, int col_len){
    const int total = row_len * col_len;
    for (int idx = 0; idx < total; ++idx){
        //Break the line before the first character of every row but the first
        if (idx > 0 && idx % row_len == 0){
            putchar('\n');
        }
        printf("%c ", matrix[idx]);
    }
    putchar('\n');
}
/*
 * Copy one row of a flat character matrix into a newly allocated C string.
 *
 * matrix:  buffer whose rows are row_len characters wide
 * row_len: width of a row
 * row:     zero-based index of the row to extract
 *
 * '\0' bytes inside the row are turned into spaces, except in the row's last
 * position, where a '\0' is kept so the string simply ends there.
 *
 * Returns a string of at most row_len characters that the caller must free,
 * or NULL if matrix is NULL, row_len or row is negative, or the allocation
 * fails.
 */
char* get_row_string(char *matrix, int row_len, int row){
    if (!matrix || row_len < 0 || row < 0){
        return NULL;
    }
    const size_t width = (size_t)row_len;
    char *str = malloc(width + 1);
    if (!str){
        return NULL;
    }
    //size_t arithmetic keeps the row offset from overflowing int
    memcpy(str, matrix + (size_t)row * width, width);
    //Replace all but the final null with spaces
    for (size_t i = 0; i + 1 < width; ++i){
        if (str[i] == '\0'){
            str[i] = ' ';
        }
    }
    str[width] = '\0';
    return str;
}
/*
 * Read a list of strings from fp and pack them into one dense character matrix.
 *
 * The first line of the stream holds the number of strings; each following
 * line holds one string. At most MAX_LINE - 1 characters are read per line.
 *
 * fp:          readable text stream positioned at the count line
 * n_lines:     out - number of strings read, i.e. the number of matrix rows
 * max_str_len: out - width of a matrix row: the length of the longest string
 *              plus one terminator slot. Any previous value is overwritten.
 *
 * Returns a buffer of (*n_lines) * (*max_str_len) bytes in which row i starts
 * at offset i * (*max_str_len) and is padded with '\0' up to the row width.
 * The caller frees it. On a missing or non-positive count, a read error or an
 * allocation failure a message is printed, NULL is returned and both outputs
 * are 0.
 */
char* lines_from_file(FILE *fp, int *n_lines, int *max_str_len){
    char line[MAX_LINE];
    *n_lines = 0;
    *max_str_len = 0;
    if (!fgets(line, MAX_LINE, fp)){
        printf("File read error\n");
        return NULL;
    }
    char *end = NULL;
    long count = strtol(line, &end, 10);
    //Reject non-numbers, non-positive counts and counts that do not fit an int
    if (end == line || count <= 0 || count != (long)(int)count){
        printf("Invalid line count\n");
        return NULL;
    }
    const size_t rows = (size_t)count;

    //Staging buffer: one zero-filled MAX_LINE slot per string, so everything
    //after a string's terminator is already '\0' padding
    char *content = calloc(rows, MAX_LINE);
    if (!content){
        printf("Failed to allocate file content buffer\n");
        return NULL;
    }
    size_t width = 0;
    for (size_t i = 0; i < rows; ++i){
        char *slot = content + i * MAX_LINE;
        if (!fgets(slot, MAX_LINE, fp)){
            printf("Error reading from file\n");
            free(content);
            return NULL;
        }
        //Drop the newline before measuring, so the width is the same whether
        //or not the last line of the file ends with one
        char *new_line = strchr(slot, '\n');
        if (new_line){
            *new_line = '\0';
        }
        size_t len = strlen(slot) + 1; //+1 for the terminator slot
        if (len > width){
            width = len;
        }
    }
    //Now trim the buffer down to the dense rows x width matrix.
    //width <= MAX_LINE because fgets stores at most MAX_LINE - 1 characters,
    //so copying width bytes never leaves a staging slot.
    char *matrix = calloc(rows, width);
    if (!matrix){
        printf("Failed to allocate matrix buffer\n");
        free(content);
        return NULL;
    }
    for (size_t i = 0; i < rows; ++i){
        memcpy(matrix + i * width, content + i * MAX_LINE, width);
    }
    free(content);
    *n_lines = (int)rows;
    *max_str_len = (int)width;
    return matrix;
}
without_checks.c
C
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <CL/cl.h>
 
/*
* To show a shorter version with more focus on the method itself, this version has
* no OpenCL error checking
*/
 
#define MAX_LINE 256
 
//OpenCL C source of the transpose kernel; main compiles it at run time.
//Each work-item handles one element: with id = get_global_id(0),
//x = id % row_len is the element's column and y = (id - x) / row_len its row in
//in_mat, which is indexed with rows of row_len elements. The element is stored
//at column y of row x in out_mat, which is indexed with rows of col_len elements.
//The kernel does not bounds-check id; main enqueues row_len * col_len work-items.
const char *matrix_transpose =
"__kernel void matrix_transpose(__global char *in_mat, __global char *out_mat, int row_len, int col_len){\n\
int id = get_global_id(0);\n\
int x = id % row_len;\n\
int y = (id - x) / row_len;\n\
out_mat[y + x * col_len] = in_mat[x + y * row_len];\n\
}";
 
//Read all strings from a file and return them packed into a single buffer, one
//fixed-width row per string. Also gives back how many lines were read (n_lines)
//and the row width of the returned buffer (max_str_len).
//Returns NULL on failure; on success the caller frees the buffer.
char* lines_from_file(FILE *fp, int *n_lines, int *max_str_len);
//Print a matrix of characters: row_len characters per output line, col_len lines
void print_matrix(char *matrix, int row_len, int col_len);
//Retrieve a row string, caller is responsible for freeing the string
char* get_row_string(char *matrix, int row_len, int row);
 
/*
 * Program entry point, without OpenCL error checking.
 *
 * Loads the strings from the file named by argv[1], prints them as a character
 * matrix, runs the matrix_transpose kernel over it on a GPU device and prints
 * the transposed matrix cell by cell and as row strings.
 *
 * Returns 1 for a usage or file-open error, 2 if the input could not be read
 * and 0 otherwise; results of the OpenCL calls are not inspected.
 */
int main(int argc, char **argv){
    if (argc < 2){
        printf("Usage: ./prog input.txt\n");
        return 1;
    }

    FILE *input = fopen(argv[1], "r");
    if (input == NULL){
        printf("Failed to open file: %s\n", argv[1]);
        return 1;
    }

    //The line count becomes the column length, the line width the row length
    int row_len = 0;
    int col_len = 0;
    char *host_in = lines_from_file(input, &col_len, &row_len);
    if (host_in == NULL){
        printf("lines_from_file failed\n");
        fclose(input);
        return 2;
    }
    fclose(input);

    printf("Pre-Transpose\n");
    print_matrix(host_in, row_len, col_len);

    //Pick the first platform and the first GPU device on it
    cl_platform_id platform;
    cl_uint platform_count;
    clGetPlatformIDs(1, &platform, &platform_count);

    cl_device_id device;
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);

    char device_name[256];
    clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, NULL);
    device_name[sizeof(device_name) - 1] = '\0';
    printf("Using device: %s\n", device_name);

    cl_context ctx = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);

    //Build the kernel from its source string
    size_t source_len = strlen(matrix_transpose);
    cl_program prog = clCreateProgramWithSource(ctx, 1, &matrix_transpose, &source_len, NULL);
    clBuildProgram(prog, 1, &device, NULL, NULL, NULL);
    cl_kernel transpose = clCreateKernel(prog, "matrix_transpose", NULL);

    cl_command_queue cmd_queue = clCreateCommandQueue(ctx, device, 0, NULL);

    //Device buffers: the input is uploaded right away, the output is filled by the kernel
    int n_bytes = row_len * col_len * sizeof(char);
    cl_mem src_buf = clCreateBuffer(ctx, CL_MEM_READ_ONLY, n_bytes, NULL, NULL);
    clEnqueueWriteBuffer(cmd_queue, src_buf, CL_FALSE, 0, n_bytes, host_in, 0, NULL, NULL);

    cl_mem dst_buf = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, n_bytes, NULL, NULL);

    clSetKernelArg(transpose, 0, sizeof(cl_mem), &src_buf);
    clSetKernelArg(transpose, 1, sizeof(cl_mem), &dst_buf);
    clSetKernelArg(transpose, 2, sizeof(int), &row_len);
    clSetKernelArg(transpose, 3, sizeof(int), &col_len);

    //One work-item per matrix element
    size_t work_items = row_len * col_len;
    clEnqueueNDRangeKernel(cmd_queue, transpose, 1, NULL, &work_items, NULL, 0, NULL, NULL);

    //Blocking read of the transposed data back to the host
    char *host_out = malloc(n_bytes);
    clEnqueueReadBuffer(cmd_queue, dst_buf, CL_TRUE, 0, n_bytes, host_out, 0, NULL, NULL);

    printf("Transposed matrix:\n");
    print_matrix(host_out, col_len, row_len);

    //Print every transposed row except the last one as a string
    printf("As row strings:\n");
    int row = 0;
    while (row < row_len - 1){
        char *text = get_row_string(host_out, col_len, row);
        printf("%s\n", text);
        free(text);
        ++row;
    }

    free(host_out);
    free(host_in);

    clReleaseMemObject(dst_buf);
    clReleaseMemObject(src_buf);
    clReleaseCommandQueue(cmd_queue);
    clReleaseKernel(transpose);
    clReleaseProgram(prog);
    clReleaseContext(ctx);

    return 0;
}
/*
 * Print a character matrix stored as one flat buffer.
 *
 * matrix:  buffer of row_len * col_len characters
 * row_len: number of characters printed per output line
 * col_len: number of output lines
 *
 * Every character is followed by a single space and the output ends with a
 * newline.
 */
void print_matrix(char *matrix, int row_len, int col_len){
    const int total = row_len * col_len;
    for (int idx = 0; idx < total; ++idx){
        //Break the line before the first character of every row but the first
        if (idx > 0 && idx % row_len == 0){
            putchar('\n');
        }
        printf("%c ", matrix[idx]);
    }
    putchar('\n');
}
/*
 * Copy one row of a flat character matrix into a newly allocated C string.
 *
 * matrix:  buffer whose rows are row_len characters wide
 * row_len: width of a row
 * row:     zero-based index of the row to extract
 *
 * '\0' bytes inside the row are turned into spaces, except in the row's last
 * position, where a '\0' is kept so the string simply ends there.
 *
 * Returns a string of at most row_len characters that the caller must free,
 * or NULL if matrix is NULL, row_len or row is negative, or the allocation
 * fails.
 */
char* get_row_string(char *matrix, int row_len, int row){
    if (!matrix || row_len < 0 || row < 0){
        return NULL;
    }
    const size_t width = (size_t)row_len;
    char *str = malloc(width + 1);
    if (!str){
        return NULL;
    }
    //size_t arithmetic keeps the row offset from overflowing int
    memcpy(str, matrix + (size_t)row * width, width);
    //Replace all but the final null with spaces
    for (size_t i = 0; i + 1 < width; ++i){
        if (str[i] == '\0'){
            str[i] = ' ';
        }
    }
    str[width] = '\0';
    return str;
}
/*
 * Read a list of strings from fp and pack them into one dense character matrix.
 *
 * The first line of the stream holds the number of strings; each following
 * line holds one string. At most MAX_LINE - 1 characters are read per line.
 *
 * fp:          readable text stream positioned at the count line
 * n_lines:     out - number of strings read, i.e. the number of matrix rows
 * max_str_len: out - width of a matrix row: the length of the longest string
 *              plus one terminator slot. Any previous value is overwritten.
 *
 * Returns a buffer of (*n_lines) * (*max_str_len) bytes in which row i starts
 * at offset i * (*max_str_len) and is padded with '\0' up to the row width.
 * The caller frees it. On a missing or non-positive count, a read error or an
 * allocation failure a message is printed, NULL is returned and both outputs
 * are 0.
 */
char* lines_from_file(FILE *fp, int *n_lines, int *max_str_len){
    char line[MAX_LINE];
    *n_lines = 0;
    *max_str_len = 0;
    if (!fgets(line, MAX_LINE, fp)){
        printf("File read error\n");
        return NULL;
    }
    char *end = NULL;
    long count = strtol(line, &end, 10);
    //Reject non-numbers, non-positive counts and counts that do not fit an int
    if (end == line || count <= 0 || count != (long)(int)count){
        printf("Invalid line count\n");
        return NULL;
    }
    const size_t rows = (size_t)count;

    //Staging buffer: one zero-filled MAX_LINE slot per string, so everything
    //after a string's terminator is already '\0' padding
    char *content = calloc(rows, MAX_LINE);
    if (!content){
        printf("Failed to allocate file content buffer\n");
        return NULL;
    }
    size_t width = 0;
    for (size_t i = 0; i < rows; ++i){
        char *slot = content + i * MAX_LINE;
        if (!fgets(slot, MAX_LINE, fp)){
            printf("Error reading from file\n");
            free(content);
            return NULL;
        }
        //Drop the newline before measuring, so the width is the same whether
        //or not the last line of the file ends with one
        char *new_line = strchr(slot, '\n');
        if (new_line){
            *new_line = '\0';
        }
        size_t len = strlen(slot) + 1; //+1 for the terminator slot
        if (len > width){
            width = len;
        }
    }
    //Now trim the buffer down to the dense rows x width matrix.
    //width <= MAX_LINE because fgets stores at most MAX_LINE - 1 characters,
    //so copying width bytes never leaves a staging slot.
    char *matrix = calloc(rows, width);
    if (!matrix){
        printf("Failed to allocate matrix buffer\n");
        free(content);
        return NULL;
    }
    for (size_t i = 0; i < rows; ++i){
        memcpy(matrix + i * width, content + i * MAX_LINE, width);
    }
    free(content);
    *n_lines = (int)rows;
    *max_str_len = (int)width;
    return matrix;
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.