Last active
August 29, 2015 14:23
-
-
Save m1el/2500c717043a475f4d9a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// cl hmh_transpose.c /nologo /O2 && hmh_transpose.exe | |
#include <stdio.h> | |
typedef unsigned __int64 uint64; | |
#if defined(_WIN32) | |
#include <windows.h> | |
uint64 timer_frequency() | |
{ | |
uint64 freq; | |
QueryPerformanceFrequency((LARGE_INTEGER*)&freq); | |
return freq; | |
} | |
uint64 timer_now() | |
{ | |
uint64 time; | |
QueryPerformanceCounter((LARGE_INTEGER *)&time); | |
return time; | |
} | |
#else | |
#error "TODO: port timer functions" | |
#endif | |
typedef enum { | |
OK = 0, | |
WRONG_INPUT, | |
SET_ON_FIRE, | |
} mt_error; | |
typedef mt_error (*transpose_fn)(int* matrix, int height, int width); | |
typedef unsigned int uint; | |
mt_error transpose_naive_rows(int* matrix, uint width, uint height) | |
{ | |
if (width <= 0 || height <= 0) { return WRONG_INPUT; } | |
int* scratch = (int*)malloc(sizeof(int) * height * width); | |
if (!scratch) { return SET_ON_FIRE; } | |
for (uint i = 0; i < height; i++) | |
{ | |
for (uint j = 0; j < width; j++) | |
{ | |
scratch[j*height + i] = matrix[i*width + j]; | |
} | |
} | |
memcpy(matrix, scratch, sizeof(int) * width * height); | |
free(scratch); | |
return OK; | |
} | |
mt_error transpose_naive_cols(int* matrix, uint width, uint height) | |
{ | |
if (width <= 0 || height <= 0) { return WRONG_INPUT; } | |
int* scratch = (int*)malloc(sizeof(int) * width * height); | |
if (!scratch) { return SET_ON_FIRE; } | |
for (uint j = 0; j < width; j++) | |
{ | |
for (uint i = 0; i < height; i++) | |
{ | |
scratch[j*height + i] = matrix[i*width + j]; | |
} | |
} | |
memcpy(matrix, scratch, sizeof(int) * width * height); | |
free(scratch); | |
return OK; | |
} | |
uint next_in_cycle(uint i, uint w, uint h) | |
{ | |
return (i*w + i/h) % (w*h); | |
} | |
mt_error transpose_cycles(int* matrix, uint width, uint height) | |
{ | |
if (width <= 0 || height <= 0) { return WRONG_INPUT; } | |
uint size = width * height; | |
char* visited = (char*)malloc(sizeof(char) * size); | |
if (!visited) { return SET_ON_FIRE; } | |
memset(visited, 0, sizeof(char) * size); | |
for (uint i = 0; i < size; i++) | |
{ | |
if (visited[i]) { continue; } | |
uint start = i; | |
// swap elements while following a cycle | |
int tmp = matrix[i]; | |
while (1) { | |
visited[i] = 1; | |
uint next_idx = next_in_cycle(i, width, height); | |
if (next_idx == start) | |
{ | |
matrix[i] = tmp; | |
i = start; | |
break; | |
} | |
matrix[i] = matrix[next_idx]; | |
i = next_idx; | |
} | |
} | |
free(visited); | |
return OK; | |
} | |
mt_error transpose_cycles_const_space( | |
int* matrix, int width, int height) | |
{ | |
if (width <= 0 || height <= 0) { return WRONG_INPUT; } | |
uint size = width * height; | |
for (uint i = 0; i < size; i++) | |
{ | |
uint start = i; | |
// do a dry-run, if we land in a cell to the left, | |
// then we've already visited that loop | |
do { | |
i = next_in_cycle(i, width, height); | |
} while (i > start); | |
if (i < start) | |
{ | |
i = start; | |
continue; | |
} | |
// swap elements while following a cycle | |
int tmp = matrix[i]; | |
while (1) { | |
uint next_idx = next_in_cycle(i, width, height); | |
if (next_idx == start) | |
{ | |
matrix[i] = tmp; | |
i = start; | |
break; | |
} | |
matrix[i] = matrix[next_idx]; | |
i = next_idx; | |
} | |
} | |
return OK; | |
} | |
void fill_array(int* matrix, uint size) | |
{ | |
for (uint i = 0; i < size; i++) | |
{ | |
matrix[i] = i; | |
} | |
} | |
int test_transposed(int* matrix, uint width, uint height) | |
{ | |
for (uint i = 0; i < width; i++) | |
{ | |
for (uint j = 0; j < height; j++) | |
{ | |
if (matrix[i*height+j] != j*width + i) { | |
printf("pos: %d %d\n", i, j); | |
printf("val: %d %d\n", matrix[i*height+j], j*width + i); | |
return 0; | |
} | |
} | |
} | |
return 1; | |
} | |
int test_transpose_fn( | |
int* matrix, uint width, uint height, | |
transpose_fn fn, int loops) | |
{ | |
fill_array(matrix, width * height); | |
mt_error success = fn(matrix, width, height); | |
if (!success == OK) | |
{ | |
printf("transpose failed!\n"); | |
return -1; | |
} | |
int res = test_transposed(matrix, width, height); | |
if (!res) | |
{ | |
printf("function did not transpose correctly!\n"); | |
return -1; | |
} | |
uint64 start = timer_now(); | |
for (int i = 0; i < loops; i++) | |
{ | |
fn(matrix, width, height); | |
} | |
uint64 end = timer_now(); | |
return (end - start) * 1000 / (timer_frequency()); | |
} | |
int main(int argc, char** argv) | |
{ | |
uint width = 2; | |
uint height = 1000000; | |
int* matrix = (int*)malloc(sizeof(int) * width * height); | |
if (!matrix) { | |
printf("caught fire!\n"); | |
return 1; | |
} | |
int loops = 10; | |
printf("naive cols: %d\n", | |
test_transpose_fn(matrix, width, height, | |
(transpose_fn)&transpose_naive_cols, loops)); | |
printf("naive rows: %d\n", | |
test_transpose_fn(matrix, width, height, | |
(transpose_fn)&transpose_naive_rows, loops)); | |
printf("cycles: %d\n", | |
test_transpose_fn(matrix, width, height, | |
(transpose_fn)&transpose_cycles, loops)); | |
printf("cycles const space: %d\n", | |
test_transpose_fn(matrix, width, height, | |
(transpose_fn)&transpose_cycles_const_space, loops)); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment