Skip to content

Instantly share code, notes, and snippets.

@m1el
Last active August 29, 2015 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save m1el/2500c717043a475f4d9a to your computer and use it in GitHub Desktop.
Save m1el/2500c717043a475f4d9a to your computer and use it in GitHub Desktop.
// cl hmh_transpose.c /nologo /O2 && hmh_transpose.exe
#include <stdio.h>
typedef unsigned __int64 uint64;
#if defined(_WIN32)
#include <windows.h>
uint64 timer_frequency()
{
uint64 freq;
QueryPerformanceFrequency((LARGE_INTEGER*)&freq);
return freq;
}
uint64 timer_now()
{
uint64 time;
QueryPerformanceCounter((LARGE_INTEGER *)&time);
return time;
}
#else
#error "TODO: port timer functions"
#endif
typedef enum {
OK = 0,
WRONG_INPUT,
SET_ON_FIRE,
} mt_error;
typedef mt_error (*transpose_fn)(int* matrix, int height, int width);
typedef unsigned int uint;
mt_error transpose_naive_rows(int* matrix, uint width, uint height)
{
if (width <= 0 || height <= 0) { return WRONG_INPUT; }
int* scratch = (int*)malloc(sizeof(int) * height * width);
if (!scratch) { return SET_ON_FIRE; }
for (uint i = 0; i < height; i++)
{
for (uint j = 0; j < width; j++)
{
scratch[j*height + i] = matrix[i*width + j];
}
}
memcpy(matrix, scratch, sizeof(int) * width * height);
free(scratch);
return OK;
}
mt_error transpose_naive_cols(int* matrix, uint width, uint height)
{
if (width <= 0 || height <= 0) { return WRONG_INPUT; }
int* scratch = (int*)malloc(sizeof(int) * width * height);
if (!scratch) { return SET_ON_FIRE; }
for (uint j = 0; j < width; j++)
{
for (uint i = 0; i < height; i++)
{
scratch[j*height + i] = matrix[i*width + j];
}
}
memcpy(matrix, scratch, sizeof(int) * width * height);
free(scratch);
return OK;
}
uint next_in_cycle(uint i, uint w, uint h)
{
return (i*w + i/h) % (w*h);
}
mt_error transpose_cycles(int* matrix, uint width, uint height)
{
if (width <= 0 || height <= 0) { return WRONG_INPUT; }
uint size = width * height;
char* visited = (char*)malloc(sizeof(char) * size);
if (!visited) { return SET_ON_FIRE; }
memset(visited, 0, sizeof(char) * size);
for (uint i = 0; i < size; i++)
{
if (visited[i]) { continue; }
uint start = i;
// swap elements while following a cycle
int tmp = matrix[i];
while (1) {
visited[i] = 1;
uint next_idx = next_in_cycle(i, width, height);
if (next_idx == start)
{
matrix[i] = tmp;
i = start;
break;
}
matrix[i] = matrix[next_idx];
i = next_idx;
}
}
free(visited);
return OK;
}
mt_error transpose_cycles_const_space(
int* matrix, int width, int height)
{
if (width <= 0 || height <= 0) { return WRONG_INPUT; }
uint size = width * height;
for (uint i = 0; i < size; i++)
{
uint start = i;
// do a dry-run, if we land in a cell to the left,
// then we've already visited that loop
do {
i = next_in_cycle(i, width, height);
} while (i > start);
if (i < start)
{
i = start;
continue;
}
// swap elements while following a cycle
int tmp = matrix[i];
while (1) {
uint next_idx = next_in_cycle(i, width, height);
if (next_idx == start)
{
matrix[i] = tmp;
i = start;
break;
}
matrix[i] = matrix[next_idx];
i = next_idx;
}
}
return OK;
}
void fill_array(int* matrix, uint size)
{
for (uint i = 0; i < size; i++)
{
matrix[i] = i;
}
}
int test_transposed(int* matrix, uint width, uint height)
{
for (uint i = 0; i < width; i++)
{
for (uint j = 0; j < height; j++)
{
if (matrix[i*height+j] != j*width + i) {
printf("pos: %d %d\n", i, j);
printf("val: %d %d\n", matrix[i*height+j], j*width + i);
return 0;
}
}
}
return 1;
}
int test_transpose_fn(
int* matrix, uint width, uint height,
transpose_fn fn, int loops)
{
fill_array(matrix, width * height);
mt_error success = fn(matrix, width, height);
if (!success == OK)
{
printf("transpose failed!\n");
return -1;
}
int res = test_transposed(matrix, width, height);
if (!res)
{
printf("function did not transpose correctly!\n");
return -1;
}
uint64 start = timer_now();
for (int i = 0; i < loops; i++)
{
fn(matrix, width, height);
}
uint64 end = timer_now();
return (end - start) * 1000 / (timer_frequency());
}
int main(int argc, char** argv)
{
uint width = 2;
uint height = 1000000;
int* matrix = (int*)malloc(sizeof(int) * width * height);
if (!matrix) {
printf("caught fire!\n");
return 1;
}
int loops = 10;
printf("naive cols: %d\n",
test_transpose_fn(matrix, width, height,
(transpose_fn)&transpose_naive_cols, loops));
printf("naive rows: %d\n",
test_transpose_fn(matrix, width, height,
(transpose_fn)&transpose_naive_rows, loops));
printf("cycles: %d\n",
test_transpose_fn(matrix, width, height,
(transpose_fn)&transpose_cycles, loops));
printf("cycles const space: %d\n",
test_transpose_fn(matrix, width, height,
(transpose_fn)&transpose_cycles_const_space, loops));
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment