/*
 * Source: GitHub gist aolo2/643189cdb5a8f5b54de22e70ef0dd1a3
 * Created: May 14, 2020 11:14
 *
 * Note carried over from the hosting page: this file contains bidirectional
 * Unicode text that may be interpreted or compiled differently than it
 * appears. To review it, open the file in an editor that reveals hidden
 * Unicode characters.
 */
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <emmintrin.h>
#include <omp.h>

#include "external/tracy/TracyC.h"
/* Short aliases for the fixed-width unsigned integers used throughout the file. */
typedef uint64_t u64;
typedef uint32_t u32;
/* Three-component single-precision vector (x, y, z). */
struct ms_v3 {
    float x;
    float y;
    float z;
};
/*
 * Polygon mesh description.
 *
 * The array sizes below follow how main() allocates them; the index
 * semantics of `faces` are presumed (vertex indices) -- confirm against
 * the code that produces the dump.
 */
struct ms_mesh {
    struct ms_v3 *vertices; /* nverts entries */
    int *faces;             /* nfaces * degree ints, `degree` per face */
    int degree;             /* ints stored per face (main() sets 4) */
    int nverts;             /* number of vertices */
    int nfaces;             /* number of faces */
};
typedef union m128i { | |
__m128i v; | |
struct { | |
u32 x; | |
u32 y; | |
u32 z; | |
u32 w; | |
}; | |
} m128i; | |
static u64 | |
cycles_now(void) | |
{ | |
u32 hi, lo; | |
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); | |
return((u64) lo) | (((u64) hi) << 32); | |
} | |
int | |
main(void) | |
{ | |
TracyCZoneN(load, "load file", true); | |
FILE *dump_file = fopen("dump", "rb"); | |
struct ms_mesh mesh = { 0 }; | |
struct ms_mesh new_mesh = { 0 }; | |
int vert_base = 0; | |
int nedge_pointsv = 0; | |
mesh.degree = 4; | |
fread(&mesh.nfaces, sizeof(int), 1, dump_file); | |
fread(&mesh.nverts, sizeof(int), 1, dump_file); | |
fread(&nedge_pointsv, sizeof(int), 1, dump_file); | |
fread(&vert_base, sizeof(int), 1, dump_file); | |
printf("%d %d %d %d\n", mesh.nfaces, mesh.nverts, nedge_pointsv, vert_base); | |
struct ms_v3 *face_points = malloc(mesh.nfaces * sizeof(struct ms_v3)); | |
int *edge_points = malloc(mesh.nfaces * mesh.degree * sizeof(int)); | |
mesh.faces = malloc(mesh.nfaces * mesh.degree * sizeof(int)); | |
fread(face_points, mesh.nfaces * sizeof(struct ms_v3), 1, dump_file); | |
fread(edge_points, mesh.nfaces * mesh.degree * sizeof(int), 1, dump_file); | |
fread(mesh.faces, mesh.nfaces * mesh.degree * sizeof(int), 1, dump_file); | |
fclose(dump_file); | |
new_mesh.nfaces = mesh.nfaces * 4; | |
new_mesh.nverts = mesh.nverts + nedge_pointsv + mesh.nfaces; | |
new_mesh.vertices = malloc(new_mesh.nverts * sizeof(struct ms_v3)); | |
// new_mesh.faces = malloc(new_mesh.nfaces * 4 * sizeof(int)); | |
__m128i *faces; | |
posix_memalign((void **) &faces, 64, new_mesh.nfaces * 4 * sizeof(__m128i)); | |
//////////////////////////////// | |
int nthreads = 4; | |
omp_set_num_threads(nthreads); | |
//////////////////////////////// | |
TracyCZoneEnd(load); | |
int edgep_base = mesh.nverts; | |
memcpy(new_mesh.vertices + vert_base, face_points, mesh.nfaces * sizeof(struct ms_v3)); | |
u64 before = cycles_now(); | |
int iterations = 500; | |
TracyCZoneN(subdiv_out, "subdiv out", true); | |
for (int i = 0; i < iterations; ++i) { | |
TracyCZoneN(subdiv_in, "subdiv inner", true); | |
#pragma omp parallel for | |
for (int face = 0; face < mesh.nfaces * 4; face += 4) { | |
int facep_index = vert_base + (face >> 2); | |
m128i epts = { .v = _mm_load_si128((__m128i *) (edge_points + face)) }; | |
m128i fcs = { .v = _mm_load_si128((__m128i *) (mesh.faces + face)) }; | |
/* Add faces */ | |
faces[face + 0] = _mm_set_epi32(fcs.x, epts.x, facep_index, epts.w); | |
faces[face + 1] = _mm_set_epi32(fcs.y, epts.y, facep_index, epts.x); | |
faces[face + 2] = _mm_set_epi32(fcs.z, epts.z, facep_index, epts.y); | |
faces[face + 3] = _mm_set_epi32(fcs.w, epts.w, facep_index, epts.z); | |
} | |
TracyCZoneEnd(subdiv_in); | |
} | |
TracyCZoneEnd(subdiv_out); | |
new_mesh.faces = (int *) faces; | |
u64 after = cycles_now(); | |
printf("%ld cycles, %ld c/i\n", after - before, (after - before) / iterations); | |
} |
/*
 * Hosting-page footer (not part of the program): "Sign up for free to join
 * this conversation on GitHub. Already have an account? Sign in to comment."
 */