Skip to content

Instantly share code, notes, and snippets.

@aolo2
Created May 14, 2020 11:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aolo2/643189cdb5a8f5b54de22e70ef0dd1a3 to your computer and use it in GitHub Desktop.
Save aolo2/643189cdb5a8f5b54de22e70ef0dd1a3 to your computer and use it in GitHub Desktop.
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <emmintrin.h>
#include <omp.h>

#include "external/tracy/TracyC.h"
/* Short aliases for the fixed-width unsigned integers used throughout this file. */
typedef uint32_t u32; /* exactly 32 bits, unsigned */
typedef uint64_t u64; /* exactly 64 bits, unsigned */
/*
 * Three-component single-precision vector.
 * In this file it is the element type of mesh vertex arrays and of the
 * face-point array loaded from the dump file.
 */
struct ms_v3 {
    float x, y, z;
};
/*
 * Indexed polygon mesh.
 * The field order is part of the layout and must not change.
 */
struct ms_mesh {
    struct ms_v3 *vertices; /* vertex positions; nverts entries */
    int *faces;             /* flat index list; main() sizes it as nfaces * degree ints */

    int degree;             /* indices stored per face (main() sets this to 4) */
    int nverts;             /* number of entries in `vertices` */
    int nfaces;             /* number of faces described by `faces` */
};
/*
 * Overlay that exposes the four 32-bit lanes of an SSE2 integer register
 * by name. `x` aliases the first (lowest-addressed) 32 bits of `v`,
 * followed by `y`, `z` and `w`.
 */
typedef union m128i {
    __m128i v;           /* the whole 128-bit value */
    struct {
        u32 x, y, z, w;  /* the same 128 bits, viewed lane by lane */
    };
} m128i;
static u64
cycles_now(void)
{
u32 hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return((u64) lo) | (((u64) hi) << 32);
}
int
main(void)
{
TracyCZoneN(load, "load file", true);
FILE *dump_file = fopen("dump", "rb");
struct ms_mesh mesh = { 0 };
struct ms_mesh new_mesh = { 0 };
int vert_base = 0;
int nedge_pointsv = 0;
mesh.degree = 4;
fread(&mesh.nfaces, sizeof(int), 1, dump_file);
fread(&mesh.nverts, sizeof(int), 1, dump_file);
fread(&nedge_pointsv, sizeof(int), 1, dump_file);
fread(&vert_base, sizeof(int), 1, dump_file);
printf("%d %d %d %d\n", mesh.nfaces, mesh.nverts, nedge_pointsv, vert_base);
struct ms_v3 *face_points = malloc(mesh.nfaces * sizeof(struct ms_v3));
int *edge_points = malloc(mesh.nfaces * mesh.degree * sizeof(int));
mesh.faces = malloc(mesh.nfaces * mesh.degree * sizeof(int));
fread(face_points, mesh.nfaces * sizeof(struct ms_v3), 1, dump_file);
fread(edge_points, mesh.nfaces * mesh.degree * sizeof(int), 1, dump_file);
fread(mesh.faces, mesh.nfaces * mesh.degree * sizeof(int), 1, dump_file);
fclose(dump_file);
new_mesh.nfaces = mesh.nfaces * 4;
new_mesh.nverts = mesh.nverts + nedge_pointsv + mesh.nfaces;
new_mesh.vertices = malloc(new_mesh.nverts * sizeof(struct ms_v3));
// new_mesh.faces = malloc(new_mesh.nfaces * 4 * sizeof(int));
__m128i *faces;
posix_memalign((void **) &faces, 64, new_mesh.nfaces * 4 * sizeof(__m128i));
////////////////////////////////
int nthreads = 4;
omp_set_num_threads(nthreads);
////////////////////////////////
TracyCZoneEnd(load);
int edgep_base = mesh.nverts;
memcpy(new_mesh.vertices + vert_base, face_points, mesh.nfaces * sizeof(struct ms_v3));
u64 before = cycles_now();
int iterations = 500;
TracyCZoneN(subdiv_out, "subdiv out", true);
for (int i = 0; i < iterations; ++i) {
TracyCZoneN(subdiv_in, "subdiv inner", true);
#pragma omp parallel for
for (int face = 0; face < mesh.nfaces * 4; face += 4) {
int facep_index = vert_base + (face >> 2);
m128i epts = { .v = _mm_load_si128((__m128i *) (edge_points + face)) };
m128i fcs = { .v = _mm_load_si128((__m128i *) (mesh.faces + face)) };
/* Add faces */
faces[face + 0] = _mm_set_epi32(fcs.x, epts.x, facep_index, epts.w);
faces[face + 1] = _mm_set_epi32(fcs.y, epts.y, facep_index, epts.x);
faces[face + 2] = _mm_set_epi32(fcs.z, epts.z, facep_index, epts.y);
faces[face + 3] = _mm_set_epi32(fcs.w, epts.w, facep_index, epts.z);
}
TracyCZoneEnd(subdiv_in);
}
TracyCZoneEnd(subdiv_out);
new_mesh.faces = (int *) faces;
u64 after = cycles_now();
printf("%ld cycles, %ld c/i\n", after - before, (after - before) / iterations);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment