Skip to content

Instantly share code, notes, and snippets.

@zeux
Created August 1, 2017 05:23
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zeux/868ab739bca54207a0bdf24e713035c8 to your computer and use it in GitHub Desktop.
Save zeux/868ab739bca54207a0bdf24e713035c8 to your computer and use it in GitHub Desktop.
Source code for "Optimal grid rendering is not optimal" article.
#include <d3d11.h>
#include <d3dcompiler.h>
#include <stdio.h>
#include <assert.h>
#include <cassert>
#include <cmath>
#include <vector>
#pragma comment (lib, "d3d11.lib")
#pragma comment (lib, "d3dcompiler.lib")
#pragma comment (lib, "dxgi.lib")
namespace TomF
{
const size_t max_cache_size = 32;
const float valence_boost_scale = 2.0f;
const float last_triangle_score = 0.75f;
// The table is for max_cache_size = 32, cache_decay_power = 1.5,
// Equation is ((position - 3) / (max_cache_size - 3)) ^ cache_decay_power
static const float cache_scores[max_cache_size] =
{
last_triangle_score, last_triangle_score, last_triangle_score,
// perl -e "for ($i = 32; $i > 3; --$i) {print sqrt(($i - 3) / 29)*(($i-3)/29);print 'f, ';}"
1, 0.948724356160999f, 0.898356365398566f, 0.84891268884993f, 0.800410940418327f,
0.752869781209394f, 0.706309027617917f, 0.660749775712085f, 0.616214545218346f,
0.572727447267172f, 0.530314381193352f, 0.489003267201725f, 0.448824323769283f,
0.409810401494183f, 0.371997389083488f, 0.335424712859142f, 0.300135959460546f,
0.266179663821797f, 0.233610323536753f, 0.202489730867139f, 0.172888763130359f,
0.144889856948659f, 0.118590544520125f, 0.0941087226511742f, 0.0715909309083965f,
0.0512263001867729f, 0.0332724579777247f, 0.0181112321185824f, 0.00640328752334662f,
};
struct Vertex
{
Vertex(): cache_position(-1), score(0), triangles_total(0), triangles_not_added(0), triangles(0)
{
}
int cache_position;
float score;
unsigned int triangles_total;
unsigned int triangles_not_added;
unsigned int* triangles;
};
static float vertexScore(const Vertex& vertex)
{
// check if there are any triangles needed
if (vertex.triangles_not_added == 0) return -1;
float score = vertex.cache_position == -1 ? 0 : cache_scores[vertex.cache_position];
// Bonus points for having a low number of tris still to use the vert, so we get rid of lone verts quickly.
// return score + valence_boost_scale * powf(vertex.triangles_not_added, -valence_boost_power);
// for valence_boost_power = 0.5f
return score + valence_boost_scale / sqrtf((float)vertex.triangles_not_added);
}
void optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
{
// support in-place optimization
std::vector<unsigned int> indices_copy;
if (destination == indices)
{
indices_copy.assign(indices, indices + index_count);
indices = &indices_copy[0];
}
// clamp cache size to the size of LUT
if (cache_size > max_cache_size)
cache_size = max_cache_size;
const float m_inf = -1e32f;
assert(index_count % 3 == 0);
size_t face_count = index_count / 3;
std::vector<Vertex> vertices(vertex_count);
// initializing vertex data
for (size_t i = 0; i < index_count; ++i)
{
vertices[indices[i]].triangles_total++;
}
// calculate total triangle number
std::vector<unsigned int> triangle_indices(index_count); // total triangle indices count == index count
unsigned int* triangle_indices_ptr = &triangle_indices[0];
// allocate storage for triangle indices
for (size_t i = 0; i < vertex_count; ++i)
{
Vertex& v = vertices[i];
v.triangles = triangle_indices_ptr;
triangle_indices_ptr += v.triangles_total;
}
// fill triangle indices
for (size_t i = 0; i < face_count; ++i)
{
Vertex& a = vertices[indices[i * 3 + 0]];
a.triangles[a.triangles_not_added++] = static_cast<unsigned int>(i);
Vertex& b = vertices[indices[i * 3 + 1]];
b.triangles[b.triangles_not_added++] = static_cast<unsigned int>(i);
Vertex& c = vertices[indices[i * 3 + 2]];
c.triangles[c.triangles_not_added++] = static_cast<unsigned int>(i);
}
// compute initial vertex scores
for (size_t i = 0; i < vertex_count; ++i)
{
Vertex& v = vertices[i];
v.score = vertexScore(v);
}
// compute triangle scores
std::vector<float> triangle_scores(face_count);
for (size_t i = 0; i < face_count; ++i)
{
unsigned int tri_a = indices[i * 3 + 0];
unsigned int tri_b = indices[i * 3 + 1];
unsigned int tri_c = indices[i * 3 + 2];
triangle_scores[i] = vertices[tri_a].score + vertices[tri_b].score + vertices[tri_c].score;
}
unsigned int* destination_end = destination + index_count;
unsigned int cache_holder[2 * (max_cache_size + 3)];
unsigned int* cache = cache_holder;
unsigned int* cache_end = cache;
unsigned int* cache_new = cache + max_cache_size + 3;
unsigned int* cache_new_end = cache_new;
int min_tri = -1;
// main body
while (destination != destination_end)
{
// find triangle with the best score if it was not found on previous step
if (min_tri < 0)
{
float min_score = m_inf;
for (size_t i = 0; i < face_count; ++i)
{
float tri_score = triangle_scores[i];
if (min_score < tri_score)
{
min_tri = int(i);
min_score = tri_score;
}
}
}
unsigned int tri_a = indices[min_tri * 3 + 0];
unsigned int tri_b = indices[min_tri * 3 + 1];
unsigned int tri_c = indices[min_tri * 3 + 2];
// add it to drawing sequence
*destination++ = tri_a;
*destination++ = tri_b;
*destination++ = tri_c;
triangle_scores[min_tri] = m_inf;
// construct new cache
cache_new_end = cache_new;
// new triangle
*cache_new_end++ = tri_a;
*cache_new_end++ = tri_b;
*cache_new_end++ = tri_c;
// old parts
int cp_a = vertices[tri_a].cache_position;
int cp_b = vertices[tri_b].cache_position;
int cp_c = vertices[tri_c].cache_position;
// order indices
if (cp_a > cp_b) std::swap(cp_a, cp_b);
if (cp_a > cp_c) std::swap(cp_a, cp_c);
if (cp_b > cp_c) std::swap(cp_b, cp_c);
if (cp_c == -1) // largest is -1 => all 3 are -1
{
cache_new_end = std::copy(cache, cache_end, cache_new_end);
}
else if (cp_b == -1) // cp_a == -1, cp_b == -1
{
cache_new_end = std::copy(cache, cache + cp_c, cache_new_end);
cache_new_end = std::copy(cache + cp_c + 1, cache_end, cache_new_end);
}
else if (cp_a == -1) // only cp_a == -1
{
cache_new_end = std::copy(cache, cache + cp_b, cache_new_end);
cache_new_end = std::copy(cache + cp_b + 1, cache + cp_c, cache_new_end);
cache_new_end = std::copy(cache + cp_c + 1, cache_end, cache_new_end);
}
else
{
cache_new_end = std::copy(cache, cache + cp_a, cache_new_end);
cache_new_end = std::copy(cache + cp_a + 1, cache + cp_b, cache_new_end);
cache_new_end = std::copy(cache + cp_b + 1, cache + cp_c, cache_new_end);
cache_new_end = std::copy(cache + cp_c + 1, cache_end, cache_new_end);
}
size_t cache_new_size = cache_new_end - cache_new;
if (cache_new_size > cache_size) cache_new_end = cache_new + cache_size;
std::swap(cache, cache_new);
std::swap(cache_end, cache_new_end);
// modify vertices
vertices[tri_a].triangles_not_added--;
vertices[tri_b].triangles_not_added--;
vertices[tri_c].triangles_not_added--;
min_tri = -1;
float min_score = m_inf;
// update cache positions, vertices scores and triangle scores, and find new min_face
for (size_t i = 0; i < cache_new_size; ++i)
{
Vertex& v = vertices[cache[i]];
v.cache_position = i >= cache_size ? -1 : (int)i;
float score = vertexScore(v);
float score_diff = score - v.score;
// update scores of vertex triangles
for (size_t j = 0; j < v.triangles_total; ++j)
{
unsigned int tri = v.triangles[j];
triangle_scores[tri] += score_diff;
float tri_score = triangle_scores[tri];
if (min_score < tri_score)
{
min_tri = tri;
min_score = tri_score;
}
}
v.score = score;
}
}
}
}
namespace Tipsify
{
struct Adjacency
{
std::vector<unsigned int> triangle_counts;
std::vector<unsigned int> offsets;
std::vector<unsigned int> data;
};
static void buildAdjacency(Adjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count)
{
// fill triangle counts
adjacency.triangle_counts.resize(vertex_count, 0);
for (size_t i = 0; i < index_count; ++i)
{
assert(indices[i] < vertex_count);
adjacency.triangle_counts[indices[i]]++;
}
// fill offset table
adjacency.offsets.resize(vertex_count);
unsigned int offset = 0;
for (size_t i = 0; i < vertex_count; ++i)
{
adjacency.offsets[i] = offset;
offset += adjacency.triangle_counts[i];
}
// fill triangle data
adjacency.data.resize(offset);
std::vector<unsigned int> offsets = adjacency.offsets;
size_t face_count = index_count / 3;
for (size_t i = 0; i < face_count; ++i)
{
unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
adjacency.data[offsets[a]++] = unsigned(i);
adjacency.data[offsets[b]++] = unsigned(i);
adjacency.data[offsets[c]++] = unsigned(i);
}
}
static unsigned int getNextVertexDeadEnd(const std::vector<unsigned int>& dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const std::vector<unsigned int>& live_triangles)
{
// check dead-end stack
while (dead_end_top)
{
unsigned int vertex = dead_end[--dead_end_top];
if (live_triangles[vertex] > 0)
return vertex;
}
// input order
while (input_cursor < live_triangles.size())
{
if (live_triangles[input_cursor] > 0)
return input_cursor;
++input_cursor;
}
return static_cast<unsigned int>(-1);
}
static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const std::vector<unsigned int>& live_triangles, const std::vector<unsigned int>& cache_timestamps, unsigned int timestamp, unsigned int cache_size)
{
unsigned int best_candidate = static_cast<unsigned int>(-1);
int best_priority = -1;
for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate)
{
unsigned int vertex = *next_candidate;
// otherwise we don't need to process it
if (live_triangles[vertex] > 0)
{
int priority = 0;
// will it be in cache after fanning?
if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size)
{
priority = timestamp - cache_timestamps[vertex]; // position in cache
}
if (priority > best_priority)
{
best_candidate = vertex;
best_priority = priority;
}
}
}
return best_candidate;
}
void optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
{
assert(index_count % 3 == 0);
assert(cache_size >= 3);
// guard for empty meshes
if (index_count == 0 || vertex_count == 0)
{
return;
}
// support in-place optimization
std::vector<unsigned int> indices_copy;
if (destination == indices)
{
indices_copy.assign(indices, indices + index_count);
indices = &indices_copy[0];
}
// build adjacency information
Adjacency adjacency;
buildAdjacency(adjacency, indices, index_count, vertex_count);
// live triangle counts
std::vector<unsigned int> live_triangles = adjacency.triangle_counts;
// cache time stamps
std::vector<unsigned int> cache_timestamps(vertex_count, 0);
// dead-end stack
std::vector<unsigned int> dead_end(index_count);
unsigned int dead_end_top = 0;
// emitted flags
std::vector<char> emitted_flags(index_count / 3, false);
unsigned int current_vertex = 0;
unsigned int timestamp = cache_size + 1;
unsigned int input_cursor = 1; // vertex to restart from in case of dead-end
unsigned int output_triangle = 0;
while (current_vertex != static_cast<unsigned int>(-1))
{
const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;
// emit all vertex neighbours
const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
const unsigned int* neighbours_end = neighbours_begin + adjacency.triangle_counts[current_vertex];
for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
{
unsigned int triangle = *it;
if (!emitted_flags[triangle])
{
unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
// output indices
destination[output_triangle * 3 + 0] = a;
destination[output_triangle * 3 + 1] = b;
destination[output_triangle * 3 + 2] = c;
output_triangle++;
// update dead-end stack
dead_end[dead_end_top + 0] = a;
dead_end[dead_end_top + 1] = b;
dead_end[dead_end_top + 2] = c;
dead_end_top += 3;
// update live triangle counts
live_triangles[a]--;
live_triangles[b]--;
live_triangles[c]--;
// update cache info
// if vertex is not in cache, put it in cache
if (timestamp - cache_timestamps[a] > cache_size)
cache_timestamps[a] = timestamp++;
if (timestamp - cache_timestamps[b] > cache_size)
cache_timestamps[b] = timestamp++;
if (timestamp - cache_timestamps[c] > cache_size)
cache_timestamps[c] = timestamp++;
// update emitted flags
emitted_flags[triangle] = true;
}
}
// next candidates are the ones we pushed to dead-end stack just now
const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;
// get next vertex
current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, live_triangles, cache_timestamps, timestamp, cache_size);
if (current_vertex == static_cast<unsigned int>(-1))
{
current_vertex = getNextVertexDeadEnd(dead_end, dead_end_top, input_cursor, live_triangles);
}
}
assert(output_triangle == index_count / 3);
}
}
void gridGen(std::vector<unsigned int>& indices, int x0, int x1, int y0, int y1, int width, int cacheSize, bool prefetch)
{
if (x1 - x0 + 1 < cacheSize)
{
if (2 * (x1 - x0) + 1 > cacheSize && prefetch)
{
for (int x = x0; x < x1; x++)
{
indices.push_back(x + 0);
indices.push_back(x + 0);
indices.push_back(x + 1);
}
}
for (int y = y0; y < y1; y++)
{
for (int x = x0; x < x1; x++)
{
indices.push_back((width + 1) * (y + 0) + (x + 0));
indices.push_back((width + 1) * (y + 1) + (x + 0));
indices.push_back((width + 1) * (y + 0) + (x + 1));
indices.push_back((width + 1) * (y + 0) + (x + 1));
indices.push_back((width + 1) * (y + 1) + (x + 0));
indices.push_back((width + 1) * (y + 1) + (x + 1));
}
}
}
else
{
int xm = x0 + cacheSize - 2;
gridGen(indices, x0, xm, y0, y1, width, cacheSize, prefetch);
gridGen(indices, xm, x1, y0, y1, width, cacheSize, prefetch);
}
}
unsigned int queryVSInvocations(ID3D11Device* device, ID3D11DeviceContext* context, const unsigned int* indices, size_t index_count)
{
ID3D11Buffer* ib = 0;
{
D3D11_BUFFER_DESC bd = {};
bd.Usage = D3D11_USAGE_DYNAMIC;
bd.ByteWidth = index_count * 4;
bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
device->CreateBuffer(&bd, 0, &ib);
D3D11_MAPPED_SUBRESOURCE ms;
context->Map(ib, 0, D3D11_MAP_WRITE_DISCARD, 0, &ms);
memcpy(ms.pData, indices, index_count * 4);
context->Unmap(ib, 0);
}
context->IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
context->IASetIndexBuffer(ib, DXGI_FORMAT_R32_UINT, 0);
D3D11_QUERY_DESC qdesc = { D3D11_QUERY_PIPELINE_STATISTICS };
ID3D11Query* query = 0;
device->CreateQuery(&qdesc, &query);
context->Begin(query);
context->DrawIndexed(index_count, 0, 0);
context->End(query);
D3D11_QUERY_DATA_PIPELINE_STATISTICS stats = {};
while (S_FALSE == context->GetData(query, &stats, sizeof(stats), 0))
;
query->Release();
ib->Release();
assert(stats.IAVertices == index_count);
return stats.VSInvocations;
}
unsigned int estimateVSInvocationsFIFO(const unsigned int* indices, size_t index_count, size_t cache_size)
{
std::vector<unsigned int> cache;
unsigned int invocations = 0;
for (size_t i = 0; i < index_count; ++i)
{
unsigned int index = indices[i];
bool found = false;
for (size_t j = 0; j < cache.size(); ++j)
{
if (cache[j] == index)
{
found = true;
break;
}
}
if (!found)
invocations++;
if (!found)
cache.push_back(index);
if (cache.size() > cache_size)
cache.erase(cache.begin(), cache.end() - cache_size);
}
return invocations;
}
unsigned int estimateVSInvocationsLRU(const unsigned int* indices, size_t index_count, size_t cache_size)
{
std::vector<unsigned int> cache;
unsigned int invocations = 0;
for (size_t i = 0; i < index_count; ++i)
{
unsigned int index = indices[i];
bool found = false;
for (size_t j = 0; j < cache.size(); ++j)
{
if (cache[j] == index)
{
found = true;
cache.erase(cache.begin() + j);
break;
}
}
if (!found)
invocations++;
cache.push_back(index);
if (cache.size() > cache_size)
cache.erase(cache.begin(), cache.end() - cache_size);
}
return invocations;
}
unsigned int estimateVSInvocationsReuse(const unsigned int* indices, size_t index_count, size_t warp_size, size_t fifo_size, size_t triangle_limit)
{
std::vector<unsigned int> warp;
unsigned int invocations = 0;
unsigned int triangles = 0;
for (size_t i = 0; i < index_count; i += 3)
{
bool found[3] = {};
for (size_t k = 0; k < 3; ++k)
{
for (size_t j = 0; j < warp.size(); ++j)
{
if (warp.size() - j > fifo_size)
continue;
if (warp[j] == indices[i + k])
{
found[k] = true;
break;
}
}
}
// triangle doesn't fit into warp we're currently building - flush
if (warp.size() + !found[0] + !found[1] + !found[2] > warp_size || triangles >= triangle_limit)
{
invocations += warp.size();
warp.clear();
// restart warp from scratch
warp.push_back(indices[i + 0]);
warp.push_back(indices[i + 1]);
warp.push_back(indices[i + 2]);
triangles = 1;
}
else
{
if (!found[0])
warp.push_back(indices[i + 0]);
if (!found[1])
warp.push_back(indices[i + 1]);
if (!found[2])
warp.push_back(indices[i + 2]);
triangles++;
}
}
// last warp
invocations += warp.size();
return invocations;
}
void setupShaders(ID3D11Device* device, ID3D11DeviceContext* context)
{
// load and compile the two shaders
const char* shaders =
"#define ATTRIBUTES 5\n"
"struct Foo { float4 v[ATTRIBUTES]; };"
"float4 VS(uint index: SV_VertexId, out Foo foo: FOO): SV_Position { uint i = index % 3; [unroll] for (int j = 0; j < ATTRIBUTES; j++) foo.v[j] = j; return float4(i != 0, i != 2, 0, 1); }"
"float4 PS(Foo foo: FOO): SV_Target { float4 result = 0; [unroll] for (int j = 0; j < ATTRIBUTES; j++) result += foo.v[j]; return result; }";
ID3DBlob* vsblob = 0;
ID3DBlob* psblob = 0;
D3DCompile(shaders, strlen(shaders), 0, 0, 0, "VS", "vs_5_0", 0, 0, &vsblob, 0);
D3DCompile(shaders, strlen(shaders), 0, 0, 0, "PS", "ps_5_0", 0, 0, &psblob, 0);
ID3D11VertexShader* vs = 0;
ID3D11PixelShader* ps = 0;
device->CreateVertexShader(vsblob->GetBufferPointer(), vsblob->GetBufferSize(), 0, &vs);
device->CreatePixelShader(psblob->GetBufferPointer(), psblob->GetBufferSize(), 0, &ps);
context->VSSetShader(vs, 0, 0);
context->PSSetShader(ps, 0, 0);
}
template <typename Cache>
void inspectCache(Cache cache)
{
unsigned int max_cache_size = 200;
unsigned int grid_size = 100;
for (unsigned int cache_size = 3; cache_size <= max_cache_size; cache_size += 1)
{
std::vector<unsigned int> grid1;
gridGen(grid1, 0, grid_size, 0, grid_size, grid_size, cache_size, true);
std::vector<unsigned int> grid2;
gridGen(grid2, 0, grid_size, 0, grid_size, grid_size, cache_size, false);
std::vector<unsigned int> grid3;
gridGen(grid3, 0, grid_size, 0, grid_size, grid_size, grid_size * 4, false); // this generates a simple indexed grid without striping/degenerate triangles
Tipsify::optimizeVertexCache(&grid3[0], &grid3[0], grid3.size(), (grid_size + 1) * (grid_size + 1), cache_size);
std::vector<unsigned int> grid4;
gridGen(grid4, 0, grid_size, 0, grid_size, grid_size, grid_size * 4, false); // this generates a simple indexed grid without striping/degenerate triangles
TomF::optimizeVertexCache(&grid4[0], &grid4[0], grid4.size(), (grid_size + 1) * (grid_size + 1), cache_size);
unsigned int invocations1 = cache(&grid1[0], grid1.size());
unsigned int invocations2 = cache(&grid2[0], grid2.size());
unsigned int invocations3 = cache(&grid3[0], grid3.size());
unsigned int invocations4 = cache(&grid4[0], grid4.size());
unsigned int ideal_invocations = (grid_size + 1) * (grid_size + 1);
printf("%d, %f, %f, %f, %f\n", cache_size,
double(invocations1) / double(ideal_invocations),
double(invocations2) / double(ideal_invocations),
double(invocations3) / double(ideal_invocations),
double(invocations4) / double(ideal_invocations));
}
}
void testCache(IDXGIAdapter* adapter)
{
ID3D11Device* device = 0;
ID3D11DeviceContext* context = 0;
D3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, 0, 0, 0, 0, D3D11_SDK_VERSION, &device, 0, &context);
setupShaders(device, context);
inspectCache([&](const unsigned int* indices, size_t index_count) { return queryVSInvocations(device, context, indices, index_count); });
unsigned int dib[] = { 0, 1, 1, 2, 3, 4, 5, 5, 5 };
printf("// Degenerate IB invocations: %d\n", queryVSInvocations(device, context, dib, sizeof(dib) / sizeof(dib[0])));
printf("// Reuse boundaries: ");
unsigned int last = 0;
for (unsigned int count = 3; count < 1000; count += 3)
{
std::vector<unsigned int> ib;
for (unsigned int i = 0; i < count; i += 3)
{
ib.push_back(0);
ib.push_back(1);
ib.push_back(2);
}
unsigned int invocations = queryVSInvocations(device, context, ib.data(), ib.size());
if (invocations != last)
{
printf("%d, ", count);
last = invocations;
}
}
printf("\n");
}
void modelCacheFIFO(size_t model_cache_size)
{
inspectCache([&](const unsigned int* indices, size_t index_count) { return estimateVSInvocationsFIFO(indices, index_count, model_cache_size); });
}
void modelCacheLRU(size_t model_cache_size)
{
inspectCache([&](const unsigned int* indices, size_t index_count) { return estimateVSInvocationsLRU(indices, index_count, model_cache_size); });
}
void modelCacheReuse(size_t model_warp_size, size_t model_fifo_size, size_t model_triangle_limit)
{
inspectCache([&](const unsigned int* indices, size_t index_count) { return estimateVSInvocationsReuse(indices, index_count, model_warp_size, model_fifo_size, model_triangle_limit); });
}
int main(int argc, char** argv)
{
IDXGIFactory1* factory = 0;
CreateDXGIFactory1(__uuidof(IDXGIFactory1), (void**)&factory);
IDXGIAdapter* adapter = NULL;
for (unsigned int index = 0; SUCCEEDED(factory->EnumAdapters(index, &adapter)); ++index)
{
DXGI_ADAPTER_DESC ad = {};
adapter->GetDesc(&ad);
printf("// GPU %d: %S\n", index, ad.Description);
testCache(adapter);
}
printf("// Model FIFO 128\n");
modelCacheFIFO(128);
printf("// Model LRU 16\n");
modelCacheLRU(16);
printf("// Model Reuse 32\n");
modelCacheReuse(32, 32, 32);
printf("// Model Reuse 32 w/16-entry FIFO\n");
modelCacheReuse(32, 16, 32);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment