Skip to content

Instantly share code, notes, and snippets.

@szdarkhack
Last active September 22, 2019 18:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save szdarkhack/8145086 to your computer and use it in GitHub Desktop.
Save szdarkhack/8145086 to your computer and use it in GitHub Desktop.
Here's the code sample, let me know if you need any clarifications.
// Description:
//
// A simple test program using the ARB_shader_image_load_store extension (core in OpenGL 4.2 and above) to completely take over the fragment operations,
// in the hope of providing a method for accurate emulation of systems offering stencil/depth tests or blending operations that are impossible to
// implement using the standard OpenGL equivalents, such as the Sony PSP. This method also has the benefit of being single-pass, so it is pretty
// efficient and doesn't require any extra framebuffers.
//
// The rendering shader renders both to the framebuffer and to a few images (color, depth, stencil) such that it can subsequently use the data
// to perform depth/stencil testing and blending. Memory consistency is achieved by a combination of cache qualifiers, per-pixel mutexes and
// memory barriers where appropriate. Specifically, glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) is required between draw calls with
// minimal performance impact in all cases. In situations where a single draw call may overlap with itself, the coherent cache qualifier is
// required in the declarations of the images in the shader. The mutex texture is also used in the same situation to avoid scheduling issues.
//
// Incorporating this tech into existing software will require changes to state management (since tests are now in the shader and must remain
// disabled in the normal OpenGL pipeline) and shader generation (to incorporate the tests).
//
// If you plan to build it, you'll need the latest GLFW and GLEW libraries, as well as a graphics card supporting OpenGL 4.2 and up to date
// graphics drivers.
//
// Keys:
//
// Esc Quit
// B Toggle blending (in-shader, hardcoded weights of 0.5 and 0.5, just for testing purposes)
// D Toggle depth testing (in-shader)
// T Toggle drawing the spinning triangle
// O Toggle between drawing the quad and drawing a self-overlapping triangle strip
//
// Notes on the code:
//
// The code was written in a hurry, patched left and right and as a result is very ugly, full of globals and macros and even the disgusting goto.
// Sorry :)
// The actual important parts of the code should be pretty clear and commented.
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
#include <gl/glew.h>
#include <GL/wglew.h>
#include <GLFW/glfw3.h>
// Sorry for the macros, too busy to make this pretty
// OpenGL context version requested from GLFW and checked through GLEW.
#define GL_MAJOR 4
#define GL_MINOR 2
// Two-level stringification so that macro arguments (GL_MAJOR/GL_MINOR) are
// expanded to their values before being turned into string literals.
#define STRINGIFY_IMP(A) #A
#define STRINGIFY(A) STRINGIFY_IMP(A)
// Expands to "VERSION_4_2"; reportSupported() prepends "GL_" before querying GLEW.
#define GL_VERSION_STR "VERSION_" STRINGIFY(GL_MAJOR) "_" STRINGIFY(GL_MINOR)
// Expresses a byte offset as a pointer, for GL entry points that take a
// pointer argument which is interpreted as an offset into the bound buffer.
#define BUFFER_OFFSET(bytes) ((GLubyte*)NULL + (bytes))
// Prints an abort notice, records failure in `state` and jumps to a label named
// CLEANUP; both `state` and the label must be visible at the expansion site.
#define ABORT {std::cerr << "Aborting program...\n"; state = -1; goto CLEANUP;}
// Also sorry for the globals, again too busy to properly wrap this in a class or something
// Uniform locations of the rendering program; looked up during setup and
// used by key_callback() to push toggled values to the shader.
GLint ui_depthTestEnabled;
GLint ui_depthWriteEnabled;
GLint ui_blendEnabled;
// Exit status returned from main(); stays 0 unless setup aborts (then -1).
int state = 0;
// Toggles flipped between 0 and 1 by key_callback():
// in-shader depth test on/off (D key), uploaded to 'depthTestEnabled'
int dte_value = 1;
// index into the draw-mode table of the render loop (O key):
// 0 = quad, 1 = self-overlapping triangle strip
int overlap = 1;
// whether the spinning triangle is drawn (T key)
int draw_triangle = 1;
// in-shader blending on/off (B key), uploaded to 'blendEnabled'
int blend = 1;
// GLFW error handler.
//
// Writes one line per error to std::cerr in the form
// "GLFW error <code>: <description>\n" so that consecutive errors do not run
// together and the numeric code can be looked up.
//
// error        GLFW error code.
// description  Human-readable description; a null pointer is tolerated and
//              reported as "(no description)" instead of putting the stream
//              into a failed state.
void error_callback(int error, const char* description)
{
    std::cerr << "GLFW error " << error << ": "
              << (description != nullptr ? description : "(no description)")
              << '\n';
}
// GLFW key event handler.
//
// Only key presses are handled; every other action is ignored.
//   Esc  request window close
//   D    toggle the in-shader depth test
//   O    toggle quad / self-overlapping triangle strip
//   T    toggle the spinning triangle
//   B    toggle in-shader blending
//
// The D and B handlers call glUniform1i, which updates the program that is
// current at the time of the call, so the rendering program has to be the
// one in use when events are polled.
static void key_callback(GLFWwindow* window, int key, int scancode, int action, int mods)
{
    if (action != GLFW_PRESS) return;

    switch (key) {
    case GLFW_KEY_ESCAPE: {
        glfwSetWindowShouldClose(window, GL_TRUE);
        break;
    }
    case GLFW_KEY_D: {
        dte_value ^= 1;
        glUniform1i(ui_depthTestEnabled, dte_value);
        const char* prefix = (dte_value == 1) ? "en" : "dis";
        std::cout << "Depth test " << prefix << "abled.\n";
        break;
    }
    case GLFW_KEY_O: {
        overlap ^= 1;
        const char* shape = (overlap == 0) ? "quad.\n" : "overlapping triangle strip.\n";
        std::cout << "Drawing " << shape;
        break;
    }
    case GLFW_KEY_T: {
        draw_triangle ^= 1;
        const char* prefix = (draw_triangle == 1) ? "en" : "dis";
        std::cout << "Triangle " << prefix << "abled.\n";
        break;
    }
    case GLFW_KEY_B: {
        blend ^= 1;
        glUniform1i(ui_blendEnabled, blend);
        const char* prefix = (blend == 1) ? "en" : "dis";
        std::cout << "Blending " << prefix << "abled.\n";
        break;
    }
    default:
        break;
    }
}
// Print diagnostics about our support.
//
// Queries GLEW for the feature named "GL_" + ext, prints
// "<ext> is supported." or "<ext> is NOT supported." to std::cout and
// returns whether the feature is available.
static bool reportSupported(const char *ext)
{
    const std::string query = std::string("GL_") + ext;
    const bool supported = (glewIsSupported(query.c_str()) == GL_TRUE);
    const char *separator = supported ? " " : " NOT ";
    std::cout << ext << " is" << separator << "supported.\n";
    return supported;
}
int main()
{
if (!glfwInit()) return -1;
glfwSetErrorCallback(error_callback);
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, GL_MAJOR);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, GL_MINOR);
// Compatibility profile to avoid some vertex shaders and manual matrix manipulation
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_COMPAT_PROFILE);
int width = 640;
int height = 480;
float ratio;
// Setup window and context
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
GLFWwindow* window = glfwCreateWindow(width, height, "GLFW Window", nullptr, nullptr);
if (!window) return -1;
glfwMakeContextCurrent(window);
GLenum err = glewInit();
if (err != GLEW_OK)
{
std::cerr << glewGetErrorString(err) << '\n';
return -1;
}
// Check for OpenGL 4.2
if (!reportSupported(GL_VERSION_STR)) ABORT;
glfwSetKeyCallback(window, key_callback);
wglSwapIntervalEXT(1);
glfwGetFramebufferSize(window, &width, &height);
ratio = width / static_cast<float>(height);
// Setup textures
// Note that we don't need to configure any filtering since we're
// using them through images instead of samplers
enum {
color,
depth,
stencil,
mutex,
texnum
};
GLuint textures[texnum];
glGenTextures(texnum, textures);
// Color/alpha texture
glBindTexture(GL_TEXTURE_2D, textures[color]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, width, height);
// Depth texture
glBindTexture(GL_TEXTURE_2D, textures[depth]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R16F, width, height);
// Stencil texture
glBindTexture(GL_TEXTURE_2D, textures[stencil]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R8UI, width, height);
// Mutex buffer
glBindTexture(GL_TEXTURE_2D, textures[mutex]);
// GL_R32UI (or GL_R32I) necessary for atomic operations
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, width, height);
// Initialize the mutex to 0
unsigned int *zeros = new unsigned int[width*height];
memset(zeros, 0, width*height*4);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RED, GL_UNSIGNED_INT, zeros);
delete[] zeros;
glBindImageTexture(0, textures[color], 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA8);
glBindImageTexture(1, textures[depth], 0, GL_FALSE, 0, GL_READ_WRITE, GL_R16F);
glBindImageTexture(2, textures[stencil], 0, GL_FALSE, 0, GL_READ_WRITE, GL_R8UI);
glBindImageTexture(3, textures[mutex], 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
// Setup vertex arrays
// Note that the triangle is slightly tilted to check the depth test
float triangle_array[] = {
1.f, 0.f, 0.f,
-0.6f, -0.4f, 0.2f,
0.f, 1.f, 0.f,
0.6f, -0.4f, 0.f,
0.f, 0.f, 1.f,
0.f, 0.6f, -0.2f
};
float square_array[] = {
1.f, 0.f, 0.f,
-0.8f, -0.8f, 0.f,
1.f, 0.f, 0.f,
-0.8f, 0.8f, 0.f,
1.f, 0.f, 0.f,
0.8f, 0.8f, 0.f,
1.f, 0.f, 0.f,
0.8f, -0.8f, 0.f
};
enum {
triangle,
square,
clear,
vao_size
};
GLuint vao[vao_size];
glGenVertexArrays(vao_size, vao);
GLuint vertex_buffers[vao_size];
glGenBuffers(vao_size, vertex_buffers);
glBindVertexArray(vao[triangle]);
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffers[triangle]);
glBufferData(GL_ARRAY_BUFFER, sizeof(triangle_array), triangle_array, GL_STATIC_DRAW);
glInterleavedArrays(GL_C3F_V3F, 0, BUFFER_OFFSET(0));
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_COLOR_ARRAY);
glBindVertexArray(vao[square]);
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffers[square]);
glBufferData(GL_ARRAY_BUFFER, sizeof(square_array), square_array, GL_STATIC_DRAW);
glInterleavedArrays(GL_C3F_V3F, 0, BUFFER_OFFSET(0));
glEnableClientState(GL_VERTEX_ARRAY);
glEnableClientState(GL_COLOR_ARRAY);
const float clearQuad[] = {
-1.f, -1.f,
-1.f, 1.f,
1.f, 1.f,
1.f, -1.f
};
glBindVertexArray(vao[clear]);
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffers[clear]);
glBufferData(GL_ARRAY_BUFFER, sizeof(clearQuad), clearQuad, GL_STATIC_DRAW);
glVertexPointer(2, GL_FLOAT, 0, BUFFER_OFFSET(0));
glEnableClientState(GL_VERTEX_ARRAY);
// Setup shader
GLuint fs = glCreateShader(GL_FRAGMENT_SHADER);
GLint compiled, linked;
// Sample shader, implements basic depth testing and blending.
// The 'coherent' qualifier, as well as the entire mutex image,
// are only required when there is a chance of overlap within a
// single draw call.
const char *fs_source =
"#version 420 compatibility\n\
layout (binding = 0, rgba8) uniform coherent image2D color;\n\
layout (binding = 1, r16f) uniform coherent image2D depth;\n\
layout (binding = 2, r8ui) uniform coherent uimage2D stencil;\n\
layout (binding = 3, r32ui) uniform coherent uimage2D mutex;\n\
\n\
uniform int depthTestEnabled;\n\
uniform int depthWriteEnabled;\n\
uniform int blendEnabled;\n\
\n\
void main()\n\
{\n\
float cur_depth = gl_FragCoord.z;\n\
ivec2 pix_coord = ivec2(gl_FragCoord.xy);\n\
\n\
while (imageAtomicCompSwap(mutex, pix_coord, 0, 1) > 0);\n\
\n\
vec4 old_color = imageLoad(color, pix_coord);\n\
float old_depth = imageLoad(depth, pix_coord).r;\n\
uint old_stencil = imageLoad(stencil, pix_coord).r;\n\
vec4 out_color;\n\
if (depthTestEnabled > 0 && cur_depth > old_depth) {\n\
imageStore(mutex, pix_coord, uvec4(0));\n\
discard;\n\
}\n\
if (blendEnabled > 0) out_color = 0.5*gl_Color + 0.5*old_color;\n\
else out_color = gl_Color;\n\
imageStore(color, pix_coord, out_color);\n\
if (depthWriteEnabled > 0) imageStore(depth, pix_coord, vec4(cur_depth));\n\
gl_FragColor = out_color;\n\
imageStore(mutex, pix_coord, uvec4(0));\n\
}";
GLuint program;
glShaderSource(fs, 1, &fs_source, 0);
glCompileShader(fs);
glGetShaderiv(fs, GL_COMPILE_STATUS, &compiled);
if (!compiled) {
GLint length;
GLchar* log;
glGetShaderiv(fs, GL_INFO_LOG_LENGTH, &length);
log = new GLchar[length];
glGetShaderInfoLog(fs, length, &length, log);
std::cerr << "FS compile log = '" << log << "'\n";
delete[] log;
ABORT;
}
program = glCreateProgram();
glAttachShader(program, fs);
glDeleteShader(fs); // flagged for deletion when we delete the program
glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &linked);
if (linked)
{
std::cout << "Program linked successfully\n";
// Get uniform locations
ui_depthTestEnabled = glGetUniformLocation(program, "depthTestEnabled");
if (ui_depthTestEnabled == -1) { std::cerr << "Couldn't locate uniform 'depthTestEnabled'.\n"; ABORT;}
ui_depthWriteEnabled = glGetUniformLocation(program, "depthWriteEnabled");
if (ui_depthWriteEnabled == -1) { std::cerr << "Couldn't locate uniform 'depthWriteEnabled'.\n"; ABORT;}
ui_blendEnabled = glGetUniformLocation(program, "blendEnabled");
if (ui_blendEnabled == -1) { std::cerr << "Couldn't locate uniform 'blendEnabled'.\n"; ABORT;}
// Set the initial values for the uniforms
glProgramUniform1i(program, ui_depthTestEnabled, dte_value);
glProgramUniform1i(program, ui_depthWriteEnabled, 1);
glProgramUniform1i(program, ui_blendEnabled, blend);
}
else
{
GLint length;
GLchar* log;
glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length);
log = new GLchar[length];
glGetProgramInfoLog(program, length, &length, log);
std::cerr << "Program link log = '" << log << "'\n";
delete[] log;
ABORT;
}
GLuint clear_program;
GLuint cvs = glCreateShader(GL_VERTEX_SHADER);
GLuint cfs = glCreateShader(GL_FRAGMENT_SHADER);
const char *clear_vs_source =
"#version 420 compatibility\n\
void main()\n\
{\n\
gl_Position = vec4(gl_Vertex.xy, 0.0, 1.0);\n\
}";
const char *clear_fs_source =
"#version 420 compatibility\n\
layout (binding = 0, rgba8) uniform image2D color;\n\
layout (binding = 1, r16f) uniform image2D depth;\n\
layout (binding = 2, r8ui) uniform uimage2D stencil;\n\
uniform vec4 colorclear;\n\
uniform float depthclear;\n\
\n\
void main()\n\
{\n\
ivec2 pix_coord = ivec2(gl_FragCoord.xy);\n\
imageStore(color, pix_coord, colorclear);\n\
imageStore(depth, pix_coord, vec4(depthclear));\n\
imageStore(stencil, pix_coord, uvec4(0));\n\
gl_FragColor = colorclear;\n\
}";
glShaderSource(cvs, 1, &clear_vs_source, 0);
glCompileShader(cvs);
glGetShaderiv(cvs, GL_COMPILE_STATUS, &compiled);
if (!compiled) {
GLint length;
GLchar* log;
glGetShaderiv(cvs, GL_INFO_LOG_LENGTH, &length);
log = new GLchar[length];
glGetShaderInfoLog(cvs, length, &length, log);
std::cerr << "Clear VS compile log = '" << log << "'\n";
delete[] log;
ABORT;
}
glShaderSource(cfs, 1, &clear_fs_source, 0);
glCompileShader(cfs);
glGetShaderiv(cfs, GL_COMPILE_STATUS, &compiled);
if (!compiled) {
GLint length;
GLchar* log;
glGetShaderiv(cfs, GL_INFO_LOG_LENGTH, &length);
log = new GLchar[length];
glGetShaderInfoLog(cfs, length, &length, log);
std::cerr << "Clear FS compile log = '" << log << "'\n";
delete[] log;
ABORT;
}
clear_program = glCreateProgram();
glAttachShader(clear_program, cvs);
glDeleteShader(cvs); // flagged for deletion when we delete the program
glAttachShader(clear_program, cfs);
glDeleteShader(cfs); // flagged for deletion when we delete the program
glLinkProgram(clear_program);
glGetProgramiv(clear_program, GL_LINK_STATUS, &linked);
if (linked)
{
std::cout << "Clear program linked successfully\n";
// Get uniform locations
GLint uv_colorclear = glGetUniformLocation(clear_program, "colorclear");
if (uv_colorclear == -1) { std::cerr << "Couldn't locate uniform 'colorclear'.\n"; ABORT;}
GLint uf_depthclear = glGetUniformLocation(clear_program, "depthclear");
if(uf_depthclear == -1) { std::cerr << "Couldn't locate uniform 'depthclear'.\n"; ABORT;}
// Set the initial values for the uniforms
glProgramUniform4f(clear_program, uv_colorclear, 0.0f, 0.0f, 0.3f, 1.0f);
glProgramUniform1f(clear_program, uf_depthclear, 1.0f);
}
else
{
GLint length;
GLchar* log;
glGetProgramiv(clear_program, GL_INFO_LOG_LENGTH, &length);
log = new GLchar[length];
glGetProgramInfoLog(clear_program, length, &length, log);
std::cerr << "Clear program link log = '" << log << "'\n";
delete[] log;
ABORT;
}
// Just to test single draw-call overlapping polygons
GLenum mode[2] = { GL_QUADS, GL_TRIANGLE_STRIP };
// Projection/viewport setup
glViewport(0, 0, width, height);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
glOrtho(-ratio, ratio, -1.f, 1.f, 1.f, -1.f);
glMatrixMode(GL_MODELVIEW);
// Main loop
while (!glfwWindowShouldClose(window))
{
glUseProgram(clear_program);
glBindVertexArray(vao[clear]);
glDrawArrays(GL_QUADS, 0, 4);
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
glUseProgram(program);
glLoadIdentity();
glBindVertexArray(vao[square]);
glDrawArrays(mode[overlap], 0, 4);
// This barrier for image accesses is necessary between draw calls
// to prevent shaders from running out of order, which can seriously
// mess up depth/stencil testing and blending operations.
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
if (draw_triangle)
{
glBindVertexArray(vao[triangle]);
glRotatef(static_cast<float>(glfwGetTime()) * 50.f, 0.f, 0.f, 1.f);
glDrawArrays(GL_TRIANGLES, 0, 3);
}
glfwSwapBuffers(window);
glfwPollEvents();
}
CLEANUP:
// Cleanup
glUseProgram(0);
glDeleteProgram(program);
glDeleteVertexArrays(vao_size, vao);
glDeleteBuffers(vao_size, vertex_buffers);
glDeleteTextures(texnum, textures);
glfwDestroyWindow(window);
glfwTerminate();
if (state != 0) system("pause");
return state;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment