Last active
September 22, 2019 18:14
-
-
Save szdarkhack/8145086 to your computer and use it in GitHub Desktop.
Here's the code sample, let me know if you need any clarifications.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Description: | |
// | |
// A simple test program using the EXT_image_load_store extension (core in OpenGL 4.2 and above) to completely take over the fragment operations,
// in the hope of providing a method for accurate emulation of systems offering stencil/depth tests or blending operations that are impossible to
// implement using the standard OpenGL equivalents, such as the Sony PSP. This method also has the benefit of being single-pass, so it is pretty
// efficient and doesn't require any extra framebuffers.
// | |
// The rendering shader renders both to the framebuffer and to a few images (color, depth, stencil) such that it can subsequently use the data | |
// to perform depth/stencil testing and blending. Memory consistency is achieved by a combination of cache qualifiers, per-pixel mutexes and | |
// memory barriers where appropriate. Specifically, glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) is required between draw calls with | |
// minimal performance impact in all cases. In situations where a single draw call may overlap with itself, the coherent cache qualifier is | |
// required in the declarations of the images in the shader. The mutex texture is also used in the same situation to avoid scheduling issues. | |
// | |
// Incorporating this tech into existing software will require changes to state management (since tests are now in the shader and must remain | |
// disabled in the normal OpenGL pipeline) and shader generation (to incorporate the tests). | |
// | |
// If you plan to build it, you'll need the latest GLFW and GLEW libraries, as well as a graphics card supporting OpenGL 4.2 and up to date | |
// graphics drivers. | |
// | |
// Keys: | |
// | |
// Esc Quit | |
// B Toggle blending (in-shader, hardcoded weights of 0.5 and 0.5, just for testing purposes) | |
// D Toggle depth testing (in-shader) | |
// T Toggle drawing the spinning triangle | |
// O Toggle between drawing the quad and drawing a self-overlapping triangle strip | |
// | |
// Notes on the code: | |
// | |
// The code was written in a hurry, patched left and right and as a result is very ugly, full of globals and macros and even the disgusting goto. | |
// Sorry :) | |
// The actual important parts of the code should be pretty clear and commented. | |
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
#include <gl/glew.h>
#include <GL/wglew.h>
#include <GLFW/glfw3.h>
// Build-time configuration and small helper macros.

// OpenGL context version requested from GLFW (major.minor).
#define GL_MAJOR 4
#define GL_MINOR 2
// Two-level stringification so that macro arguments are expanded before quoting.
#define STRINGIFY_IMP(A) #A
#define STRINGIFY(A) STRINGIFY_IMP(A)
// Version token handed to reportSupported(), i.e. "VERSION_4_2" for the values above.
#define GL_VERSION_STR "VERSION_" STRINGIFY(GL_MAJOR) "_" STRINGIFY(GL_MINOR)
// Expresses a byte offset as the pointer argument expected by the vertex-array calls.
#define BUFFER_OFFSET(bytes) (static_cast<GLubyte*>(nullptr) + (bytes))
// Logs the abort, records failure in the global `state` and jumps to the CLEANUP
// label, which the enclosing function must provide. Wrapped in do/while(0) so that
// `ABORT;` behaves as a single statement (safe in an un-braced if/else).
#define ABORT do { std::cerr << "Aborting program...\n"; state = -1; goto CLEANUP; } while (0)
// Shared state. Kept as globals because both main() and the GLFW key callback
// need access to it.

// Uniform locations in the rendering program. They start at -1, which the
// glUniform* calls silently ignore, so nothing can be written through them
// before main() has looked up the real locations.
GLint ui_depthTestEnabled = -1;
GLint ui_depthWriteEnabled = -1;
GLint ui_blendEnabled = -1;
// Exit code returned by main(); -1 after an abort.
int state = 0;
// In-shader depth test toggle (key D): 1 = enabled, 0 = disabled.
int dte_value = 1;
// Geometry toggle (key O): 0 = quad, 1 = self-overlapping triangle strip.
int overlap = 1;
// Spinning triangle toggle (key T): 1 = drawn, 0 = hidden.
int draw_triangle = 1;
// In-shader blending toggle (key B): 1 = enabled, 0 = disabled.
int blend = 1;
// GLFW error handler.
//
// Writes one line per error to stderr, containing the numeric error code and
// the textual description.
//   error       - error code passed in by the caller
//   description - human-readable message; a null pointer is tolerated so that
//                 std::cerr is never put into a failed state
void error_callback(int error, const char* description)
{
    std::cerr << "GLFW error " << error << ": "
              << (description ? description : "(no description)") << '\n';
}
// GLFW key event handler.
//
// Reacts to key presses only (releases and repeats are ignored):
//   Esc - request window close
//   D   - toggle the in-shader depth test and upload the new value
//   O   - toggle between the quad and the overlapping triangle strip
//   T   - toggle the spinning triangle
//   B   - toggle in-shader blending and upload the new value
// NOTE(review): glUniform1i writes to whichever program is current when the
// callback fires; this presumably relies on the rendering program being bound
// at that point - confirm against the main loop.
static void key_callback(GLFWwindow* window, int key, int scancode, int action, int mods)
{
    if (action != GLFW_PRESS)
        return;

    if (key == GLFW_KEY_ESCAPE) {
        glfwSetWindowShouldClose(window, GL_TRUE);
    }
    else if (key == GLFW_KEY_D) {
        dte_value = dte_value ^ 1;
        glUniform1i(ui_depthTestEnabled, dte_value);
        const char* prefix = (dte_value == 1) ? "en" : "dis";
        std::cout << "Depth test " << prefix << "abled.\n";
    }
    else if (key == GLFW_KEY_O) {
        overlap = overlap ^ 1;
        std::cout << "Drawing ";
        if (overlap == 0)
            std::cout << "quad.\n";
        else
            std::cout << "overlapping triangle strip.\n";
    }
    else if (key == GLFW_KEY_T) {
        draw_triangle = draw_triangle ^ 1;
        const char* prefix = (draw_triangle == 1) ? "en" : "dis";
        std::cout << "Triangle " << prefix << "abled.\n";
    }
    else if (key == GLFW_KEY_B) {
        blend = blend ^ 1;
        glUniform1i(ui_blendEnabled, blend);
        const char* prefix = (blend == 1) ? "en" : "dis";
        std::cout << "Blending " << prefix << "abled.\n";
    }
}
// Print diagnostics about our support | |
static bool reportSupported(const char *ext) | |
{ | |
std::string glext = "GL_"; | |
glext += ext; | |
bool good = glewIsSupported(glext.c_str()) == GL_TRUE; | |
std::cout << ext << " is" << (good ? " " : " NOT ") << "supported.\n"; | |
return good; | |
} | |
int main() | |
{ | |
if (!glfwInit()) return -1; | |
glfwSetErrorCallback(error_callback); | |
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, GL_MAJOR); | |
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, GL_MINOR); | |
// Compatibility profile to avoid some vertex shaders and manual matrix manipulation | |
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_COMPAT_PROFILE); | |
int width = 640; | |
int height = 480; | |
float ratio; | |
// Setup window and context | |
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE); | |
GLFWwindow* window = glfwCreateWindow(width, height, "GLFW Window", nullptr, nullptr); | |
if (!window) return -1; | |
glfwMakeContextCurrent(window); | |
GLenum err = glewInit(); | |
if (err != GLEW_OK) | |
{ | |
std::cerr << glewGetErrorString(err) << '\n'; | |
return -1; | |
} | |
// Check for OpenGL 4.2 | |
if (!reportSupported(GL_VERSION_STR)) ABORT; | |
glfwSetKeyCallback(window, key_callback); | |
wglSwapIntervalEXT(1); | |
glfwGetFramebufferSize(window, &width, &height); | |
ratio = width / static_cast<float>(height); | |
// Setup textures | |
// Note that we don't need to configure any filtering since we're | |
// using them through images instead of samplers | |
enum { | |
color, | |
depth, | |
stencil, | |
mutex, | |
texnum | |
}; | |
GLuint textures[texnum]; | |
glGenTextures(texnum, textures); | |
// Color/alpha texture | |
glBindTexture(GL_TEXTURE_2D, textures[color]); | |
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, width, height); | |
// Depth texture | |
glBindTexture(GL_TEXTURE_2D, textures[depth]); | |
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R16F, width, height); | |
// Stencil texture | |
glBindTexture(GL_TEXTURE_2D, textures[stencil]); | |
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R8UI, width, height); | |
// Mutex buffer | |
glBindTexture(GL_TEXTURE_2D, textures[mutex]); | |
// GL_R32UI (or GL_R32I) necessary for atomic operations | |
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, width, height); | |
// Initialize the mutex to 0 | |
unsigned int *zeros = new unsigned int[width*height]; | |
memset(zeros, 0, width*height*4); | |
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RED, GL_UNSIGNED_INT, zeros); | |
delete[] zeros; | |
glBindImageTexture(0, textures[color], 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA8); | |
glBindImageTexture(1, textures[depth], 0, GL_FALSE, 0, GL_READ_WRITE, GL_R16F); | |
glBindImageTexture(2, textures[stencil], 0, GL_FALSE, 0, GL_READ_WRITE, GL_R8UI); | |
glBindImageTexture(3, textures[mutex], 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI); | |
// Setup vertex arrays | |
// Note that the triangle is slightly tilted to check the depth test | |
float triangle_array[] = { | |
1.f, 0.f, 0.f, | |
-0.6f, -0.4f, 0.2f, | |
0.f, 1.f, 0.f, | |
0.6f, -0.4f, 0.f, | |
0.f, 0.f, 1.f, | |
0.f, 0.6f, -0.2f | |
}; | |
float square_array[] = { | |
1.f, 0.f, 0.f, | |
-0.8f, -0.8f, 0.f, | |
1.f, 0.f, 0.f, | |
-0.8f, 0.8f, 0.f, | |
1.f, 0.f, 0.f, | |
0.8f, 0.8f, 0.f, | |
1.f, 0.f, 0.f, | |
0.8f, -0.8f, 0.f | |
}; | |
enum { | |
triangle, | |
square, | |
clear, | |
vao_size | |
}; | |
GLuint vao[vao_size]; | |
glGenVertexArrays(vao_size, vao); | |
GLuint vertex_buffers[vao_size]; | |
glGenBuffers(vao_size, vertex_buffers); | |
glBindVertexArray(vao[triangle]); | |
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffers[triangle]); | |
glBufferData(GL_ARRAY_BUFFER, sizeof(triangle_array), triangle_array, GL_STATIC_DRAW); | |
glInterleavedArrays(GL_C3F_V3F, 0, BUFFER_OFFSET(0)); | |
glEnableClientState(GL_VERTEX_ARRAY); | |
glEnableClientState(GL_COLOR_ARRAY); | |
glBindVertexArray(vao[square]); | |
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffers[square]); | |
glBufferData(GL_ARRAY_BUFFER, sizeof(square_array), square_array, GL_STATIC_DRAW); | |
glInterleavedArrays(GL_C3F_V3F, 0, BUFFER_OFFSET(0)); | |
glEnableClientState(GL_VERTEX_ARRAY); | |
glEnableClientState(GL_COLOR_ARRAY); | |
const float clearQuad[] = { | |
-1.f, -1.f, | |
-1.f, 1.f, | |
1.f, 1.f, | |
1.f, -1.f | |
}; | |
glBindVertexArray(vao[clear]); | |
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffers[clear]); | |
glBufferData(GL_ARRAY_BUFFER, sizeof(clearQuad), clearQuad, GL_STATIC_DRAW); | |
glVertexPointer(2, GL_FLOAT, 0, BUFFER_OFFSET(0)); | |
glEnableClientState(GL_VERTEX_ARRAY); | |
// Setup shader | |
GLuint fs = glCreateShader(GL_FRAGMENT_SHADER); | |
GLint compiled, linked; | |
// Sample shader, implements basic depth testing and blending. | |
// The 'coherent' qualifier, as well as the entire mutex image, | |
// are only required when there is a chance of overlap within a | |
// single draw call. | |
const char *fs_source = | |
"#version 420 compatibility\n\ | |
layout (binding = 0, rgba8) uniform coherent image2D color;\n\ | |
layout (binding = 1, r16f) uniform coherent image2D depth;\n\ | |
layout (binding = 2, r8ui) uniform coherent uimage2D stencil;\n\ | |
layout (binding = 3, r32ui) uniform coherent uimage2D mutex;\n\ | |
\n\ | |
uniform int depthTestEnabled;\n\ | |
uniform int depthWriteEnabled;\n\ | |
uniform int blendEnabled;\n\ | |
\n\ | |
void main()\n\ | |
{\n\ | |
float cur_depth = gl_FragCoord.z;\n\ | |
ivec2 pix_coord = ivec2(gl_FragCoord.xy);\n\ | |
\n\ | |
while (imageAtomicCompSwap(mutex, pix_coord, 0, 1) > 0);\n\ | |
\n\ | |
vec4 old_color = imageLoad(color, pix_coord);\n\ | |
float old_depth = imageLoad(depth, pix_coord).r;\n\ | |
uint old_stencil = imageLoad(stencil, pix_coord).r;\n\ | |
vec4 out_color;\n\ | |
if (depthTestEnabled > 0 && cur_depth > old_depth) {\n\ | |
imageStore(mutex, pix_coord, uvec4(0));\n\ | |
discard;\n\ | |
}\n\ | |
if (blendEnabled > 0) out_color = 0.5*gl_Color + 0.5*old_color;\n\ | |
else out_color = gl_Color;\n\ | |
imageStore(color, pix_coord, out_color);\n\ | |
if (depthWriteEnabled > 0) imageStore(depth, pix_coord, vec4(cur_depth));\n\ | |
gl_FragColor = out_color;\n\ | |
imageStore(mutex, pix_coord, uvec4(0));\n\ | |
}"; | |
GLuint program; | |
glShaderSource(fs, 1, &fs_source, 0); | |
glCompileShader(fs); | |
glGetShaderiv(fs, GL_COMPILE_STATUS, &compiled); | |
if (!compiled) { | |
GLint length; | |
GLchar* log; | |
glGetShaderiv(fs, GL_INFO_LOG_LENGTH, &length); | |
log = new GLchar[length]; | |
glGetShaderInfoLog(fs, length, &length, log); | |
std::cerr << "FS compile log = '" << log << "'\n"; | |
delete[] log; | |
ABORT; | |
} | |
program = glCreateProgram(); | |
glAttachShader(program, fs); | |
glDeleteShader(fs); // flagged for deletion when we delete the program | |
glLinkProgram(program); | |
glGetProgramiv(program, GL_LINK_STATUS, &linked); | |
if (linked) | |
{ | |
std::cout << "Program linked successfully\n"; | |
// Get uniform locations | |
ui_depthTestEnabled = glGetUniformLocation(program, "depthTestEnabled"); | |
if (ui_depthTestEnabled == -1) { std::cerr << "Couldn't locate uniform 'depthTestEnabled'.\n"; ABORT;} | |
ui_depthWriteEnabled = glGetUniformLocation(program, "depthWriteEnabled"); | |
if (ui_depthWriteEnabled == -1) { std::cerr << "Couldn't locate uniform 'depthWriteEnabled'.\n"; ABORT;} | |
ui_blendEnabled = glGetUniformLocation(program, "blendEnabled"); | |
if (ui_blendEnabled == -1) { std::cerr << "Couldn't locate uniform 'blendEnabled'.\n"; ABORT;} | |
// Set the initial values for the uniforms | |
glProgramUniform1i(program, ui_depthTestEnabled, dte_value); | |
glProgramUniform1i(program, ui_depthWriteEnabled, 1); | |
glProgramUniform1i(program, ui_blendEnabled, blend); | |
} | |
else | |
{ | |
GLint length; | |
GLchar* log; | |
glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length); | |
log = new GLchar[length]; | |
glGetProgramInfoLog(program, length, &length, log); | |
std::cerr << "Program link log = '" << log << "'\n"; | |
delete[] log; | |
ABORT; | |
} | |
GLuint clear_program; | |
GLuint cvs = glCreateShader(GL_VERTEX_SHADER); | |
GLuint cfs = glCreateShader(GL_FRAGMENT_SHADER); | |
const char *clear_vs_source = | |
"#version 420 compatibility\n\ | |
void main()\n\ | |
{\n\ | |
gl_Position = vec4(gl_Vertex.xy, 0.0, 1.0);\n\ | |
}"; | |
const char *clear_fs_source = | |
"#version 420 compatibility\n\ | |
layout (binding = 0, rgba8) uniform image2D color;\n\ | |
layout (binding = 1, r16f) uniform image2D depth;\n\ | |
layout (binding = 2, r8ui) uniform uimage2D stencil;\n\ | |
uniform vec4 colorclear;\n\ | |
uniform float depthclear;\n\ | |
\n\ | |
void main()\n\ | |
{\n\ | |
ivec2 pix_coord = ivec2(gl_FragCoord.xy);\n\ | |
imageStore(color, pix_coord, colorclear);\n\ | |
imageStore(depth, pix_coord, vec4(depthclear));\n\ | |
imageStore(stencil, pix_coord, uvec4(0));\n\ | |
gl_FragColor = colorclear;\n\ | |
}"; | |
glShaderSource(cvs, 1, &clear_vs_source, 0); | |
glCompileShader(cvs); | |
glGetShaderiv(cvs, GL_COMPILE_STATUS, &compiled); | |
if (!compiled) { | |
GLint length; | |
GLchar* log; | |
glGetShaderiv(cvs, GL_INFO_LOG_LENGTH, &length); | |
log = new GLchar[length]; | |
glGetShaderInfoLog(cvs, length, &length, log); | |
std::cerr << "Clear VS compile log = '" << log << "'\n"; | |
delete[] log; | |
ABORT; | |
} | |
glShaderSource(cfs, 1, &clear_fs_source, 0); | |
glCompileShader(cfs); | |
glGetShaderiv(cfs, GL_COMPILE_STATUS, &compiled); | |
if (!compiled) { | |
GLint length; | |
GLchar* log; | |
glGetShaderiv(cfs, GL_INFO_LOG_LENGTH, &length); | |
log = new GLchar[length]; | |
glGetShaderInfoLog(cfs, length, &length, log); | |
std::cerr << "Clear FS compile log = '" << log << "'\n"; | |
delete[] log; | |
ABORT; | |
} | |
clear_program = glCreateProgram(); | |
glAttachShader(clear_program, cvs); | |
glDeleteShader(cvs); // flagged for deletion when we delete the program | |
glAttachShader(clear_program, cfs); | |
glDeleteShader(cfs); // flagged for deletion when we delete the program | |
glLinkProgram(clear_program); | |
glGetProgramiv(clear_program, GL_LINK_STATUS, &linked); | |
if (linked) | |
{ | |
std::cout << "Clear program linked successfully\n"; | |
// Get uniform locations | |
GLint uv_colorclear = glGetUniformLocation(clear_program, "colorclear"); | |
if (uv_colorclear == -1) { std::cerr << "Couldn't locate uniform 'colorclear'.\n"; ABORT;} | |
GLint uf_depthclear = glGetUniformLocation(clear_program, "depthclear"); | |
if(uf_depthclear == -1) { std::cerr << "Couldn't locate uniform 'depthclear'.\n"; ABORT;} | |
// Set the initial values for the uniforms | |
glProgramUniform4f(clear_program, uv_colorclear, 0.0f, 0.0f, 0.3f, 1.0f); | |
glProgramUniform1f(clear_program, uf_depthclear, 1.0f); | |
} | |
else | |
{ | |
GLint length; | |
GLchar* log; | |
glGetProgramiv(clear_program, GL_INFO_LOG_LENGTH, &length); | |
log = new GLchar[length]; | |
glGetProgramInfoLog(clear_program, length, &length, log); | |
std::cerr << "Clear program link log = '" << log << "'\n"; | |
delete[] log; | |
ABORT; | |
} | |
// Just to test single draw-call overlapping polygons | |
GLenum mode[2] = { GL_QUADS, GL_TRIANGLE_STRIP }; | |
// Projection/viewport setup | |
glViewport(0, 0, width, height); | |
glMatrixMode(GL_PROJECTION); | |
glLoadIdentity(); | |
glOrtho(-ratio, ratio, -1.f, 1.f, 1.f, -1.f); | |
glMatrixMode(GL_MODELVIEW); | |
// Main loop | |
while (!glfwWindowShouldClose(window)) | |
{ | |
glUseProgram(clear_program); | |
glBindVertexArray(vao[clear]); | |
glDrawArrays(GL_QUADS, 0, 4); | |
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); | |
glUseProgram(program); | |
glLoadIdentity(); | |
glBindVertexArray(vao[square]); | |
glDrawArrays(mode[overlap], 0, 4); | |
// This barrier for image accesses is necessary between draw calls | |
// to prevent shaders from running out of order, which can seriously | |
// mess up depth/stencil testing and blending operations. | |
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); | |
if (draw_triangle) | |
{ | |
glBindVertexArray(vao[triangle]); | |
glRotatef(static_cast<float>(glfwGetTime()) * 50.f, 0.f, 0.f, 1.f); | |
glDrawArrays(GL_TRIANGLES, 0, 3); | |
} | |
glfwSwapBuffers(window); | |
glfwPollEvents(); | |
} | |
CLEANUP: | |
// Cleanup | |
glUseProgram(0); | |
glDeleteProgram(program); | |
glDeleteVertexArrays(vao_size, vao); | |
glDeleteBuffers(vao_size, vertex_buffers); | |
glDeleteTextures(texnum, textures); | |
glfwDestroyWindow(window); | |
glfwTerminate(); | |
if (state != 0) system("pause"); | |
return state; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment