Skip to content

Instantly share code, notes, and snippets.

@roxlu
Created January 29, 2013 11:25
Show Gist options
  • Star 12 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save roxlu/4663550 to your computer and use it in GitHub Desktop.
Save roxlu/4663550 to your computer and use it in GitHub Desktop.
Fast texture uploads using pixel buffer objects. Improved upload of a 768x1366 texture from 16-20ms to 1-3ms (we can improve the performance a bit more by using GPU default pixel formats)
#include <shared/VideoSurface.h>
// State shared by all VideoSurface instances. Everything starts out zeroed;
// the first VideoSurface constructor that runs fills these in.

// Shader program handle (0 means "not created yet").
GLuint VideoSurface::prog = 0;

// Uniform locations inside `prog`.
GLint VideoSurface::u_tex = 0;
GLint VideoSurface::u_mm = 0;
GLint VideoSurface::u_pm = 0;

// 4x4 projection matrix uploaded to the `u_pm` uniform.
GLfloat VideoSurface::pm[16] = {};
/*
 * Zero-initializes the per-instance handles and sizes. The first instance that
 * is constructed while the shared program handle is still 0 also creates the
 * shared shader program, resolves its uniform locations and fills the shared
 * projection matrix `pm`.
 */
VideoSurface::VideoSurface()
  :tex(0)
  ,vao(0)
  ,vbo(0)
  ,width(0)
  ,height(0)
  ,num_bytes(0)
  ,read_dx(0)
  ,write_dx(0)
{
  // The shared program already exists; nothing more to do for this instance.
  if(VideoSurface::prog != 0) {
    return;
  }

  // Attribute slots are bound before the (re)link so they match the indices
  // used with glVertexAttribPointer() in setup().
  prog = rx_create_shader(VIDEO_SURFACE_VS, VIDEO_SURFACE_FS);
  glBindAttribLocation(prog, 0, "a_pos");
  glBindAttribLocation(prog, 1, "a_tex");
  glLinkProgram(prog);

  u_pm = glGetUniformLocation(prog, "u_pm");
  u_mm = glGetUniformLocation(prog, "u_mm");
  u_tex = glGetUniformLocation(prog, "u_tex");

  // Column-major orthographic projection: x in [0, APP_WIDTH] maps to [-1, 1]
  // and y in [0, APP_HEIGHT] maps to [1, -1] (y grows downwards).
  const float near_z = 0.0f;
  const float far_z = 10.0f;
  const float view_w = APP_WIDTH;
  const float view_h = APP_HEIGHT;
  const float depth = far_z - near_z;

  pm[0] = 2.0f / view_w;
  pm[5] = 2.0f / -view_h;
  pm[10] = -2.0f / depth;
  pm[12] = -(view_w)/view_w;
  pm[13] = -(view_h)/-view_h;
  pm[14] = -(far_z+near_z)/depth;
  pm[15] = 1.0f;
}
/*
 * Releases the GL objects owned by this surface (pixel buffer objects, texture
 * and vertex buffer) and resets the bookkeeping members.
 */
VideoSurface::~VideoSurface() {
#if defined(VIDEO_SURFACE_USE_PBOS)
  // pbos[] is only filled by setup(), which generates the PBOs before the
  // texture and the VBO. A non-zero texture/VBO handle therefore proves that
  // pbos[] holds generated names; without that check we could pass
  // uninitialized values to glDeleteBuffers().
  if(tex || vbo) {
    glDeleteBuffers(VIDEO_SURFACE_NUM_PBOS, pbos);
    for(int i = 0; i < VIDEO_SURFACE_NUM_PBOS; ++i) {
      pbos[i] = 0;
    }
  }
#endif

  width = 0;
  height = 0;
  num_bytes = 0;
  read_dx = 0;
  write_dx = 0;

  if(tex) {
    glDeleteTextures(1, &tex);
    tex = 0;
  }

  if(vbo) {
    glDeleteBuffers(1, &vbo);
    vbo = 0;
  }

  // @todo cleaning up VAO crashes
}
void VideoSurface::setup(unsigned int w, unsigned int h) {
width = w;
height = h;
num_bytes = w * h * 4;
glBindTexture(GL_TEXTURE_RECTANGLE, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glUseProgram(0);
#if defined(VIDEO_SURFACE_USE_PBOS)
glGenBuffers(VIDEO_SURFACE_NUM_PBOS, pbos);
for(int i = 0; i < VIDEO_SURFACE_NUM_PBOS; ++i) {
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbos[i]);
glBufferData(GL_PIXEL_UNPACK_BUFFER, num_bytes, NULL, GL_STREAM_DRAW);
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
#endif
glGenTextures(1, &tex);
glBindTexture(GL_TEXTURE_RECTANGLE, tex);
glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA, w, h, 0, VIDEO_SURFACE_GPU_PIXEL_FORMAT, GL_UNSIGNED_BYTE, 0);
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
GLfloat vertices[] = {
0.0f, 0.0f, 0.0f, 0.0f,
w, 0.0f, w, 0.0f,
w, h, w, h,
0.0f, 0.0f, 0.0f, 0.0f,
w, h, w, h,
0.0f, h, 0.0f, h
};
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
glEnableVertexAttribArray(0); // pos
glEnableVertexAttribArray(1); // tex
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(float) * 4, (GLvoid*)0);
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(float) * 4, (GLvoid*)8);
}
/*
 * Draws the textured quad with the model matrix positioned at (x, y, -5.0).
 * Depth writes are turned off for the draw call and turned back on afterwards;
 * face culling is disabled and left disabled.
 *
 * @param x  X position handed to the model matrix.
 * @param y  Y position handed to the model matrix.
 */
void VideoSurface::draw(int x, int y) {
  // Fixed-function state for the draw.
  glDepthMask(GL_FALSE);
  glDisable(GL_CULL_FACE);
  mm.setPosition(x, y, -5.0);
  glPointSize(15);

  // Program, geometry and texture (unit 0).
  glUseProgram(prog);
  glBindVertexArray(vao);
  glActiveTexture(GL_TEXTURE0);
  glBindTexture(GL_TEXTURE_RECTANGLE, tex);

  // Uniforms: projection matrix, model matrix, sampler on unit 0.
  glUniformMatrix4fv(u_pm, 1, GL_FALSE, pm);
  glUniformMatrix4fv(u_mm, 1, GL_FALSE, mm.getPtr());
  glUniform1i(u_tex, 0);

  glDrawArrays(GL_TRIANGLES, 0, 6);

  glDepthMask(GL_TRUE);
}
/*
 * Uploads one frame of pixel data to the texture.
 *
 * @param pixels  Source buffer; `num_bytes` (width * height * 4) bytes are
 *                read from it. A NULL pointer is rejected with a warning.
 *
 * With VIDEO_SURFACE_USE_PBOS defined, each call first updates the texture
 * from the PBO at `read_dx` and then copies `pixels` into the PBO at
 * `write_dx`. The next call reads from the PBO written here, so a frame
 * reaches the texture one call after it was handed in. Without PBOs the
 * pixels are uploaded directly.
 */
void VideoSurface::setPixels(unsigned char* pixels) {
  if(!pixels) {
    printf("WARNING: VideoSurface::setPixels(), given pixels is NULL.\n");
    return;
  }

  if(!tex || width == 0 || height == 0) {
    printf("WARNING: VideoSurface::setPixels(): cannot set, we're not initialized.\n");
    return;
  }

#if defined(VIDEO_SURFACE_USE_PBOS)
  VIDEO_SURFACE_TIMER_START

  // Advance the ring: read from the next PBO, write to the one after it.
  read_dx = (read_dx + 1) % VIDEO_SURFACE_NUM_PBOS;
  write_dx = (read_dx + 1) % VIDEO_SURFACE_NUM_PBOS;

  // With an unpack buffer bound, the last argument is an offset into that
  // buffer (0) instead of a client memory pointer.
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbos[read_dx]);
  glBindTexture(GL_TEXTURE_RECTANGLE, tex);
  glTexSubImage2D(GL_TEXTURE_RECTANGLE, 0, 0, 0, width, height, VIDEO_SURFACE_GPU_PIXEL_FORMAT, GL_UNSIGNED_BYTE, 0);

  // Re-specify the write PBO's storage before mapping it, then copy the new
  // frame into it.
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbos[write_dx]);
  glBufferData(GL_PIXEL_UNPACK_BUFFER, num_bytes, NULL, GL_STREAM_DRAW);
  GLubyte* ptr = (GLubyte*)glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY);
  if(ptr) {
    memcpy(ptr, pixels, num_bytes);
    // GL_FALSE means the buffer contents were lost while it was mapped.
    if(glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER) == GL_FALSE) {
      printf("WARNING: VideoSurface::setPixels(): pixel buffer contents were lost while mapped.\n");
    }
  }
  else {
    // Do not fail silently: this frame never reaches the PBO.
    printf("WARNING: VideoSurface::setPixels(): cannot map the pixel buffer, frame not uploaded.\n");
  }
  glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

  VIDEO_SURFACE_TIMER_END
#else
  VIDEO_SURFACE_TIMER_START

  glBindTexture(GL_TEXTURE_RECTANGLE, tex);
  glTexSubImage2D(GL_TEXTURE_RECTANGLE, 0, 0, 0, width, height, VIDEO_SURFACE_GPU_PIXEL_FORMAT, GL_UNSIGNED_BYTE, pixels);

  VIDEO_SURFACE_TIMER_END
#endif
}
#ifndef APOLLO_VIDEO_SURFACE_H
#define APOLLO_VIDEO_SURFACE_H
#include <roxlu/Roxlu.h>
#include <shared/Types.h>
// Uncomment to make VideoSurface::setPixels() print how long each upload took.
//#define VIDEO_SURFACE_TIMER
#if defined(VIDEO_SURFACE_TIMER)
// START and END must be used as a pair in the same scope: START declares
// `now`, END reads it and declares `d`.
# define VIDEO_SURFACE_TIMER_START rx_uint64 now = rx_millis();
// The value is cast so the argument always matches the %llu conversion,
// whatever integer type rx_uint64 is a typedef for.
# define VIDEO_SURFACE_TIMER_END rx_uint64 d = rx_millis() - now; printf("Video surface timer: %llu\n", (unsigned long long)d);
#else
# define VIDEO_SURFACE_TIMER_START
# define VIDEO_SURFACE_TIMER_END
#endif
// Pixel format of the data passed to glTexImage2D()/glTexSubImage2D().
#define VIDEO_SURFACE_GPU_PIXEL_FORMAT GL_RGBA
// Upload through pixel buffer objects; remove to upload directly from client memory.
#define VIDEO_SURFACE_USE_PBOS
// Number of pixel buffer objects that setPixels() cycles through.
#define VIDEO_SURFACE_NUM_PBOS 2
// Vertex shader source: transforms the vertex position by the model matrix
// (u_mm) and then the projection matrix (u_pm), and forwards the texture
// coordinate unchanged to the fragment shader.
// NOTE(review): GLSL() is defined outside this file; presumably it turns its
// arguments into a source string for the given GLSL version - confirm.
static const char* VIDEO_SURFACE_VS = GLSL(120,
attribute vec4 a_pos;
attribute vec2 a_tex;
varying vec2 v_tex;
uniform mat4 u_pm;
uniform mat4 u_mm;
void main() {
gl_Position = u_pm * u_mm * a_pos;
v_tex = a_tex;
}
);
// Fragment shader source: samples the rectangle texture at the interpolated
// texture coordinate, writes its RGB and forces alpha to 1.0 (the texture's
// own alpha channel is ignored).
// NOTE(review): sampler2DRect under GLSL 1.20 may require the
// GL_ARB_texture_rectangle extension on some drivers - confirm.
static const char* VIDEO_SURFACE_FS = GLSL(120,
uniform sampler2DRect u_tex;
varying vec2 v_tex;
void main() {
gl_FragColor.a = 1.0;
gl_FragColor.rgb = texture2DRect(u_tex, v_tex).rgb;
}
);
// A textured quad whose rectangle texture can be updated every frame, using
// pixel buffer objects when VIDEO_SURFACE_USE_PBOS is defined.
//
// Usage: construct, call setup() once with the frame size, then call
// setPixels() with new frame data and draw() to render it.
//
// NOTE(review): the destructor deletes the GL texture and buffer, but the
// class is still copyable; a copy would end up deleting the same GL handles
// twice. Consider disabling copying after checking the callers.
class VideoSurface {
public:
// Zeroes the instance state; the first instance also builds the shared shader program.
VideoSurface();
// Deletes the texture and the vertex buffer.
~VideoSurface();
// Allocates the GL resources for a surface of w x h pixels (4 bytes per pixel).
void setup(unsigned int w, unsigned int h);
// Uploads one frame; `pixels` must hold width * height * 4 bytes.
void setPixels(unsigned char* pixels);
// Draws the quad with the model matrix positioned at (x, y).
void draw(int x, int y);
private:
// Shader program and uniform locations, shared by all instances.
static GLuint prog;
static GLint u_pm;
static GLint u_mm;
static GLint u_tex;
// Shared projection matrix, uploaded to u_pm.
static GLfloat pm[16];
// Rectangle texture that receives the pixel data.
GLuint tex;
// Vertex array / vertex buffer holding the quad.
GLuint vao;
GLuint vbo;
// Model matrix, uploaded to u_mm.
Mat4 mm;
// Surface size in pixels, as given to setup().
unsigned int width;
unsigned int height;
// Size of one frame in bytes (width * height * 4).
size_t num_bytes;
// Pixel buffer objects; only filled in by setup().
GLuint pbos[VIDEO_SURFACE_NUM_PBOS];
// Indices into pbos: the buffer the texture is updated from, and the buffer
// new pixels are copied into.
unsigned int read_dx;
unsigned int write_dx;
};
#endif
@pixelnerve
Copy link

You can remove the modulo operator by doing:
read_dx = write_dx;
write_dx = 1-write_dx;

Also, replacing glBufferData/glMapBuffer with glMapBufferRange gives you access to some new flags that hint to the GPU how the data should be managed, e.g. GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT (GL 3.x).

@roxlu
Copy link
Author

roxlu commented Jun 7, 2013

Thanks pixelnerve, but that wouldn't work once the number of PBOs is no longer 2.
The hints regarding glMap are cool, though. I'm now looking into YUV conversion to check whether that
will improve performance even more.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment