Skip to content

Instantly share code, notes, and snippets.

Created November 19, 2013 20:54
Show Gist options
  • Save rcolinray/7552384 to your computer and use it in GitHub Desktop.
Save rcolinray/7552384 to your computer and use it in GitHub Desktop.
OpenGL-FFMpeg integration
// Use OpenGL 3.0+, but don't use GLU
#define GLFW_NO_GLU
#include <GL/glfw.h>
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
// The FFmpeg headers are C headers, so they are included with C linkage.
// The linkage block is closed right after them so that the C++ standard
// headers that follow are not pulled into extern "C".
extern "C" {
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#include <libavdevice/avdevice.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
#include <libavutil/avutil.h>
}
#include <sys/time.h>
#include <iostream>
#include <fstream>
#include <string>
// GLSL 1.50 vertex shader: multiplies each vertex by mvpMatrix and passes the
// per-vertex texture coordinate through to the fragment stage.
std::string const vert_shader_source =
    "#version 150\n"
    "in vec3 vertex;\n"
    "in vec2 texCoord0;\n"
    "uniform mat4 mvpMatrix;\n"
    "out vec2 texCoord;\n"
    "void main() {\n"
    " gl_Position = mvpMatrix * vec4(vertex, 1.0);\n"
    " texCoord = texCoord0;\n"
    "}\n";  // closes the GLSL main() and terminates the initializer
// GLSL 1.50 fragment shader: samples frameTex at the interpolated texture
// coordinate and writes the result as the fragment color.
std::string const frag_shader_source =
    "#version 150\n"
    "uniform sampler2D frameTex;\n"
    "in vec2 texCoord;\n"
    "out vec4 fragColor;\n"
    "void main() {\n"
    " fragColor = texture(frameTex, texCoord);\n"
    "}\n";  // closes the GLSL main() and terminates the initializer
// Turns a byte offset `i` into a char pointer by adding it to a null pointer.
// NOTE(review): presumably intended for GL calls that take a buffer offset
// through a pointer parameter; no use is visible in this listing - confirm.
#define BUFFER_OFFSET(i) ((char *)NULL + (i))
// attribute indices: slots in AppData::attribs (two entries)
enum {
    VERTICES = 0,
    TEX_COORDS
};
// uniform indices: slots in AppData::uniforms (two entries)
enum {
    MVP_MATRIX = 0,
    FRAME_TEX
};
// Application state shared by the decoding and rendering helpers.
struct AppData {
    // ---- FFmpeg side ----
    AVFormatContext *fmt_ctx;     // input context filled in by avformat_open_input
    int stream_idx;               // index of the selected video stream, -1 while unset
    AVStream *video_stream;       // fmt_ctx->streams[stream_idx]
    AVCodecContext *codec_ctx;    // codec context taken from video_stream
    AVCodec *decoder;             // decoder found for codec_ctx->codec_id
    AVPacket *packet;             // packet storage handed to av_read_frame
    AVFrame *av_frame;            // frame the decoder writes into
    AVFrame *gl_frame;            // RGB24 picture that sws_scale writes into
    struct SwsContext *conv_ctx;  // conversion context, created on first decoded frame

    // ---- OpenGL side ----
    GLuint vao;                   // vertex array object
    GLuint vert_buf;              // buffer bound to GL_ARRAY_BUFFER
    GLuint elem_buf;              // buffer bound to GL_ELEMENT_ARRAY_BUFFER
    GLuint frame_tex;             // 2D texture receiving the video frames
    GLuint program;               // linked shader program
    GLuint attribs[2];            // attribute locations, indexed by VERTICES / TEX_COORDS
    GLuint uniforms[2];           // uniform locations, indexed by MVP_MATRIX / FRAME_TEX
};
// initialize the app data structure
//
// Puts every field of `data` into a known "nothing allocated yet" state:
// FFmpeg pointers become NULL, stream_idx becomes -1 and all GL object names
// and locations become 0. clearAppData() tests the pointers before freeing
// them, and GL ignores deletion of name 0, so a freshly initialized structure
// can always be passed to clearAppData().
void initializeAppData(AppData *data) {
    data->fmt_ctx = NULL;
    data->stream_idx = -1;
    data->video_stream = NULL;
    data->codec_ctx = NULL;
    data->decoder = NULL;
    data->packet = NULL;  // was left indeterminate although clearAppData() tests it
    data->av_frame = NULL;
    data->gl_frame = NULL;
    data->conv_ctx = NULL;

    data->vao = 0;
    data->vert_buf = 0;
    data->elem_buf = 0;
    data->frame_tex = 0;
    data->program = 0;
    data->attribs[0] = data->attribs[1] = 0;
    data->uniforms[0] = data->uniforms[1] = 0;
}
// clean up the app data structure
//
// Releases the FFmpeg objects referenced by `data` and deletes the GL objects
// it names. Each FFmpeg pointer is checked before use and reset to NULL
// afterwards, so every pointer field must be either NULL or valid on entry.
void clearAppData(AppData *data) {
    if (data->av_frame) {
        av_free(data->av_frame);
        data->av_frame = NULL;
    }
    if (data->gl_frame) {
        // avpicture_fill() attaches the separately av_malloc'ed RGB buffer as
        // data[0]; freeing only the frame struct would leak that buffer.
        av_free(data->gl_frame->data[0]);
        av_free(data->gl_frame);
        data->gl_frame = NULL;
    }
    if (data->packet) {
        av_free(data->packet);
        data->packet = NULL;
    }
    if (data->conv_ctx) {
        sws_freeContext(data->conv_ctx);
        data->conv_ctx = NULL;
    }
    // The codec context belongs to the stream, so close it before the input.
    if (data->codec_ctx) {
        avcodec_close(data->codec_ctx);
        data->codec_ctx = NULL;
    }
    if (data->fmt_ctx) {
        // The context comes from avformat_open_input(), whose counterpart is
        // avformat_close_input(); it also resets fmt_ctx to NULL.
        avformat_close_input(&data->fmt_ctx);
        data->video_stream = NULL;
    }

    glDeleteVertexArrays(1, &data->vao);
    glDeleteBuffers(1, &data->vert_buf);
    glDeleteBuffers(1, &data->elem_buf);
    glDeleteTextures(1, &data->frame_tex);
}
// read a video frame
//
// Reads packets from data->fmt_ctx until one from the selected video stream
// has been handled. That packet is decoded into data->av_frame; when the
// decoder reports a finished frame it is converted to RGB24 in data->gl_frame
// and uploaded into the currently bound GL_TEXTURE_2D.
//
// Returns true once a video packet has been processed (whether or not it
// completed a frame), and false on end of input or on a read, decode or
// converter-creation error.
bool readFrame(AppData *data) {
    bool handled_video_packet = false;

    while (!handled_video_packet) {
        if (av_read_frame(data->fmt_ctx, data->packet) < 0) {
            return false;
        }

        // Capture this before the packet is released below.
        handled_video_packet = (data->packet->stream_index == data->stream_idx);

        if (handled_video_packet) {
            int frame_finished = 0;
            if (avcodec_decode_video2(data->codec_ctx, data->av_frame, &frame_finished,
                                      data->packet) < 0) {
                av_free_packet(data->packet);
                return false;
            }

            if (frame_finished) {
                // The converter is created lazily on the first decoded frame.
                if (!data->conv_ctx) {
                    data->conv_ctx = sws_getContext(data->codec_ctx->width,
                        data->codec_ctx->height, data->codec_ctx->pix_fmt,
                        data->codec_ctx->width, data->codec_ctx->height, PIX_FMT_RGB24,
                        SWS_BICUBIC, NULL, NULL, NULL);
                    if (!data->conv_ctx) {
                        av_free_packet(data->packet);
                        return false;
                    }
                }

                sws_scale(data->conv_ctx, data->av_frame->data, data->av_frame->linesize, 0,
                    data->codec_ctx->height, data->gl_frame->data, data->gl_frame->linesize);

                // gl_frame->data[0] is the packed RGB24 image sws_scale just wrote.
                glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, data->codec_ctx->width,
                    data->codec_ctx->height, GL_RGB, GL_UNSIGNED_BYTE,
                    data->gl_frame->data[0]);
            }
        }

        // Each successful av_read_frame() hands over a packet that must be released.
        av_free_packet(data->packet);
    }

    return true;
}
bool buildShader(std::string const &shader_source, GLuint &shader, GLenum type) {
int size = shader_source.length();
shader = glCreateShader(type);
char const *c_shader_source = shader_source.c_str();
glShaderSource(shader, 1, (GLchar const **)&c_shader_source, &size);
GLint status;
glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
if (status != GL_TRUE) {
std::cout << "failed to compile shader" << std::endl;
int length;
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &length);
char *log = new char[length];
glGetShaderInfoLog(shader, length, &length, log);
std::cout << log << std::endl;
delete[] log;
return false;
return true;
// initialize shaders
//
// Builds the vertex and fragment shaders, links them into data->program and
// stores the uniform and attribute locations in data->uniforms and
// data->attribs.
//
// Returns true on success. On a compile or link failure prints a message
// (and the program info log, if any) to std::cout and returns false.
bool buildProgram(AppData *data) {
    GLuint v_shader, f_shader;
    if (!buildShader(vert_shader_source, v_shader, GL_VERTEX_SHADER)) {
        std::cout << "failed to build vertex shader" << std::endl;
        return false;
    }
    if (!buildShader(frag_shader_source, f_shader, GL_FRAGMENT_SHADER)) {
        std::cout << "failed to build fragment shader" << std::endl;
        return false;
    }

    data->program = glCreateProgram();
    glAttachShader(data->program, v_shader);
    glAttachShader(data->program, f_shader);
    // Without this call GL_LINK_STATUS below can never become GL_TRUE.
    glLinkProgram(data->program);

    GLint status = GL_FALSE;
    glGetProgramiv(data->program, GL_LINK_STATUS, &status);
    if (status != GL_TRUE) {
        std::cout << "failed to link program" << std::endl;

        GLint length = 0;
        glGetProgramiv(data->program, GL_INFO_LOG_LENGTH, &length);
        if (length > 0) {
            std::string log(static_cast<std::string::size_type>(length), '\0');
            // data->program is a program object, so its log must be read with
            // the program variant, not glGetShaderInfoLog.
            glGetProgramInfoLog(data->program, length, NULL, &log[0]);
            std::cout << log.c_str() << std::endl;
        }
        return false;
    }

    data->uniforms[MVP_MATRIX] = glGetUniformLocation(data->program, "mvpMatrix");
    data->uniforms[FRAME_TEX] = glGetUniformLocation(data->program, "frameTex");
    data->attribs[VERTICES] = glGetAttribLocation(data->program, "vertex");
    data->attribs[TEX_COORDS] = glGetAttribLocation(data->program, "texCoord0");

    return true;
}
// draw frame in opengl context
//
// Clears the color buffer, draws the quad described by data->vao (six
// unsigned-byte indices forming two triangles) with data->frame_tex bound,
// and presents the result with glfwSwapBuffers().
// NOTE(review): the shader program is not selected here; this relies on the
// caller having made data->program current - confirm against main().
void drawFrame(AppData *data) {
    glClear(GL_COLOR_BUFFER_BIT);
    glBindTexture(GL_TEXTURE_2D, data->frame_tex);
    glBindVertexArray(data->vao);
    // Indices are read from the element buffer recorded in the VAO, offset 0.
    glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_BYTE, (const void *)0);
    glBindVertexArray(0);
    glfwSwapBuffers();
}
// Program entry point. Opens the video file named by argv[1], locates and
// opens a decoder for its video stream, allocates the decode and RGB frames,
// opens a GLFW window, builds the shader program and GL objects for a
// textured quad, then keeps calling readFrame() until it fails or the user
// quits. Returns -1 on every visible error path.
// NOTE(review): this listing has lost lines - closing braces, the
// continuation lines of several calls, and probably whole statements.
// Restore them from the original source before building.
int main(int argc, char *argv[]) {
// The file to play is the first command-line argument.
if (argc < 2) {
std::cout << "provide a filename" << std::endl;
return -1;
// initialize libav
// NOTE(review): no registration/initialization call is visible after the
// comment above - confirm whether one was dropped.
// initialize custom data structure
// NOTE(review): no initializeAppData(&data) call is visible, yet
// data.fmt_ctx is passed to avformat_open_input and data.stream_idx is
// compared with -1 below, so both need defined values first - confirm.
AppData data;
// open video
if (avformat_open_input(&data.fmt_ctx, argv[1], NULL, NULL) < 0) {
std::cout << "failed to open input" << std::endl;
return -1;
// find stream info
if (avformat_find_stream_info(data.fmt_ctx, NULL) < 0) {
std::cout << "failed to get stream info" << std::endl;
return -1;
// dump debug info
av_dump_format(data.fmt_ctx, 0, argv[1], 0);
// find the video stream
// NOTE(review): as written there is no break, so the last video stream wins.
for (unsigned int i = 0; i < data.fmt_ctx->nb_streams; ++i)
if (data.fmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
data.stream_idx = i;
// NOTE(review): without braces only the message is conditional here and the
// return that follows always runs; the same applies to the two checks below.
if (data.stream_idx == -1)
std::cout << "failed to find video stream" << std::endl;
return -1;
data.video_stream = data.fmt_ctx->streams[data.stream_idx];
data.codec_ctx = data.video_stream->codec;
// find the decoder
data.decoder = avcodec_find_decoder(data.codec_ctx->codec_id);
if (data.decoder == NULL)
std::cout << "failed to find decoder" << std::endl;
return -1;
// open the decoder
if (avcodec_open2(data.codec_ctx, data.decoder, NULL) < 0)
std::cout << "failed to open codec" << std::endl;
return -1;
// allocate the video frames
// av_frame receives decoded pictures; gl_frame is set up as an RGB24 picture
// over internal_buffer.
data.av_frame = avcodec_alloc_frame();
data.gl_frame = avcodec_alloc_frame();
// NOTE(review): the next call is cut off; presumably the height argument
// followed on the missing line.
int size = avpicture_get_size(PIX_FMT_RGB24, data.codec_ctx->width,
uint8_t *internal_buffer = (uint8_t *)av_malloc(size * sizeof(uint8_t));
avpicture_fill((AVPicture *)data.gl_frame, internal_buffer, PIX_FMT_RGB24,
data.codec_ctx->width, data.codec_ctx->height);
data.packet = (AVPacket *)av_malloc(sizeof(AVPacket));
// initialize glfw
if (!glfwInit()) {
std::cout << "glfw failed to init" << std::endl;
return -1;
// open a window
// The window is 300 pixels high; its width follows the video aspect ratio.
float aspect = (float)data.codec_ctx->width / (float)data.codec_ctx->height;
int adj_width = aspect * 300;
int adj_height = 300;
if (!glfwOpenWindow(adj_width, adj_height, 0, 0, 0, 0, 0, 0, GLFW_WINDOW)) {
std::cout << "failed to open window" << std::endl;
return -1;
// initialize opengl
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
// initialize shaders
if (!buildProgram(&data)) {
std::cout << "failed to initialize shaders" << std::endl;
return -1;
// initialize renderable
glGenVertexArrays(1, &data.vao);
glGenBuffers(1, &data.vert_buf);
glBindBuffer(GL_ARRAY_BUFFER, data.vert_buf);
// Four vertices of five floats each; presumably x, y, z followed by two
// texture coordinates, matching the 3- and 2-component attributes and the
// 20-byte stride used below.
float quad[20] = {
-1.0f, 1.0f, 0.0f, 0.0f, 0.0f,
-1.0f, -1.0f, 0.0f, 0.0f, 1.0f,
1.0f, -1.0f, 0.0f, 1.0f, 1.0f,
1.0f, 1.0f, 0.0f, 1.0f, 0.0f
glBufferData(GL_ARRAY_BUFFER, sizeof(quad), quad, GL_STATIC_DRAW);
// NOTE(review): both attribute-pointer calls are cut off before their final
// (offset) argument.
glVertexAttribPointer(data.attribs[VERTICES], 3, GL_FLOAT, GL_FALSE, 20,
glVertexAttribPointer(data.attribs[TEX_COORDS], 2, GL_FLOAT, GL_FALSE, 20,
glGenBuffers(1, &data.elem_buf);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.elem_buf);
// Index list for two triangles that share the 0-2 edge of the quad.
unsigned char elem[6] = {
0, 1, 2,
0, 2, 3
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(elem), elem, GL_STATIC_DRAW);
glGenTextures(1, &data.frame_tex);
glBindTexture(GL_TEXTURE_2D, data.frame_tex);
// Rows of the uploaded RGB data are tightly packed (1-byte alignment).
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
// NOTE(review): the texture allocation call is cut off after the height.
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, data.codec_ctx->width, data.codec_ctx->height,
// The sampler uniform reads from texture unit 0.
glUniform1i(data.uniforms[FRAME_TEX], 0);
// Orthographic projection covering [-1, 1] on every axis.
glm::mat4 mvp = glm::ortho(-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f);
glUniformMatrix4fv(data.uniforms[MVP_MATRIX], 1, GL_FALSE, glm::value_ptr(mvp));
bool running = true;
// run the application mainloop
// Continues while readFrame() succeeds; stops when ESC is pressed or the
// window is no longer open.
// NOTE(review): no drawFrame(&data) call is visible inside the loop - confirm.
while (readFrame(&data) && running) {
running = !glfwGetKey(GLFW_KEY_ESC) && glfwGetWindowParam(GLFW_OPENED);
// clean up
// NOTE(review): no cleanup call, return statement or closing brace is
// visible after this comment.
Copy link

Is this a legitimate reference to follow for decoding and displaying videos using OpenGL?

Copy link

batousik commented Jul 7, 2019


Copy link

lattice0 commented Jul 28, 2019

Do you have any idea on how to do what you did but totally in hardware? I mean, in ffmpeg, you can decode to hardware. Instead of getting the video back to CPU memory, I'd like to apply color conversion in the GPU using Open GL. If you know anything, please give me a hint. Thanks!

Copy link

Sorry, I don't keep up with the latest FFMpeg developments anymore. I haven't used this code in years - I have no idea if it even works still.

Copy link

batousik commented Jul 29, 2019

Sorry, I don't keep up with the latest FFMpeg developments anymore. I haven't used this code in years - I have no idea if it even works still.

👍 :)

Copy link

Do you have any idea on how to do what you did but totally in hardware? I mean, in ffmpeg, you can decode to hardware. Instead of getting the video back to CPU memory, I'd like to apply color conversion in the GPU using Open GL. If you know anything, please give me a hint. Thanks!

I'm working towards a similar solution, though I am building the c++ lib for the android environment. If you will get any insights, please share. @LucasZanella

I am trying to minimize delay for an rtsp stream coming from the ip camera. I have started with decoding, scaling and converting YUV -> RGB with FFMPEG, but found that i get better performance offloading scaling and converting using a shader.

Copy link

hi @batousik, I will also try to integrate android in my project, I'm right now writing and testing a big dockerfile that can compile for linux android and windows.

Here's my project:

I'm also dealing with RTSP streams from cameras, you can see that I've tested rendering on qt and gtk, and the next target is android. I didn't try yet to render on android, but I'm curious on how you're doing it.

Are you converting yuv to rgb using sw_scale? I guess that you are passing the data to the shader from CPU memory to GPU memory. I was trying to do GPU decoding -> GPU color conversion -> GPU rendering, directly without CPU memory access. It's possible on desktop but I don't know about phones.

Let's share knowledge!

ps: things on my project tree are pretty much broken right now, so if you try to build things, it'll likely not work, but if you need any help, please call

Copy link

batousik commented Jul 31, 2019

@LucasZanella, Hey!

Just got it kinda working yesterday.

I am a Java dev, getting up to this point was a nightmare :) I have followed TheCherno for his c++ and OpenGL course.

I got all of the ffmpeg/android repos I could from git to get insight on what is ffmpeg and how it works. The docs and reference are kinda confusing (and c syntax is very hard for me to get around with)

Also this blog explains the internals of the video streaming in an easy way

Using this project I have finally managed to compile ffmpeg for android (in config file enable network for rtsp)

Now I have a static queue in my C code; I've used this project for the queue. I have an OpenGL Java view and the respective C++ code, which is called through JNI for the rendering process. The renderer is a clone of the one from TheCherno's OpenGL tutorial. I've got two triangles and a texture; next, I'm calling glTexSubImage2D on each frame object I dequeue.

I have profiled ffmpeg/opengl windows test app, and swscale took like 90% of the video processing pipe. For now i am only receiving black and white video. So i have simplified my code down to:

  1. Get avpacket
  2. Decode
  3. Push a *avframe to the thread safe queue
  4. Dequeue by renderer
  5. Update glTexSubImage2D
  6. Scaling is done by opengl
  7. Shader takes the y component, applies a bit of math and creates color where all r,g,b = y - randomFloat * randomFloat and a =1.0, the link here is full of magic yuv conversions... I get the main picture on how it should be done, but the actual math doesnt make sense (so grayscale for now)

I could tell you more later, dont have the sources on me and check out your project

PS. About hardware acceleration: I'd love to use it, but I'm properly stuck on how to proceed. I am targeting API 19 (for at least OpenGL ES 3), and Google made the MediaCodec class, but it's not available via the C++ API until API 21 or something like that. There is the OpenMAX AL standard, but I don't quite know yet what it is. I have a picture in my head that the decoder should create a buffer on the GPU and tell OpenGL to use it. Soon I will be getting HD video streams and things are not looking that good :)

Copy link

@batousik I'm using a nice RTSP lib in my project: it does the NALU parsing automatically, so I didn't have to learn about this low level stuff. When I pass the packets from this lib to ffmpeg, it simply works.

I'm also using the same project you used to compile for Android, but I'm creating a dockerfile for the building process. I modified the project a little bit, you can find at my project under docker/ffmpeg_builder

I didn't understand, how can you render from C++? I thought it was only possible through Java.

What you mean by scaling in opengl? You mean just scaling or color conversion too?

Copy link

Google has a bunch of example projects

Copy link

@batousik THANKS, this is very useful to me!!!!!

Copy link

batousik commented Jul 31, 2019

Here(ndk) and here(Java) there is information about MediaCodec. Maybe it might give more insight on how to do hw decoding.

I have also got LIVE555 project compiled for android and used that to process rtsp on the side, but i have no decoder to feed the data to. (Not trying with ffmpeg since this path only gives me software decoding)

For now ffmpeg does the processing of the rtsp stream + decoding

Copy link

What you mean by scaling in opengl? You mean just scaling or color conversion too?

  • Color conversion is done by the shader (i am getting black and white stream and i dont actually have colors so i am just using the Y from YUV to create glColor(y, y, y, 1.0) )
  • Scaling is done by open gl, cause it just stretches the given image up to the texture size

Copy link

batousik commented Aug 1, 2019

Shader code

// GLSL ES 3.00 fragment shader: reads the luma sample from u_Texture_y,
// rescales it with 1.1643 * (y - 0.0625) and outputs it as an opaque gray.
static const char FRAGMENT_SHADER[] =
        "#version 300 es\n"
        "precision mediump float;\n"
        "layout(location = 0) out vec4 color;\n"
        "in vec2 v_TexCoord;\n"
        "uniform sampler2D u_Texture_y;\n"
        "void main() {\n"
        "    float y = texture(u_Texture_y, v_TexCoord).r;\n"
        "    y = 1.1643 * (y - 0.0625);\n"
        "    color = vec4(y, y, y, 1.0);\n"
        "}\n";  // closes the GLSL main() and terminates the initializer

@LucasZanella is this y = 1.1643 * (y - 0.0625) required or I can just use the y component?

Copy link

I am targeting API 19 (for at least opengles3) and google made the MediaCodec class, but its not available via c++ api till API 21 or something like that.

Could you tell me about this MediaCodec class? C++?

Copy link

there is limited or no access to MediaCodec from c++ with API 19. I am currently stuck with further implementation. Since emulator is able to play the video, but the actual phones cannot.

Copy link

hradec commented Jul 19, 2021

Just in case someone falls here and want to build this code, I just did it today, and had to make some adjustments to account for newer ffmpeg API changes (version > 55), and a glew initialization that was needed to build shaders.

You can find the fully "buildable" code in here:

there's also a g++ command line example to build, and don't forget to install glfw2... it won't build with glfw3!

Copy link

tuduweb commented Oct 29, 2022

hi @batousik, I will also try to integrate android in my project, I'm right now writing and testing a big dockerfile that can compile for linux android and windows.

Here's my project:

I'm also dealing with RTSP streams from cameras, you can see that I've tested rendering on qt and gtk, and the next target is android. I didn't try yet to render on android, but I'm curious on how you're doing it.

Are you converting yuv to rgb using sw_scale? I guess that you are passing the data to the shader from CPU memory to GPU memory. I was trying to do GPU decoding -> GPU color conversion -> GPU rendering, directly without CPU memory access. It's possible on desktop but I don't know about phones.

Let's share knowledge!

ps: things on my project tree are pretty much broken right now, so if you try to build things, it'll likely not work, but if you need any help, please call

GPU decoding -> GPU color conversion -> GPU rendering, directly without CPU.
Hi. Does this program work? Could you please point me to a reference article about this solution? Thanks a lot. @lattice0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment