/gist:a3ac583194050081f5bc Secret

## gistfile1.cpp
#include "stdafx.h"
#define MAX_AUDIO_PACKET_SIZE (128 * 1024)

#include <iostream>
#include <fstream>

#include <string>
#include <vector>
#include <map>

#include <deque>
#include <queue>

#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <conio.h>

extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavdevice/avdevice.h"
#include "libswscale/swscale.h"
#include "libavutil/dict.h"
#include "libavutil/error.h"
#include "libavutil/opt.h"
#include <libavutil/fifo.h>
#include <libavutil/imgutils.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
}
/* Codec the output audio is encoded with. It selects the resampler output
 * sample format and the encoder sample format (FLTP for Vorbis, S16 otherwise). */
AVCodecID			outputAudioFormat = AV_CODEC_ID_VORBIS;


/* Constants used by the video helpers (add_stream / open_video). */
static int sws_flags = SWS_BICUBIC;
#define STREAM_DURATION   50.0
#define STREAM_FRAME_RATE 25 /* 25 images/s */
#define STREAM_NB_FRAMES  ((int)(STREAM_DURATION * STREAM_FRAME_RATE))
#define STREAM_PIX_FMT    AV_PIX_FMT_YUV420P /* default pix_fmt */


/* ---- Input (decoding) state, set up by open_audio_input() ---- */
AVFormatContext*    fmt_ctx= NULL;              /* demuxer context of the source file */
int                    audio_stream_index = -1; /* first audio stream in fmt_ctx, -1 until found */
AVCodecContext *    codec_ctx_audio = NULL;     /* decoder context of that stream */
AVCodec*            codec_audio = NULL;         /* decoder matching codec_ctx_audio */
AVFrame*            decoded_frame = NULL;       /* receives decoded audio in decode_packet() */
uint8_t**            audio_dst_data = NULL;     /* plane pointers of the resampled output buffer */
int                    got_frame = 0;           /* set by the decoder; main() writes only when non-zero */
int                    audiobufsize = 0;        /* reset in decode_frame(); otherwise unused in the visible code */
AVPacket            input_packet;               /* packet most recently read by decode_frame() */
int                    audio_dst_linesize = 0;  /* line size reported by the av_samples_* helpers */
int                    audio_dst_bufsize = 0;   /* byte size of the last resampled chunk */
SwrContext *        swrContext = NULL;          /* converts decoder output to the encoder sample format */

/* ---- Output (encoding/muxing) state, set up by open_encoder() ---- */
AVOutputFormat *    output_format = NULL ;
AVFormatContext *    output_fmt_ctx= NULL;
AVStream *            audio_st = NULL;
AVStream*			video_st = NULL;            /* not referenced in the visible code */
AVCodec *            audio_codec = NULL;
AVCodec*			video_codec = NULL;         /* not referenced in the visible code */
double                audio_pts = 0.0;          /* progress value printed by main() */
/* Frame handed to the encoder; its nb_samples is also used as the
 * swr_convert() output count. Allocated during static initialization. */
AVFrame *            out_frame = avcodec_alloc_frame();

/* Samples per encoder frame; overwritten by open_audio(). */
int                    audio_input_frame_size = 64;

uint8_t *            audio_data_buf = NULL;     /* not referenced in the visible code */
uint8_t *            audio_out = NULL;          /* not referenced in the visible code */
/* Encoder parameters: main() copies them from the decoder context and
 * add_audio_stream() applies them to the output stream. */
int                    audio_bit_rate;
int                    audio_sample_rate;
int                    audio_channels;
/* Both are set to the decoder sample rate in open_audio_input(). */
int					sourceSampleRate=0;
int					destSampleRate = 0;

int					dst_nb_samples = 0;         /* estimated output samples for the current frame */
int					pivotIndex = 0;             /* not referenced in the visible code */
int					max_dst_nb_samples = 0;     /* largest dst_nb_samples seen so far */
int					samples_count=0;            /* samples sent to the encoder; basis of the frame pts */


/* Forward declarations: input side. */
int decode_packet();
int open_audio_input(char* src_filename);
int decode_frame();

/* Forward declarations: output side. */
int open_encoder(char* output_filename);
AVStream *add_audio_stream(AVFormatContext *oc, AVCodec **codec,
    enum AVCodecID codec_id);
int open_audio(AVFormatContext *oc, AVCodec *codec, AVStream *st);
void close_audio(AVFormatContext *oc, AVStream *st);
void write_audio_frame(uint8_t ** audio_src_data, int audio_src_bufsize);

/* Video state used by open_video(); frame_count is not referenced in the visible code. */
static AVFrame *frame;
static AVPicture src_picture, dst_picture;
static int frame_count;
/* Add an output stream.
 *
 * Looks up the encoder for codec_id (handed back through *codec), creates a
 * new stream on oc and fills the stream's codec context with fixed defaults:
 * audio gets 64 kbit/s, 44.1 kHz, 2 channels; video gets 400 kbit/s, 352x288,
 * STREAM_FRAME_RATE fps and a GOP of 12. Terminates the process when the
 * encoder is missing or the stream cannot be allocated. */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec,
                            enum AVCodecID codec_id)
{
    /* find the encoder */
    *codec = avcodec_find_encoder(codec_id);
    AVCodec *encoder = *codec;
    if (encoder == NULL) {
        fprintf(stderr, "Could not find encoder for '%s'\n",
                avcodec_get_name(codec_id));
        exit(1);
    }

    AVStream *stream = avformat_new_stream(oc, encoder);
    if (stream == NULL) {
        fprintf(stderr, "Could not allocate stream\n");
        exit(1);
    }
    stream->id = oc->nb_streams - 1;

    AVCodecContext *enc = stream->codec;

    if (encoder->type == AVMEDIA_TYPE_AUDIO) {
        /* Prefer the encoder's first advertised sample format. */
        if (encoder->sample_fmts)
            enc->sample_fmt = encoder->sample_fmts[0];
        else
            enc->sample_fmt = AV_SAMPLE_FMT_FLTP;
        enc->bit_rate    = 64000;
        enc->sample_rate = 44100;
        enc->channels    = 2;
    } else if (encoder->type == AVMEDIA_TYPE_VIDEO) {
        enc->codec_id = codec_id;
        enc->bit_rate = 400000;
        /* Resolution must be a multiple of two. */
        enc->width  = 352;
        enc->height = 288;
        /* The time base is the unit (in seconds) frame timestamps are
         * expressed in; for fixed-fps content it is 1/framerate and
         * timestamps advance by 1 per frame. */
        enc->time_base.den = STREAM_FRAME_RATE;
        enc->time_base.num = 1;
        enc->gop_size      = 12; /* at most twelve frames between intra frames */
        enc->pix_fmt       = STREAM_PIX_FMT;
        if (enc->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
            /* just for testing, we also add B frames */
            enc->max_b_frames = 2;
        }
        if (enc->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
            /* Avoids macroblocks in which some coefficients overflow. */
            enc->mb_decision = 2;
        }
    }

    /* Some formats want stream headers to be separate. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        enc->flags |= CODEC_FLAG_GLOBAL_HEADER;

    return stream;
}


/* Open the video encoder of stream st and allocate the reusable picture
 * buffers (global frame, dst_picture and, for non-YUV420P encoders,
 * src_picture).
 *
 * oc    - output context (unused here)
 * codec - encoder to open
 * st    - stream whose codec context is opened
 *
 * Any failure prints a diagnostic and terminates the process. */
static void open_video(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
    int ret;
    char errbuf[128];
    AVCodecContext *c = st->codec;

    (void)oc;

    /* open the codec */
    ret = avcodec_open2(c, codec, NULL);
    if (ret < 0) {
        /* av_err2str() does not compile as C++, so format the error by hand. */
        av_strerror(ret, errbuf, sizeof(errbuf));
        fprintf(stderr, "Could not open video codec: %s\n", errbuf);
        exit(1);
    }

    /* allocate and init a re-usable frame */
    frame = av_frame_alloc();
    if (!frame) {
        fprintf(stderr, "Could not allocate video frame\n");
        exit(1);
    }
    frame->format = c->pix_fmt;
    frame->width  = c->width;
    frame->height = c->height;

    /* Allocate the encoded raw picture. */
    ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
    if (ret < 0) {
        av_strerror(ret, errbuf, sizeof(errbuf));
        fprintf(stderr, "Could not allocate picture: %s\n", errbuf);
        exit(1);
    }

    /* If the output format is not YUV420P, then a temporary YUV420P
     * picture is needed too. It is then converted to the required
     * output format. */
    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
        ret = avpicture_alloc(&src_picture, AV_PIX_FMT_YUV420P, c->width, c->height);
        if (ret < 0) {
            av_strerror(ret, errbuf, sizeof(errbuf));
            fprintf(stderr, "Could not allocate temporary picture: %s\n", errbuf);
            exit(1);
        }
    }

    /* copy data and linesize picture pointers to frame */
    *((AVPicture *)frame) = dst_picture;
}

int open_audio_input(char* src_filename)
{
    int i =0;
    /* open input file, and allocate format context */
    if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0)
    {
        fprintf(stderr, "Could not open source file %s\n", src_filename);
        exit(1);
    }

    // Retrieve stream information
    if(avformat_find_stream_info(fmt_ctx, NULL)<0)
        return -1; // Couldn't find stream information

    // Dump information about file onto standard error
    av_dump_format(fmt_ctx, 0, src_filename, 0);

    // Find the first video stream
    for(i=0; i<fmt_ctx->nb_streams; i++)
    {
        if(fmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO)
        {
            audio_stream_index=i;
            break;
        }
    }
    if ( audio_stream_index != -1 )
    {
        // Get a pointer to the codec context for the audio stream
        codec_ctx_audio=fmt_ctx->streams[audio_stream_index]->codec;

        // Find the decoder for the video stream
        codec_audio=avcodec_find_decoder(codec_ctx_audio->codec_id);
        if(codec_audio==NULL) {
            fprintf(stderr, "Unsupported audio codec!\n");
            return -1; // Codec not found
        }

        // Open codec
        AVDictionary *codecDictOptions = NULL;
        if(avcodec_open2(codec_ctx_audio, codec_audio, &codecDictOptions)<0)
            return -1; // Could not open codec

        // Set up SWR context once you've got codec information
        swrContext = swr_alloc();
        av_opt_set_int(swrContext, "in_channel_layout",  codec_ctx_audio->channel_layout, 0);
        av_opt_set_int(swrContext, "out_channel_layout", codec_ctx_audio->channel_layout,  0);
        av_opt_set_int(swrContext, "in_sample_rate",     codec_ctx_audio->sample_rate, 0);
        av_opt_set_int(swrContext, "out_sample_rate",    codec_ctx_audio->sample_rate, 0);
        av_opt_set_sample_fmt(swrContext, "in_sample_fmt",  codec_ctx_audio->sample_fmt, 0);
		if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
		{
			av_opt_set_sample_fmt(swrContext, "out_sample_fmt", AV_SAMPLE_FMT_FLTP,  0);
		}
		else
		{
			av_opt_set_sample_fmt(swrContext, "out_sample_fmt", AV_SAMPLE_FMT_S16,  0);
		}
        int rv = swr_init(swrContext);

		sourceSampleRate	=	destSampleRate = codec_ctx_audio->sample_rate;

		// Allocate audio frame
        if ( decoded_frame == NULL ) decoded_frame = avcodec_alloc_frame();
        int nb_planes = 0;
        AVStream* audio_stream = fmt_ctx->streams[audio_stream_index];
        nb_planes = av_sample_fmt_is_planar(codec_ctx_audio->sample_fmt) ? codec_ctx_audio->channels : 1;
        int tempSize =  sizeof(uint8_t *) * nb_planes;
        audio_dst_data = (uint8_t**)av_mallocz(tempSize);
        if (!audio_dst_data)
        {
            fprintf(stderr, "Could not allocate audio data buffers\n");
        }
        else
        {
            for ( int i = 0 ; i < nb_planes ; i ++ )
            {
                audio_dst_data[i] = NULL;
            }
        }
    }
}

/* Read the next packet from the input and pass it to decode_packet().
 *
 * Clears got_frame and audiobufsize first. Returns 0 when no input is open,
 * the negative av_read_frame() result when reading fails, and otherwise the
 * result of decode_packet(). */
int decode_frame()
{
    int status = 0;

    got_frame = 0;
    if (fmt_ctx != NULL)
    {
        audiobufsize = 0;
        status = av_read_frame(fmt_ctx, &input_packet);
        if (status >= 0)
        {
            status = decode_packet();
            // Release the packet that av_read_frame() filled in.
            av_free_packet(&input_packet);
        }
    }
    return status;
}

int decode_packet()
{
    int rv = 0;
    int ret = 0;

    //audio stream?
    if(input_packet.stream_index == audio_stream_index)
    {
		avcodec_get_frame_defaults(decoded_frame);
		while( input_packet.size > 0 )
		{
			int result = avcodec_decode_audio4(codec_ctx_audio, decoded_frame, &got_frame, &input_packet);
			if ( result < 0)
			{
				fprintf(stderr, "Error decoding audio frame\n");
				//return ret;
			}
			else
			{
				if ( got_frame )
				{
					dst_nb_samples = (int)av_rescale_rnd(swr_get_delay(swrContext, sourceSampleRate) + decoded_frame->nb_samples, sourceSampleRate, destSampleRate, AV_ROUND_UP);
					if ( dst_nb_samples > max_dst_nb_samples )
					{
						max_dst_nb_samples = dst_nb_samples;
						if ( audio_dst_data[0] )
						{
							av_freep(&audio_dst_data[0]);
							audio_dst_data[0] = NULL;
						}
					}
					if ( audio_dst_data[0] == NULL )
					{
						if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
						{
							ret = av_samples_alloc(audio_dst_data, &audio_dst_linesize, codec_ctx_audio->channels,
								decoded_frame->nb_samples, (AVSampleFormat)AV_SAMPLE_FMT_FLTP, 0);
						}
						else
						{
							ret = av_samples_alloc(audio_dst_data, &audio_dst_linesize, codec_ctx_audio->channels,
								decoded_frame->nb_samples, (AVSampleFormat)AV_SAMPLE_FMT_S16, 0);
						}
					}
					/* TODO: extend return code of the av_samples_* functions so that this call is not needed */
					int resampled  = swr_convert(swrContext, audio_dst_data, out_frame->nb_samples,
						(const uint8_t **)(decoded_frame->extended_data), decoded_frame->nb_samples);
					char str[900]="";
					sprintf(str,"out_frame->nb_samples:\t%d; decoded_frame->nb_samples:\t%d",out_frame->nb_samples,decoded_frame->nb_samples );
					if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
					{
						audio_dst_bufsize  = av_samples_get_buffer_size(&audio_dst_linesize, decoded_frame->channels, resampled, (AVSampleFormat)AV_SAMPLE_FMT_FLTP, 1);
					}
					else
					{
						audio_dst_bufsize  = av_samples_get_buffer_size(&audio_dst_linesize, decoded_frame->channels, resampled, (AVSampleFormat)AV_SAMPLE_FMT_S16, 1);
					}

					input_packet.size -= result;
					input_packet.data += result;
				}
				else
				{
					input_packet.size	=	0;
					input_packet.data	=	NULL;
				}
			}
		}
    }
    return rv;
}

/* Create the output context for output_filename, add and open the audio
 * stream using the container's default audio codec, open the output file
 * when the format needs one, and write the container header.
 *
 * Sets the globals output_fmt_ctx, output_format, audio_st and audio_codec.
 * Returns 0 on success and a negative value on failure. */
int open_encoder(char* output_filename )
{
    int rv = 0;

    /* allocate the output media context */
    avformat_alloc_output_context2(&output_fmt_ctx, NULL, NULL, output_filename);
    if (!output_fmt_ctx) {
        printf("Could not deduce output format from file extension: using MPEG.\n");
        avformat_alloc_output_context2(&output_fmt_ctx, NULL, "mpeg", output_filename);
    }

    audio_st = NULL;
    if (!output_fmt_ctx)
        return -1;
    output_format = output_fmt_ctx->oformat;

    /* Add the audio stream using the default format codecs
    * and initialize the codecs. */
    if (output_format->audio_codec != AV_CODEC_ID_NONE)
    {
        audio_st = add_audio_stream(output_fmt_ctx, &audio_codec, output_format->audio_codec);
    }

    /* Now that all the parameters are set, we can open the audio
    * codec and allocate the necessary encode buffers. */
    if (audio_st)
    {
        rv = open_audio(output_fmt_ctx, audio_codec, audio_st);
        if ( rv < 0 ) return rv;
    }

    av_dump_format(output_fmt_ctx, 0, output_filename, 1);

    /* open the output file, if needed */
    if (!(output_format->flags & AVFMT_NOFILE))
    {
        if (avio_open(&output_fmt_ctx->pb, output_filename, AVIO_FLAG_WRITE) < 0) {
            fprintf(stderr, "Could not open '%s'\n", output_filename);
            return -1;
        }
    }

    /* Write the stream header, if any. This must also happen for formats
    * that do their own I/O (AVFMT_NOFILE). */
    if (avformat_write_header(output_fmt_ctx, NULL) < 0)
    {
        fprintf(stderr, "Error occurred when opening output file\n");
        return -1;
    }

    return 0;
}

/* Create the audio output stream on oc for encoder codec_id.
 *
 * The encoder is handed back through *codec. The stream's codec context gets
 * its sample format from outputAudioFormat (FLTP for Vorbis, S16 otherwise)
 * and its bit rate, sample rate and channel count from the audio_* globals.
 * Terminates the process when the encoder is missing or the stream cannot
 * be allocated. */
AVStream *add_audio_stream(AVFormatContext *oc, AVCodec **codec,
    enum AVCodecID codec_id)
{
    /* find the audio encoder */
    *codec = avcodec_find_encoder(codec_id);
    if (*codec == NULL) {
        fprintf(stderr, "Could not find codec\n");
        exit(1);
    }

    AVStream *stream = avformat_new_stream(oc, *codec);
    if (stream == NULL) {
        fprintf(stderr, "Could not allocate stream\n");
        exit(1);
    }
    stream->id = 1;

    AVCodecContext *enc = stream->codec;

    /* put sample parameters */
    enc->sample_fmt  = (outputAudioFormat == AV_CODEC_ID_VORBIS)
        ? AV_SAMPLE_FMT_FLTP
        : AV_SAMPLE_FMT_S16;
    enc->bit_rate    = audio_bit_rate;
    enc->sample_rate = audio_sample_rate;
    enc->channels    = audio_channels;

    // some formats want stream headers to be separate
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        enc->flags |= CODEC_FLAG_GLOBAL_HEADER;

    return stream;
}

/* Open the audio encoder of stream st and record its frame size.
 *
 * oc    - output context (unused here)
 * codec - encoder to open
 * st    - stream whose codec context is opened
 *
 * Sets audio_input_frame_size (10000 for encoders with variable frame size,
 * the encoder's frame_size otherwise) and mirrors it into
 * out_frame->nb_samples. Returns the avcodec_open2() result on success and
 * -1 on failure. */
int open_audio(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
    (void)oc;

    if (out_frame == NULL)
    {
        fprintf(stderr, "could not allocate output frame\n");
        return -1;
    }

    /* NOTE(review): AVFormatContext.duration and AVStream.duration are
     * documented in different time units - confirm this hint is intended. */
    if (fmt_ctx != NULL)
        st->duration = fmt_ctx->duration;

    AVCodecContext *c = st->codec;

    /* open it */
    int ret = avcodec_open2(c, codec, NULL);
    if (ret < 0)
    {
        fprintf(stderr, "could not open codec\n");
        return -1;
    }

    if (c->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)
        audio_input_frame_size = 10000;
    else
        audio_input_frame_size = c->frame_size;
    out_frame->nb_samples = audio_input_frame_size;

    return ret;
}

/* Close the encoder attached to stream st.
 *
 * oc is unused. A NULL stream (encoder never set up) is ignored. */
void close_audio(AVFormatContext *oc, AVStream *st)
{
    (void)oc;
    if (st == NULL || st->codec == NULL)
        return;
    avcodec_close(st->codec);
}

void write_audio_frame(uint8_t ** audio_dst_data, int audio_dst_bufsize)
{
    AVFormatContext *oc = output_fmt_ctx;
    AVStream *st = audio_st;
    if ( oc == NULL || st == NULL ) return;
    AVCodecContext *c;
    AVPacket pkt = { 0 }; // data and size must be 0;
    int got_packet=0, ret=0;

	av_init_packet(&pkt);
	c = st->codec;

	out_frame->nb_samples = audio_input_frame_size;

	AVRational r;
	r.num = 1;
	r.den = c->sample_rate;
    out_frame->pts = av_rescale_q(samples_count, (AVRational)r, c->time_base);
    avcodec_fill_audio_frame(out_frame, c->channels, c->sample_fmt,
                             audio_dst_data[0], audio_dst_bufsize, 0);
    samples_count += out_frame->nb_samples;

    ret = avcodec_encode_audio2(c, &pkt, out_frame, &got_packet);
    if (ret < 0)
	{
        return;
    }

    if (!got_packet)
        return;

    /* rescale output packet timestamp values from codec to stream timebase */
    pkt.pts = av_rescale_q_rnd(pkt.pts, c->time_base, st->time_base, (AVRounding )(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
    pkt.dts = av_rescale_q_rnd(pkt.dts, c->time_base, st->time_base, (AVRounding )(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
    pkt.duration = av_rescale_q(pkt.duration, c->time_base, st->time_base);
    pkt.stream_index = st->index;

	char str[999]="";
	sprintf(str,"out_frame->nb_samples:\t%d",out_frame->nb_samples);
    /* Write the compressed frame to the media file. */
    ret = av_interleaved_write_frame(oc, &pkt);
    if (ret != 0)
	{
        exit(1);
    }
    av_free_packet(&pkt);
}

/* Drain the encoder of stream st: keep requesting packets with a NULL frame
 * until it reports no more output, writing each packet to oc.
 *
 * Does nothing when oc or st is NULL. An encoder error terminates the
 * process; a failed write is reported and stops the drain. */
void write_delayed_frames(AVFormatContext *oc, AVStream *st)
{
    if (oc == NULL || st == NULL || st->codec == NULL)
        return;

    AVCodecContext *c = st->codec;
    int got_output = 1;

    while (got_output)
    {
        AVPacket pkt;
        av_init_packet(&pkt);
        /* data/size of zero let the encoder allocate the payload */
        pkt.data = NULL;
        pkt.size = 0;

        int ret = avcodec_encode_audio2(c, &pkt, NULL, &got_output);
        if (ret < 0)
        {
            fprintf(stderr, "error encoding frame\n");
            exit(1);
        }

        if (got_output)
        {
            pkt.pts = av_rescale_q_rnd(pkt.pts, c->time_base, st->time_base, (AVRounding )(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
            pkt.dts = av_rescale_q_rnd(pkt.dts, c->time_base, st->time_base, (AVRounding )(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));
            pkt.duration = av_rescale_q(pkt.duration, c->time_base, st->time_base);
            pkt.stream_index = st->index;
            if (c->coded_frame && c->coded_frame->key_frame)
                pkt.flags |= AV_PKT_FLAG_KEY;

            /* Write the compressed frame to the media file. */
            ret = av_interleaved_write_frame(oc, &pkt);
            if (ret < 0)
            {
                fprintf(stderr, "error writing delayed frame\n");
                got_output = 0; /* stop draining */
            }
        }
        av_free_packet(&pkt);
    }
}

int main(int argc, char **argv)
{
    /* register all formats and codecs */
    av_register_all();
    avcodec_register_all();
    avformat_network_init();
    avdevice_register_all();
    int i =0;
	int ret=0;
    char src_filename[90] = "test.mp2";
    char dst_filename[90] = "output.webm";
	outputAudioFormat = AV_CODEC_ID_VORBIS;
    open_audio_input(src_filename);
	if ( codec_ctx_audio->bit_rate == 0 ) codec_ctx_audio->bit_rate = 112000;
    audio_bit_rate        = codec_ctx_audio->bit_rate;
    audio_sample_rate    = codec_ctx_audio->sample_rate;
    audio_channels        = codec_ctx_audio->channels;
    open_encoder( dst_filename );
	int frames= 0;
    while(1)
    {
        int rv = decode_frame();
        if ( rv < 0 )
        {
            break;
        }

		if (audio_st)
		{
			audio_pts = audio_st->pts.val * av_q2d(audio_st->time_base);
		}
		else
		{
			audio_pts = 0.0;
		}
        if ( codec_ctx_audio )
        {
			if ( got_frame )
			{
				write_audio_frame( audio_dst_data, audio_dst_bufsize );
				frames++;
			}
		}
        printf("\naudio_pts: %f", audio_pts);
    }
	while(1)
	{
		dst_nb_samples = (int)av_rescale_rnd(swr_get_delay(swrContext, sourceSampleRate) + decoded_frame->nb_samples, sourceSampleRate, destSampleRate, AV_ROUND_UP);
		if ( dst_nb_samples > max_dst_nb_samples )
		{
			max_dst_nb_samples = dst_nb_samples;
			if ( audio_dst_data[0] )
			{
				av_freep(&audio_dst_data[0]);
				audio_dst_data[0] = NULL;
			}
		}
		if ( audio_dst_data[0] == NULL )
		{
			if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
			{
				ret = av_samples_alloc(audio_dst_data, NULL, codec_ctx_audio->channels,
					decoded_frame->nb_samples, (AVSampleFormat)AV_SAMPLE_FMT_FLTP, 0);
			}
			else
			{
				ret = av_samples_alloc(audio_dst_data, NULL, codec_ctx_audio->channels,
					decoded_frame->nb_samples, (AVSampleFormat)AV_SAMPLE_FMT_S16, 0);
			}
		}
		int resampled = swr_convert(swrContext, audio_dst_data, out_frame->nb_samples,NULL, 0);
		if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
		{
			audio_dst_bufsize  = av_samples_get_buffer_size(&audio_dst_linesize, decoded_frame->channels, resampled, (AVSampleFormat)AV_SAMPLE_FMT_FLTP, 1);
		}
		else
		{
			audio_dst_bufsize  = av_samples_get_buffer_size(&audio_dst_linesize, decoded_frame->channels, resampled, (AVSampleFormat)AV_SAMPLE_FMT_S16, 1);
		}
		if ( audio_dst_bufsize <= 0 ) break;
		audio_pts = audio_st->pts.val * av_q2d(audio_st->time_base);
		printf("\naudio_pts: %f", audio_pts);
		write_audio_frame( audio_dst_data, audio_dst_bufsize );
	}
    write_delayed_frames( output_fmt_ctx, audio_st );
    av_write_trailer(output_fmt_ctx);
    close_audio( output_fmt_ctx, audio_st);
    swr_free(&swrContext);
    avcodec_free_frame(&out_frame);
	getch();
    return 0;
}
	#include "stdafx.h"
	#define MAX_AUDIO_PACKET_SIZE (128 * 1024)

	#include <iostream>
	#include <fstream>

	#include <string>
	#include <vector>
	#include <map>

	#include <deque>
	#include <queue>

	#include <math.h>
	#include <stdlib.h>
	#include <stdio.h>
	#include <conio.h>

	extern "C"
	{
	#include "libavcodec/avcodec.h"
	#include "libavformat/avformat.h"
	#include "libavdevice/avdevice.h"
	#include "libswscale/swscale.h"
	#include "libavutil/dict.h"
	#include "libavutil/error.h"
	#include "libavutil/opt.h"
	#include <libavutil/fifo.h>
	#include <libavutil/imgutils.h>
	#include <libavutil/samplefmt.h>
	#include <libswresample/swresample.h>
	}
	AVCodecID outputAudioFormat = AV_CODEC_ID_VORBIS;


	static int sws_flags = SWS_BICUBIC;
	#define STREAM_DURATION 50.0
	#define STREAM_FRAME_RATE 25 /* 25 images/s */
	#define STREAM_NB_FRAMES ((int)(STREAM_DURATION * STREAM_FRAME_RATE))
	#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P /* default pix_fmt */


	AVFormatContext* fmt_ctx= NULL;
	int audio_stream_index = -1;
	AVCodecContext * codec_ctx_audio = NULL;
	AVCodec* codec_audio = NULL;
	AVFrame* decoded_frame = NULL;
	uint8_t** audio_dst_data = NULL;
	int got_frame = 0;
	int audiobufsize = 0;
	AVPacket input_packet;
	int audio_dst_linesize = 0;
	int audio_dst_bufsize = 0;
	SwrContext * swrContext = NULL;

	AVOutputFormat * output_format = NULL ;
	AVFormatContext * output_fmt_ctx= NULL;
	AVStream * audio_st = NULL;
	AVStream* video_st = NULL;
	AVCodec * audio_codec = NULL;
	AVCodec* video_codec = NULL;
	double audio_pts = 0.0;
	AVFrame * out_frame = avcodec_alloc_frame();

	int audio_input_frame_size = 64;

	uint8_t * audio_data_buf = NULL;
	uint8_t * audio_out = NULL;
	int audio_bit_rate;
	int audio_sample_rate;
	int audio_channels;
	int sourceSampleRate=0;
	int destSampleRate = 0;

	int dst_nb_samples = 0;
	int pivotIndex = 0;
	int max_dst_nb_samples = 0;
	int samples_count=0;


	int decode_packet();
	int open_audio_input(char* src_filename);
	int decode_frame();

	int open_encoder(char* output_filename);
	AVStream add_audio_stream(AVFormatContext oc, AVCodec **codec,
	enum AVCodecID codec_id);
	int open_audio(AVFormatContext oc, AVCodec codec, AVStream *st);
	void close_audio(AVFormatContext oc, AVStream st);
	void write_audio_frame(uint8_t ** audio_src_data, int audio_src_bufsize);

	static AVFrame *frame;
	static AVPicture src_picture, dst_picture;
	static int frame_count;
	/* Add an output stream.
	 *
	 * Looks up the encoder for codec_id (handed back through *codec), creates
	 * a new stream on oc and fills the stream's codec context with fixed
	 * defaults: audio gets 64 kbit/s, 44.1 kHz, 2 channels; video gets
	 * 400 kbit/s, 352x288, STREAM_FRAME_RATE fps and a GOP of 12. Terminates
	 * the process when the encoder is missing or the stream cannot be
	 * allocated. */
	static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec,
	                            enum AVCodecID codec_id)
	{
	    /* find the encoder */
	    *codec = avcodec_find_encoder(codec_id);
	    AVCodec *encoder = *codec;
	    if (encoder == NULL) {
	        fprintf(stderr, "Could not find encoder for '%s'\n",
	                avcodec_get_name(codec_id));
	        exit(1);
	    }

	    AVStream *stream = avformat_new_stream(oc, encoder);
	    if (stream == NULL) {
	        fprintf(stderr, "Could not allocate stream\n");
	        exit(1);
	    }
	    stream->id = oc->nb_streams - 1;

	    AVCodecContext *enc = stream->codec;

	    if (encoder->type == AVMEDIA_TYPE_AUDIO) {
	        /* Prefer the encoder's first advertised sample format. */
	        if (encoder->sample_fmts)
	            enc->sample_fmt = encoder->sample_fmts[0];
	        else
	            enc->sample_fmt = AV_SAMPLE_FMT_FLTP;
	        enc->bit_rate    = 64000;
	        enc->sample_rate = 44100;
	        enc->channels    = 2;
	    } else if (encoder->type == AVMEDIA_TYPE_VIDEO) {
	        enc->codec_id = codec_id;
	        enc->bit_rate = 400000;
	        /* Resolution must be a multiple of two. */
	        enc->width  = 352;
	        enc->height = 288;
	        /* The time base is the unit (in seconds) frame timestamps are
	         * expressed in; for fixed-fps content it is 1/framerate and
	         * timestamps advance by 1 per frame. */
	        enc->time_base.den = STREAM_FRAME_RATE;
	        enc->time_base.num = 1;
	        enc->gop_size      = 12; /* at most twelve frames between intra frames */
	        enc->pix_fmt       = STREAM_PIX_FMT;
	        if (enc->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
	            /* just for testing, we also add B frames */
	            enc->max_b_frames = 2;
	        }
	        if (enc->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
	            /* Avoids macroblocks in which some coefficients overflow. */
	            enc->mb_decision = 2;
	        }
	    }

	    /* Some formats want stream headers to be separate. */
	    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
	        enc->flags |= CODEC_FLAG_GLOBAL_HEADER;

	    return stream;
	}


	static void open_video(AVFormatContext oc, AVCodec codec, AVStream *st)
	{
	int ret;
	AVCodecContext *c = st->codec;

	/* open the codec */
	ret = avcodec_open2(c, codec, NULL);
	if (ret < 0) {
	//fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
	exit(1);
	}

	/* allocate and init a re-usable frame */
	frame = av_frame_alloc();
	if (!frame) {
	fprintf(stderr, "Could not allocate video frame\n");
	exit(1);
	}
	frame->format = c->pix_fmt;
	frame->width = c->width;
	frame->height = c->height;

	/* Allocate the encoded raw picture. */
	ret = avpicture_alloc(&dst_picture, c->pix_fmt, c->width, c->height);
	if (ret < 0) {
	//fprintf(stderr, "Could not allocate picture: %s\n", av_err2str(ret));
	exit(1);
	}

	/* If the output format is not YUV420P, then a temporary YUV420P
	* picture is needed too. It is then converted to the required
	* output format. */
	if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
	ret = avpicture_alloc(&src_picture, AV_PIX_FMT_YUV420P, c->width, c->height);
	if (ret < 0) {
	//fprintf(stderr, "Could not allocate temporary picture: %s\n",
	// av_err2str(ret));
	exit(1);
	}
	}

	/* copy data and linesize picture pointers to frame */
	((AVPicture )frame) = dst_picture;
	}

	int open_audio_input(char* src_filename)
	{
	int i =0;
	/* open input file, and allocate format context */
	if (avformat_open_input(&fmt_ctx, src_filename, NULL, NULL) < 0)
	{
	fprintf(stderr, "Could not open source file %s\n", src_filename);
	exit(1);
	}

	// Retrieve stream information
	if(avformat_find_stream_info(fmt_ctx, NULL)<0)
	return -1; // Couldn't find stream information

	// Dump information about file onto standard error
	av_dump_format(fmt_ctx, 0, src_filename, 0);

	// Find the first video stream
	for(i=0; i<fmt_ctx->nb_streams; i++)
	{
	if(fmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO)
	{
	audio_stream_index=i;
	break;
	}
	}
	if ( audio_stream_index != -1 )
	{
	// Get a pointer to the codec context for the audio stream
	codec_ctx_audio=fmt_ctx->streams[audio_stream_index]->codec;

	// Find the decoder for the video stream
	codec_audio=avcodec_find_decoder(codec_ctx_audio->codec_id);
	if(codec_audio==NULL) {
	fprintf(stderr, "Unsupported audio codec!\n");
	return -1; // Codec not found
	}

	// Open codec
	AVDictionary *codecDictOptions = NULL;
	if(avcodec_open2(codec_ctx_audio, codec_audio, &codecDictOptions)<0)
	return -1; // Could not open codec

	// Set up SWR context once you've got codec information
	swrContext = swr_alloc();
	av_opt_set_int(swrContext, "in_channel_layout", codec_ctx_audio->channel_layout, 0);
	av_opt_set_int(swrContext, "out_channel_layout", codec_ctx_audio->channel_layout, 0);
	av_opt_set_int(swrContext, "in_sample_rate", codec_ctx_audio->sample_rate, 0);
	av_opt_set_int(swrContext, "out_sample_rate", codec_ctx_audio->sample_rate, 0);
	av_opt_set_sample_fmt(swrContext, "in_sample_fmt", codec_ctx_audio->sample_fmt, 0);
	if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
	{
	av_opt_set_sample_fmt(swrContext, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
	}
	else
	{
	av_opt_set_sample_fmt(swrContext, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
	}
	int rv = swr_init(swrContext);

	sourceSampleRate = destSampleRate = codec_ctx_audio->sample_rate;

	// Allocate audio frame
	if ( decoded_frame == NULL ) decoded_frame = avcodec_alloc_frame();
	int nb_planes = 0;
	AVStream* audio_stream = fmt_ctx->streams[audio_stream_index];
	nb_planes = av_sample_fmt_is_planar(codec_ctx_audio->sample_fmt) ? codec_ctx_audio->channels : 1;
	int tempSize = sizeof(uint8_t ) nb_planes;
	audio_dst_data = (uint8_t**)av_mallocz(tempSize);
	if (!audio_dst_data)
	{
	fprintf(stderr, "Could not allocate audio data buffers\n");
	}
	else
	{
	for ( int i = 0 ; i < nb_planes ; i ++ )
	{
	audio_dst_data[i] = NULL;
	}
	}
	}
	}

	/* Read the next packet from the input and pass it to decode_packet().
	 *
	 * Clears got_frame and audiobufsize first. Returns 0 when no input is
	 * open, the negative av_read_frame() result when reading fails, and
	 * otherwise the result of decode_packet(). */
	int decode_frame()
	{
	    int status = 0;

	    got_frame = 0;
	    if (fmt_ctx != NULL)
	    {
	        audiobufsize = 0;
	        status = av_read_frame(fmt_ctx, &input_packet);
	        if (status >= 0)
	        {
	            status = decode_packet();
	            // Release the packet that av_read_frame() filled in.
	            av_free_packet(&input_packet);
	        }
	    }
	    return status;
	}

	int decode_packet()
	{
	int rv = 0;
	int ret = 0;

	//audio stream?
	if(input_packet.stream_index == audio_stream_index)
	{
	avcodec_get_frame_defaults(decoded_frame);
	while( input_packet.size > 0 )
	{
	int result = avcodec_decode_audio4(codec_ctx_audio, decoded_frame, &got_frame, &input_packet);
	if ( result < 0)
	{
	fprintf(stderr, "Error decoding audio frame\n");
	//return ret;
	}
	else
	{
	if ( got_frame )
	{
	dst_nb_samples = (int)av_rescale_rnd(swr_get_delay(swrContext, sourceSampleRate) + decoded_frame->nb_samples, sourceSampleRate, destSampleRate, AV_ROUND_UP);
	if ( dst_nb_samples > max_dst_nb_samples )
	{
	max_dst_nb_samples = dst_nb_samples;
	if ( audio_dst_data[0] )
	{
	av_freep(&audio_dst_data[0]);
	audio_dst_data[0] = NULL;
	}
	}
	if ( audio_dst_data[0] == NULL )
	{
	if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
	{
	ret = av_samples_alloc(audio_dst_data, &audio_dst_linesize, codec_ctx_audio->channels,
	decoded_frame->nb_samples, (AVSampleFormat)AV_SAMPLE_FMT_FLTP, 0);
	}
	else
	{
	ret = av_samples_alloc(audio_dst_data, &audio_dst_linesize, codec_ctx_audio->channels,
	decoded_frame->nb_samples, (AVSampleFormat)AV_SAMPLE_FMT_S16, 0);
	}
	}
	/* TODO: extend return code of the av_samples_* functions so that this call is not needed */
	int resampled = swr_convert(swrContext, audio_dst_data, out_frame->nb_samples,
	(const uint8_t **)(decoded_frame->extended_data), decoded_frame->nb_samples);
	char str[900]="";
	sprintf(str,"out_frame->nb_samples:\t%d; decoded_frame->nb_samples:\t%d",out_frame->nb_samples,decoded_frame->nb_samples );
	if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
	{
	audio_dst_bufsize = av_samples_get_buffer_size(&audio_dst_linesize, decoded_frame->channels, resampled, (AVSampleFormat)AV_SAMPLE_FMT_FLTP, 1);
	}
	else
	{
	audio_dst_bufsize = av_samples_get_buffer_size(&audio_dst_linesize, decoded_frame->channels, resampled, (AVSampleFormat)AV_SAMPLE_FMT_S16, 1);
	}

	input_packet.size -= result;
	input_packet.data += result;
	}
	else
	{
	input_packet.size = 0;
	input_packet.data = NULL;
	}
	}
	}
	}
	return rv;
	}

	/* Create the output context for output_filename, add and open the audio
	 * stream using the container's default audio codec, open the output file
	 * when the format needs one, and write the container header.
	 *
	 * Sets the globals output_fmt_ctx, output_format, audio_st and
	 * audio_codec. Returns 0 on success and a negative value on failure. */
	int open_encoder(char* output_filename )
	{
	    int rv = 0;

	    /* allocate the output media context */
	    avformat_alloc_output_context2(&output_fmt_ctx, NULL, NULL, output_filename);
	    if (!output_fmt_ctx) {
	        printf("Could not deduce output format from file extension: using MPEG.\n");
	        avformat_alloc_output_context2(&output_fmt_ctx, NULL, "mpeg", output_filename);
	    }

	    audio_st = NULL;
	    if (!output_fmt_ctx)
	        return -1;
	    output_format = output_fmt_ctx->oformat;

	    /* Add the audio stream using the default format codecs
	    * and initialize the codecs. */
	    if (output_format->audio_codec != AV_CODEC_ID_NONE)
	    {
	        audio_st = add_audio_stream(output_fmt_ctx, &audio_codec, output_format->audio_codec);
	    }

	    /* Now that all the parameters are set, we can open the audio
	    * codec and allocate the necessary encode buffers. */
	    if (audio_st)
	    {
	        rv = open_audio(output_fmt_ctx, audio_codec, audio_st);
	        if ( rv < 0 ) return rv;
	    }

	    av_dump_format(output_fmt_ctx, 0, output_filename, 1);

	    /* open the output file, if needed */
	    if (!(output_format->flags & AVFMT_NOFILE))
	    {
	        if (avio_open(&output_fmt_ctx->pb, output_filename, AVIO_FLAG_WRITE) < 0) {
	            fprintf(stderr, "Could not open '%s'\n", output_filename);
	            return -1;
	        }
	    }

	    /* Write the stream header, if any. This must also happen for formats
	    * that do their own I/O (AVFMT_NOFILE). */
	    if (avformat_write_header(output_fmt_ctx, NULL) < 0)
	    {
	        fprintf(stderr, "Error occurred when opening output file\n");
	        return -1;
	    }

	    return 0;
	}

	/*
	 * Create a new audio stream in the muxer context oc.
	 *
	 * Looks up the encoder for codec_id, stores it in *codec, creates the
	 * stream and fills its codec context from the globals
	 * outputAudioFormat (selects FLTP for Vorbis, S16 otherwise),
	 * audio_bit_rate, audio_sample_rate and audio_channels.
	 *
	 * Terminates the process with exit(1) if the encoder is not found or
	 * the stream cannot be allocated, so the returned pointer is never NULL.
	 */
	AVStream *add_audio_stream(AVFormatContext *oc, AVCodec **codec,
			enum AVCodecID codec_id)
	{
		/* find the audio encoder */
		*codec = avcodec_find_encoder(codec_id);
		if (!(*codec)) {
			fprintf(stderr, "Could not find codec\n");
			exit(1);
		}

		AVStream *st = avformat_new_stream(oc, *codec);
		if (!st) {
			fprintf(stderr, "Could not allocate stream\n");
			exit(1);
		}
		st->id = 1;

		AVCodecContext *c = st->codec;

		/* put sample parameters */
		if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
			c->sample_fmt = AV_SAMPLE_FMT_FLTP;
		else
			c->sample_fmt = AV_SAMPLE_FMT_S16;

		c->bit_rate = audio_bit_rate;
		c->sample_rate = audio_sample_rate;
		c->channels = audio_channels;

		// some formats want stream headers to be separate
		if (oc->oformat->flags & AVFMT_GLOBALHEADER)
			c->flags |= CODEC_FLAG_GLOBAL_HEADER;

		return st;
	}

	int open_audio(AVFormatContext oc, AVCodec codec, AVStream *st)
	{
	int ret=0;
	AVCodecContext *c;

	st->duration = fmt_ctx->duration;
	c = st->codec;

	/* open it */
	ret = avcodec_open2(c, codec, NULL) ;
	if ( ret < 0)
	{
	fprintf(stderr, "could not open codec\n");
	return -1;
	//exit(1);
	}

	if (c->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)
	audio_input_frame_size = 10000;
	else
	audio_input_frame_size = c->frame_size;
	out_frame->nb_samples = audio_input_frame_size;
	int tempSize = audio_input_frame_size *
	av_get_bytes_per_sample(c->sample_fmt) *
	c->channels;
	return ret;
	}

	void close_audio(AVFormatContext oc, AVStream st)
	{
	avcodec_close(st->codec);
	}

	void write_audio_frame(uint8_t ** audio_dst_data, int audio_dst_bufsize)
	{
	AVFormatContext *oc = output_fmt_ctx;
	AVStream *st = audio_st;
	if ( oc == NULL \|\| st == NULL ) return;
	AVCodecContext *c;
	AVPacket pkt = { 0 }; // data and size must be 0;
	int got_packet=0, ret=0;

	av_init_packet(&pkt);
	c = st->codec;

	out_frame->nb_samples = audio_input_frame_size;

	AVRational r;
	r.num = 1;
	r.den = c->sample_rate;
	out_frame->pts = av_rescale_q(samples_count, (AVRational)r, c->time_base);
	avcodec_fill_audio_frame(out_frame, c->channels, c->sample_fmt,
	audio_dst_data[0], audio_dst_bufsize, 0);
	samples_count += out_frame->nb_samples;

	ret = avcodec_encode_audio2(c, &pkt, out_frame, &got_packet);
	if (ret < 0)
	{
	return;
	}

	if (!got_packet)
	return;

	/* rescale output packet timestamp values from codec to stream timebase */
	pkt.pts = av_rescale_q_rnd(pkt.pts, c->time_base, st->time_base, (AVRounding )(AV_ROUND_NEAR_INF\|AV_ROUND_PASS_MINMAX));
	pkt.dts = av_rescale_q_rnd(pkt.dts, c->time_base, st->time_base, (AVRounding )(AV_ROUND_NEAR_INF\|AV_ROUND_PASS_MINMAX));
	pkt.duration = av_rescale_q(pkt.duration, c->time_base, st->time_base);
	pkt.stream_index = st->index;

	char str[999]="";
	sprintf(str,"out_frame->nb_samples:\t%d",out_frame->nb_samples);
	/* Write the compressed frame to the media file. */
	ret = av_interleaved_write_frame(oc, &pkt);
	if (ret != 0)
	{
	exit(1);
	}
	av_free_packet(&pkt);
	}

	void write_delayed_frames(AVFormatContext oc, AVStream st)
	{
	AVCodecContext *c = st->codec;
	int got_output = 0;
	int ret = 0;
	AVPacket pkt;
	pkt.data = NULL;
	pkt.size = 0;
	av_init_packet(&pkt);
	int i = 0;
	for (got_output = 1; got_output; i++)
	{
	ret = avcodec_encode_audio2(c, &pkt, NULL, &got_output);
	if (ret < 0)
	{
	fprintf(stderr, "error encoding frame\n");
	exit(1);
	}
	static int64_t tempPts = 0;
	static int64_t tempDts = 0;
	/* If size is zero, it means the image was buffered. */
	if (got_output)
	{
	pkt.pts = av_rescale_q_rnd(pkt.pts, c->time_base, st->time_base, (AVRounding )(AV_ROUND_NEAR_INF\|AV_ROUND_PASS_MINMAX));
	pkt.dts = av_rescale_q_rnd(pkt.dts, c->time_base, st->time_base, (AVRounding )(AV_ROUND_NEAR_INF\|AV_ROUND_PASS_MINMAX));
	pkt.duration = av_rescale_q(pkt.duration, c->time_base, st->time_base);
	pkt.stream_index = st->index;
	if ( c && c->coded_frame && c->coded_frame->key_frame)
	pkt.flags \|= AV_PKT_FLAG_KEY;
	/* Write the compressed frame to the media file. */
	ret = av_interleaved_write_frame(oc, &pkt);
	}
	else
	{
	ret = 0;
	}
	av_free_packet(&pkt);
	}
	}

	/*
	 * Transcode the audio of "test.mp2" into "output.webm".
	 *
	 * Steps: register FFmpeg components, open the input, mirror its bit
	 * rate / sample rate / channel count into the encoder settings, open
	 * the output, then decode -> write until decode_frame() reports a
	 * negative value, drain the resampler, drain the encoder, write the
	 * trailer and release the output resources.
	 *
	 * Returns 0 on success, 1 when the input or the output cannot be set up.
	 */
	int main(int argc, char **argv)
	{
		(void)argc;
		(void)argv;

		/* register all formats and codecs */
		av_register_all();
		avcodec_register_all();
		avformat_network_init();
		avdevice_register_all();

		int ret = 0;
		char src_filename[90] = "test.mp2";
		char dst_filename[90] = "output.webm";
		outputAudioFormat = AV_CODEC_ID_VORBIS;

		open_audio_input(src_filename);
		/* Everything below reads the decoder context, so stop if the
		 * input could not be opened. */
		if ( codec_ctx_audio == NULL )
		{
			fprintf(stderr, "Could not open audio input '%s'\n", src_filename);
			return 1;
		}
		if ( codec_ctx_audio->bit_rate == 0 ) codec_ctx_audio->bit_rate = 112000;
		audio_bit_rate = codec_ctx_audio->bit_rate;
		audio_sample_rate = codec_ctx_audio->sample_rate;
		audio_channels = codec_ctx_audio->channels;

		/* The loops below dereference audio_st, so an output without an
		 * audio stream is treated as a failure as well. */
		if ( open_encoder( dst_filename ) < 0 || audio_st == NULL )
		{
			fprintf(stderr, "Could not open output '%s'\n", dst_filename);
			return 1;
		}

		/* Main loop: decode one input frame, encode and write it. */
		while(1)
		{
			int rv = decode_frame();
			if ( rv < 0 )
			{
				break;
			}

			if (audio_st)
			{
				audio_pts = audio_st->pts.val * av_q2d(audio_st->time_base);
			}
			else
			{
				audio_pts = 0.0;
			}
			if ( codec_ctx_audio && got_frame )
			{
				write_audio_frame( audio_dst_data, audio_dst_bufsize );
			}
			printf("\naudio_pts: %f", audio_pts);
		}

		/* Drain the samples still buffered inside the resampler. */
		while(1)
		{
			dst_nb_samples = (int)av_rescale_rnd(swr_get_delay(swrContext, sourceSampleRate) + decoded_frame->nb_samples, sourceSampleRate, destSampleRate, AV_ROUND_UP);
			if ( dst_nb_samples > max_dst_nb_samples )
			{
				max_dst_nb_samples = dst_nb_samples;
				if ( audio_dst_data[0] )
				{
					av_freep(&audio_dst_data[0]);
					audio_dst_data[0] = NULL;
				}
			}
			if ( audio_dst_data[0] == NULL )
			{
				/* NOTE(review): the buffer is sized from
				 * decoded_frame->nb_samples while swr_convert() below is
				 * allowed to write out_frame->nb_samples - confirm the
				 * former is never the smaller one. */
				AVSampleFormat dst_fmt = ( outputAudioFormat == AV_CODEC_ID_VORBIS )
						? AV_SAMPLE_FMT_FLTP : AV_SAMPLE_FMT_S16;
				ret = av_samples_alloc(audio_dst_data, NULL, codec_ctx_audio->channels,
						decoded_frame->nb_samples, dst_fmt, 0);
				if ( ret < 0 )
				{
					fprintf(stderr, "Could not allocate resample buffer\n");
					break;
				}
			}
			/* NULL input asks the resampler to flush what it has buffered. */
			int resampled = swr_convert(swrContext, audio_dst_data, out_frame->nb_samples,NULL, 0);
			if ( outputAudioFormat == AV_CODEC_ID_VORBIS )
			{
				audio_dst_bufsize = av_samples_get_buffer_size(&audio_dst_linesize, decoded_frame->channels, resampled, (AVSampleFormat)AV_SAMPLE_FMT_FLTP, 1);
			}
			else
			{
				audio_dst_bufsize = av_samples_get_buffer_size(&audio_dst_linesize, decoded_frame->channels, resampled, (AVSampleFormat)AV_SAMPLE_FMT_S16, 1);
			}
			if ( audio_dst_bufsize <= 0 ) break;
			audio_pts = audio_st->pts.val * av_q2d(audio_st->time_base);
			printf("\naudio_pts: %f", audio_pts);
			write_audio_frame( audio_dst_data, audio_dst_bufsize );
		}

		/* Drain the encoder and finish the container. */
		write_delayed_frames( output_fmt_ctx, audio_st );
		av_write_trailer(output_fmt_ctx);
		close_audio( output_fmt_ctx, audio_st);

		/* Close the output file opened by open_encoder() and free the muxer. */
		if (!(output_format->flags & AVFMT_NOFILE))
			avio_close(output_fmt_ctx->pb);
		avformat_free_context(output_fmt_ctx);
		output_fmt_ctx = NULL;
		audio_st = NULL;

		swr_free(&swrContext);
		avcodec_free_frame(&out_frame);
		getch();
		return 0;
	}