moonpfe/audio-resampling-by-using-avfiler.patch

## audio-resampling-by-using-avfiler.patch
diff --git a/edc-rtsp.c b/edc-rtsp.c
index 161ca75d..e73aecdf 100644
--- a/edc-rtsp.c
+++ b/edc-rtsp.c
@@ -20,6 +20,11 @@
 #include <libswresample/swresample.h>
 #include <libavutil/random_seed.h>
 #include <libavutil/intreadwrite.h>
+#include <libavutil/timestamp.h>
+#include <libavfilter/avfilter.h>
+#include <libavfilter/buffersrc.h>
+#include <libavfilter/buffersink.h>
+#include <libavutil/timestamp.h>

 #ifndef _
 #if defined(linux) && defined(PACKAGE)
@@ -71,6 +76,10 @@ struct _EdcRtspConvert
   int                output_bytes_per_sample;
   int                output_sample_rate;
   int                output_channels;
+
+  AVFilterGraph   *graph;
+  AVFilterContext *src;
+  AVFilterContext *sink;
 };

 typedef enum _EdcRtspSourceType
@@ -109,7 +118,6 @@ struct _EdcRtspAudioOut
   uint32_t    ssrc;
 };

-
 /**
  * EdcRtsp object
  */
@@ -120,6 +128,8 @@ struct _EdcRtsp
   EdcRtspFrameRecv *frame_recv;
   EdcRtspErrorFunc  err_func;

+  int64_t base_timestamp;
+
   struct
   {
     EdcRtspCheckExitFunc func;
@@ -173,6 +183,8 @@ struct _EdcRtsp
     int        width;
     int        height;

+    int64_t start_pts;
+
     EdcBuf *(*make_frame) (EdcRtsp  *rtsp,
                            AVPacket *pkt);
   } video;
@@ -182,6 +194,8 @@ struct _EdcRtsp
     int        has_stream;
     EdcCodecID codec_id;

+    int64_t start_pts;
+
     EdcBuf *(*make_frame) (EdcRtsp  *rtsp,
                            AVPacket *pkt);
     /**
@@ -263,6 +277,171 @@ edc_rtsp_log (EdcRtsp     *rtsp,
            rtsp->transport);
 }

+/**
+ * mp4serv_init_audio_resampling_filter:
+ * @rtsp: RTSP object; conversion parameters come from rtsp->audio.convert
+ *
+ * Builds the libavfilter graph "abuffer -> aresample -> abuffersink" that
+ * converts decoded audio from the input sample rate / format / channels
+ * to the output ones and stores graph, src and sink in the convert state.
+ *
+ * On failure everything built so far is freed and graph, src and sink are
+ * reset to NULL so that no stale filter context can be used afterwards.
+ *
+ * Returns: 0 on success, a negative value on failure.
+ */
+static int
+mp4serv_init_audio_resampling_filter (EdcRtsp *rtsp)
+{
+  EdcRtspConvert *c = &rtsp->audio.convert;
+  const AVFilter *buffersrc;
+  const AVFilter *buffersink;
+  AVFilterInOut *outputs = NULL;
+  AVFilterInOut *inputs = NULL;
+  int64_t channel_layout;
+  char args[512];
+  char *dump;
+  int ret = -1;
+
+  c->graph = avfilter_graph_alloc ();
+  if (!c->graph)
+    {
+      edc_rtsp_debug (rtsp, "%s", "failed to create audio filter graph");
+      goto fail;
+    }
+
+  buffersrc = avfilter_get_by_name ("abuffer");
+  if (!buffersrc)
+    {
+      edc_rtsp_debug (rtsp, "failed to get `%s` filter", "abuffer");
+      goto fail;
+    }
+
+  /* describe the decoded audio that will be pushed into the graph */
+  g_snprintf (args, sizeof(args),
+              "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%"PRIx64,
+              1, c->input_sample_rate,
+              c->input_sample_rate,
+              av_get_sample_fmt_name (c->input_sample_fmt),
+              av_get_default_channel_layout (c->input_channels));
+
+  ret = avfilter_graph_create_filter (&c->src, buffersrc, "in",
+                                      args, NULL, c->graph);
+  if (ret < 0)
+    {
+      edc_rtsp_debug (rtsp, "failed to create `%s` filter", "abuffer");
+      goto fail;
+    }
+
+  buffersink = avfilter_get_by_name ("abuffersink");
+  if (!buffersink)
+    {
+      edc_rtsp_debug (rtsp, "failed to get `%s` filter", "abuffersink");
+      goto fail;
+    }
+
+  ret = avfilter_graph_create_filter (&c->sink, buffersink, "out",
+                                      NULL, NULL, c->graph);
+  if (ret < 0)
+    {
+      edc_rtsp_debug (rtsp, "failed to create `%s` filter", "abuffersink");
+      goto fail;
+    }
+
+  /* restrict what the sink accepts so aresample converts to the output */
+  ret = av_opt_set_bin (c->sink, "sample_fmts",
+                        (uint8_t *) &c->output_sample_fmt,
+                        sizeof (c->output_sample_fmt), AV_OPT_SEARCH_CHILDREN);
+  if (ret < 0)
+    {
+      edc_rtsp_debug (rtsp, "%s", "failed to set output sample format");
+      goto fail;
+    }
+
+  channel_layout = av_get_default_channel_layout (c->output_channels);
+  ret = av_opt_set_bin (c->sink, "channel_layouts",
+                        (uint8_t *) &channel_layout,
+                        sizeof (channel_layout), AV_OPT_SEARCH_CHILDREN);
+  if (ret < 0)
+    {
+      edc_rtsp_debug (rtsp, "%s", "failed to set output channel layout");
+      goto fail;
+    }
+
+  ret = av_opt_set_bin (c->sink, "sample_rates",
+                        (uint8_t *) &c->output_sample_rate,
+                        sizeof (c->output_sample_rate), AV_OPT_SEARCH_CHILDREN);
+  if (ret < 0)
+    {
+      edc_rtsp_debug (rtsp, "%s", "failed to set output sample rate");
+      goto fail;
+    }
+
+  /* the parsed chain is fed by our source and drained by our sink */
+  outputs = avfilter_inout_alloc ();
+  inputs = avfilter_inout_alloc ();
+  if (!outputs || !inputs)
+    {
+      edc_rtsp_debug (rtsp, "%s", "failed to allocate audio filter in/out");
+      goto fail;
+    }
+
+  outputs->name = av_strdup ("in");
+  outputs->filter_ctx = c->src;
+  outputs->pad_idx = 0;
+  outputs->next = NULL;
+
+  inputs->name = av_strdup ("out");
+  inputs->filter_ctx = c->sink;
+  inputs->pad_idx = 0;
+  inputs->next = NULL;
+
+  if (!outputs->name || !inputs->name)
+    {
+      edc_rtsp_debug (rtsp, "%s", "failed to allocate audio filter in/out");
+      goto fail;
+    }
+
+  ret = avfilter_graph_parse_ptr (c->graph, "aresample",
+                                  &inputs, &outputs, NULL);
+  if (ret < 0)
+    {
+      edc_rtsp_debug (rtsp, "failed to parse audio filter graph : '%s'(%d)",
+                      av_err2str (ret), ret);
+      goto fail;
+    }
+
+  ret = avfilter_graph_config (c->graph, NULL);
+  if (ret < 0)
+    {
+      edc_rtsp_debug (rtsp, "failed to configure audio filter graph : '%s'(%d)",
+                      av_err2str (ret), ret);
+      goto fail;
+    }
+
+  /* log the final graph layout for diagnostics */
+  dump = avfilter_graph_dump (c->graph, NULL);
+  if (dump)
+    {
+      edc_rtsp_debug (rtsp, "resampling filter graph\n%s", dump);
+      av_free (dump);
+    }
+
+  avfilter_inout_free (&inputs);
+  avfilter_inout_free (&outputs);
+
+  return 0;
+
+fail:
+  avfilter_inout_free (&inputs);
+  avfilter_inout_free (&outputs);
+  avfilter_graph_free (&c->graph);
+  c->src = NULL;
+  c->sink = NULL;
+
+  return ret < 0 ? ret : -1;
+}
+
 static void
 edc_rtsp_handle_error (EdcRtsp    *rtsp,
                        const char *fmt,
@@ -567,6 +746,9 @@ edc_rtsp_read_rtp_real (EdcRtsp  *rtsp,
   if (!pkt || pkt->size <= 0 || !pkt->data)
     return NULL;

+  if (rtsp->base_timestamp == 0)
+    rtsp->base_timestamp = edc_get_real_time ();
+
   /* RTP extension header */
   if (rtsp->fmt.avfctx->rtp_ext_header_len > 0)
     {
@@ -584,19 +766,37 @@ edc_rtsp_read_rtp_real (EdcRtsp  *rtsp,
   buf = NULL;
   if (pkt->stream_index == rtsp->fmt.video_stream)
     {
+      AVRational tb = { 1, 90000 };
+
+      if (rtsp->video.start_pts < 0)
+        rtsp->video.start_pts = pkt->pts;
+
       if (!rtsp->video.has_stream ||
           !rtsp->use_stream[EDC_RTSP_VIDEO])
         return NULL;

+      if (rtsp->video.start_pts > pkt->pts)
+        rtsp->video.start_pts = pkt->pts;
+
+      pkt->pts -= rtsp->video.start_pts;
+
+      if (0) edc_rtsp_log (rtsp, "video time:%s pts:%ld", av_ts2timestr (pkt->pts, &tb), pkt->pts);
+
       buf = rtsp->video.make_frame (rtsp, pkt);
       if (buf)
         {
           buf->width = rtsp->video.width;
           buf->height = rtsp->video.height;
+          buf->timestamp = rtsp->base_timestamp + av_rescale_q (pkt->pts, tb, AV_TIME_BASE_Q);
         }
     }
   else if (pkt->stream_index == rtsp->fmt.audio_stream)
     {
+      AVRational tb = { 1, 44100 };
+
+      if (rtsp->audio.start_pts < 0)
+        rtsp->audio.start_pts = pkt->pts;
+
       if (!rtsp->audio.has_stream ||
           !rtsp->use_stream[EDC_RTSP_AUDIO])
         return NULL;
@@ -604,7 +804,13 @@ edc_rtsp_read_rtp_real (EdcRtsp  *rtsp,
       if (rtsp->ignore_audio_frame)
         return NULL;

+      pkt->pts -= rtsp->audio.start_pts;
+
+      if (0) edc_rtsp_log (rtsp, "audio time:%s pts:%ld", av_ts2timestr (pkt->pts, &tb), pkt->pts);
+
       buf = rtsp->audio.make_frame (rtsp, pkt);
+      if (buf)
+        buf->timestamp = rtsp->base_timestamp + av_rescale_q (pkt->pts, tb, AV_TIME_BASE_Q);
     }
   else if (pkt->stream_index == rtsp->fmt.metadata_stream)
     {
@@ -973,6 +1179,144 @@ edc_rtsp_make_audio_frame_resampled (EdcRtsp  *rtsp,
   return buf;
 }

+/**
+ * edc_rtsp_make_audio_frame_resampled2:
+ * @rtsp: RTSP object
+ * @pkt: received audio packet; its pts becomes the decoded frame's pts
+ *
+ * Decodes @pkt, pushes the decoded samples through the avfilter
+ * resampling graph of rtsp->audio.convert and ADPCM-encodes the
+ * resampled frame delivered by the graph.
+ *
+ * Only one frame is taken from the sink per call; anything else the graph
+ * has produced stays queued there until a later call.
+ *
+ * Returns: a new EdcBuf holding the ADPCM data, or NULL when decoding,
+ * resampling or encoding produced nothing.
+ */
+static EdcBuf *
+edc_rtsp_make_audio_frame_resampled2 (EdcRtsp  *rtsp,
+                                      AVPacket *pkt)
+{
+  EdcRtspConvert *c = &rtsp->audio.convert;
+  EdcBuf *buf = NULL;
+  AVFrame *frame;
+  const uint8_t **dec_data;
+  int dec_data_nb_samples;
+  int len, pcm_len;
+  int ret;
+
+  /* the filter graph is missing when its initialization failed */
+  if (!c->graph || !c->src || !c->sink)
+    return NULL;
+
+  dec_data = edc_codec_decode_audio2 (c->decoder,
+                                      (char *) pkt->data,
+                                      (int) pkt->size,
+                                      &dec_data_nb_samples);
+  if (!dec_data)
+    {
+      int error_count;
+
+      /* log the first failures, then only every 100th one */
+      error_count = edc_codec_get_error_count (c->decoder);
+      if (error_count <= 5 || (error_count % 100) == 0)
+        edc_rtsp_log (rtsp,
+                      "failed to decode audio frame(%d) (count:%d)",
+                      pkt->size, error_count);
+
+      edc_rtsp_handle_error (rtsp, "Audio Resample");
+
+      return NULL;
+    }
+
+  /* describe the decoder output with an AVFrame for the filter graph */
+  frame = av_frame_alloc ();
+  if (!frame)
+    return NULL;
+
+  frame->pts = pkt->pts;
+  frame->pkt_dts = frame->pts;
+  frame->channel_layout = av_get_default_channel_layout (c->input_channels);
+  frame->channels = av_get_channel_layout_nb_channels (frame->channel_layout);
+  frame->sample_rate = c->input_sample_rate;
+  frame->format = c->input_sample_fmt;
+  frame->nb_samples = dec_data_nb_samples;
+
+  /* NOTE(review): 192000 is presumably the size of the decoder's output
+   * buffer (the old AVCODEC_MAX_AUDIO_FRAME_SIZE) -- confirm */
+  ret = avcodec_fill_audio_frame (frame,
+                                  frame->channels,
+                                  frame->format,
+                                  dec_data[0],
+                                  192000,
+                                  0);
+  if (ret < 0)
+    {
+      edc_rtsp_debug (rtsp, "failed to fill audio frame: '%s'(%d)",
+                      av_err2str (ret), ret);
+      av_frame_free (&frame);
+      return NULL;
+    }
+
+  /* send frame to the audio resampler */
+  ret = av_buffersrc_add_frame_flags (c->src,
+                                      frame,
+                                      AV_BUFFERSRC_FLAG_KEEP_REF);
+  if (ret < 0)
+    edc_rtsp_debug (rtsp, "failed to add frame to buffersrc: '%s'(%d)",
+                    av_err2str (ret), ret);
+
+  av_frame_free (&frame);
+
+  /* fetch one resampled frame; EAGAIN/EOF are not reported as errors */
+  frame = av_frame_alloc ();
+  if (!frame)
+    return NULL;
+
+  ret = av_buffersink_get_frame (c->sink, frame);
+  if (ret < 0)
+    {
+      if (ret != AVERROR (EAGAIN) && ret != AVERROR_EOF)
+        edc_rtsp_debug (rtsp, "failed to get frame from buffersink: '%s'(%d)",
+                        av_err2str (ret), ret);
+      goto out;
+    }
+
+  /* size in bytes of the resampled PCM data */
+  pcm_len = frame->nb_samples * c->output_bytes_per_sample * c->output_channels;
+  if (pcm_len <= 0)
+    goto out;
+
+  /* ADPCM */
+  buf = edc_buf_new ((pcm_len / 4) + 4 /* header */ + 4 /* dummy */);
+  if (!buf)
+    goto out;
+
+  len = edc_adpcm_encode (c->encoder,
+                          (short *) frame->extended_data[0],
+                          (unsigned char *) buf->data,
+                          pcm_len);
+  if (len <= 0)
+    {
+      /* encoding failed: drop the buffer; the frame is freed below */
+      edc_buf_unref (buf);
+      buf = NULL;
+      goto out;
+    }
+
+  buf->channel = 0;
+  buf->flags = EDC_BUF_FLAG_AUDIO;
+  buf->codec_id = EDC_CODEC_ADPCM;
+  buf->len = len;
+
+out:
+  /* the frame is released on every path, with or without a buffer */
+  av_frame_free (&frame);
+
+  return buf;
+}
+
 static EdcBuf *
 edc_rtsp_make_audio_frame_encoded (EdcRtsp  *rtsp,
                                    AVPacket *pkt)
@@ -1526,7 +1870,13 @@ edc_rtsp_open_fmt (EdcRtsp *rtsp)
                   continue;
                 }

-              rtsp->audio.make_frame = edc_rtsp_make_audio_frame_resampled;
+              if (0)
+                rtsp->audio.make_frame = edc_rtsp_make_audio_frame_resampled;
+              else
+                {
+                  mp4serv_init_audio_resampling_filter (rtsp);
+                  rtsp->audio.make_frame = edc_rtsp_make_audio_frame_resampled2;
+                }
             }
           else
             {
@@ -1891,6 +2241,9 @@ edc_rtsp_start (EdcRtsp *rtsp)
     }
   rtsp->started = TRUE;
   rtsp->key_received = FALSE;
+  rtsp->base_timestamp = 0;
+  rtsp->video.start_pts = -1;
+  rtsp->audio.start_pts = -1;

   edc_reset_rel_timestamp (&rtsp->rel_timestamp);
	diff --git a/edc-rtsp.c b/edc-rtsp.c
	index 161ca75d..e73aecdf 100644
	--- a/edc-rtsp.c
	+++ b/edc-rtsp.c
	@@ -20,6 +20,11 @@
	#include <libswresample/swresample.h>
	#include <libavutil/random_seed.h>
	#include <libavutil/intreadwrite.h>
	+#include <libavutil/timestamp.h>
	+#include <libavfilter/avfilter.h>
	+#include <libavfilter/buffersrc.h>
	+#include <libavfilter/buffersink.h>
	+#include <libavutil/timestamp.h>

	#ifndef _
	#if defined(linux) && defined(PACKAGE)
	@@ -71,6 +76,10 @@ struct _EdcRtspConvert
	int output_bytes_per_sample;
	int output_sample_rate;
	int output_channels;
	+
	+ AVFilterGraph *graph;
	+ AVFilterContext *src;
	+ AVFilterContext *sink;
	};

	typedef enum _EdcRtspSourceType
	@@ -109,7 +118,6 @@ struct _EdcRtspAudioOut
	uint32_t ssrc;
	};

	-
	/**
	* EdcRtsp object
	*/
	@@ -120,6 +128,8 @@ struct _EdcRtsp
	EdcRtspFrameRecv *frame_recv;
	EdcRtspErrorFunc err_func;

	+ int64_t base_timestamp;
	+
	struct
	{
	EdcRtspCheckExitFunc func;
	@@ -173,6 +183,8 @@ struct _EdcRtsp
	int width;
	int height;

	+ int64_t start_pts;
	+
	EdcBuf *(*make_frame) (EdcRtsp *rtsp,
	AVPacket *pkt);
	} video;
	@@ -182,6 +194,8 @@ struct _EdcRtsp
	int has_stream;
	EdcCodecID codec_id;

	+ int64_t start_pts;
	+
	EdcBuf *(*make_frame) (EdcRtsp *rtsp,
	AVPacket *pkt);
	/**
	@@ -263,6 +277,171 @@ edc_rtsp_log (EdcRtsp *rtsp,
	rtsp->transport);
	}

	+static int
	+mp4serv_init_audio_resampling_filter (EdcRtsp *rtsp)
	+{
	+ EdcRtspConvert *c;
	+ const AVFilter *buffersrc;
	+ const AVFilter *buffersink;
	+ AVFilterInOut *outputs;
	+ AVFilterInOut *inputs;
	+ char args[512];
	+ int ret;
	+
	+ c = &rtsp->audio.convert;
	+
	+ c->graph = avfilter_graph_alloc ();
	+ if (!c->graph)
	+ {
	+ edc_rtsp_debug (rtsp, "%s", "failed to create audio filter graph");
	+ return -1;
	+ }
	+
	+ buffersrc = avfilter_get_by_name ("abuffer");
	+ if (!buffersrc)
	+ {
	+ edc_rtsp_debug (rtsp, "failed to get `%s` filter", "abuffer");
	+ return -1;
	+ }
	+
	+ g_snprintf (args, sizeof(args),
	+ "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%"PRIx64,
	+ 1, c->input_sample_rate,
	+ c->input_sample_rate,
	+ av_get_sample_fmt_name (c->input_sample_fmt),
	+ av_get_default_channel_layout (c->input_channels));
	+
	+ ret = avfilter_graph_create_filter (&c->src,
	+ buffersrc,
	+ "in",
	+ args,
	+ NULL,
	+ c->graph);
	+ if (ret < 0)
	+ {
	+ edc_rtsp_debug (rtsp, "failed to create `%s` filter", "abuffer");
	+ return -1;
	+ }
	+
	+ buffersink = avfilter_get_by_name ("abuffersink");
	+ if (!buffersink)
	+ {
	+ edc_rtsp_debug (rtsp, "failed to get `%s` filter", "abuffersink");
	+ return -1;
	+ }
	+
	+ ret = avfilter_graph_create_filter (&c->sink,
	+ buffersink,
	+ "out",
	+ NULL,
	+ NULL,
	+ c->graph);
	+ if (ret < 0)
	+ {
	+ edc_rtsp_debug (rtsp, "failed to create `%s` filter", "abuffersink");
	+ return -1;
	+ }
	+
	+ ret = av_opt_set_bin (c->sink,
	+ "sample_fmts",
	+ (uint8_t *) &c->output_sample_fmt,
	+ sizeof (c->output_sample_fmt),
	+ AV_OPT_SEARCH_CHILDREN);
	+ if (ret < 0)
	+ {
	+ edc_rtsp_debug (rtsp, "%s", "failed to set output sample format");
	+ return -1;
	+ }
	+
	+ {
	+ int64_t channel_layout;
	+
	+ channel_layout = av_get_default_channel_layout (c->output_channels);
	+
	+ ret = av_opt_set_bin (c->sink,
	+ "channel_layouts",
	+ (uint8_t *) &channel_layout,
	+ sizeof (channel_layout),
	+ AV_OPT_SEARCH_CHILDREN);
	+ if (ret < 0)
	+ {
	+ edc_rtsp_debug (rtsp, "%s", "failed to set output channel layout");
	+ return -1;
	+ }
	+ }
	+
	+ ret = av_opt_set_bin (c->sink,
	+ "sample_rates",
	+ (uint8_t *) &c->output_sample_rate,
	+ sizeof (c->output_sample_rate),
	+ AV_OPT_SEARCH_CHILDREN);
	+ if (ret < 0)
	+ {
	+ edc_rtsp_debug (rtsp, "%s", "failed to set output sample rate");
	+ return -1;
	+ }
	+
	+ outputs = avfilter_inout_alloc ();
	+ outputs->name = av_strdup ("in");
	+ outputs->filter_ctx = c->src;
	+ outputs->pad_idx = 0;
	+ outputs->next = NULL;
	+
	+ inputs = avfilter_inout_alloc ();
	+ inputs->name = av_strdup ("out");
	+ inputs->filter_ctx = c->sink;
	+ inputs->pad_idx = 0;
	+ inputs->next = NULL;
	+
	+ do
	+ {
	+ ret = avfilter_graph_parse_ptr (c->graph,
	+ "aresample",
	+ &inputs,
	+ &outputs,
	+ NULL);
	+ if (ret < 0)
	+ {
	+ edc_rtsp_debug (rtsp, "failed to parse audio filter graph : '%s'(%d)",
	+ av_err2str (ret), ret);
	+ avfilter_graph_free (&c->graph);
	+ break;
	+ }
	+
	+ ret = avfilter_graph_config (c->graph, NULL);
	+ if (ret < 0)
	+ {
	+ edc_rtsp_debug (rtsp, "failed to configure audio filter graph : '%s'(%d)",
	+ av_err2str (ret), ret);
	+ avfilter_graph_free (&c->graph);
	+ break;
	+ }
	+
	+ //if (!(c->encoder->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE))
	+ //av_buffersink_set_frame_size (c->sink, c->encoder->frame_size);
	+
	+ if (1)
	+ {
	+ char *dump;
	+
	+ dump = avfilter_graph_dump (c->graph, NULL);
	+ if (dump)
	+ {
	+ edc_rtsp_debug (rtsp, "resampling filter graph\n%s", dump);
	+ av_free (dump);
	+ }
	+ }
	+
	+ ret = 0;
	+ }
	+ while (0);
	+
	+ avfilter_inout_free (&inputs);
	+ avfilter_inout_free (&outputs);
	+
	+ return ret;
	+}
	+
	static void
	edc_rtsp_handle_error (EdcRtsp *rtsp,
	const char *fmt,
	@@ -567,6 +746,9 @@ edc_rtsp_read_rtp_real (EdcRtsp *rtsp,
	if (!pkt || pkt->size <= 0 || !pkt->data)
	return NULL;

	+ if (rtsp->base_timestamp == 0)
	+ rtsp->base_timestamp = edc_get_real_time ();
	+
	/* RTP extension header */
	if (rtsp->fmt.avfctx->rtp_ext_header_len > 0)
	{
	@@ -584,19 +766,37 @@ edc_rtsp_read_rtp_real (EdcRtsp *rtsp,
	buf = NULL;
	if (pkt->stream_index == rtsp->fmt.video_stream)
	{
	+ AVRational tb = { 1, 90000 };
	+
	+ if (rtsp->video.start_pts < 0)
	+ rtsp->video.start_pts = pkt->pts;
	+
	if (!rtsp->video.has_stream ||
	!rtsp->use_stream[EDC_RTSP_VIDEO])
	return NULL;

	+ if (rtsp->video.start_pts > pkt->pts)
	+ rtsp->video.start_pts = pkt->pts;
	+
	+ pkt->pts -= rtsp->video.start_pts;
	+
	+ if (0) edc_rtsp_log (rtsp, "video time:%s pts:%ld", av_ts2timestr (pkt->pts, &tb), pkt->pts);
	+
	buf = rtsp->video.make_frame (rtsp, pkt);
	if (buf)
	{
	buf->width = rtsp->video.width;
	buf->height = rtsp->video.height;
	+ buf->timestamp = rtsp->base_timestamp + av_rescale_q (pkt->pts, tb, AV_TIME_BASE_Q);
	}
	}
	else if (pkt->stream_index == rtsp->fmt.audio_stream)
	{
	+ AVRational tb = { 1, 44100 };
	+
	+ if (rtsp->audio.start_pts < 0)
	+ rtsp->audio.start_pts = pkt->pts;
	+
	if (!rtsp->audio.has_stream ||
	!rtsp->use_stream[EDC_RTSP_AUDIO])
	return NULL;
	@@ -604,7 +804,13 @@ edc_rtsp_read_rtp_real (EdcRtsp *rtsp,
	if (rtsp->ignore_audio_frame)
	return NULL;

	+ pkt->pts -= rtsp->audio.start_pts;
	+
	+ if (0) edc_rtsp_log (rtsp, "audio time:%s pts:%ld", av_ts2timestr (pkt->pts, &tb), pkt->pts);
	+
	buf = rtsp->audio.make_frame (rtsp, pkt);
	+ if (buf)
	+ buf->timestamp = rtsp->base_timestamp + av_rescale_q (pkt->pts, tb, AV_TIME_BASE_Q);
	}
	else if (pkt->stream_index == rtsp->fmt.metadata_stream)
	{
	@@ -973,6 +1179,144 @@ edc_rtsp_make_audio_frame_resampled (EdcRtsp *rtsp,
	return buf;
	}

	+static EdcBuf *
	+edc_rtsp_make_audio_frame_resampled2 (EdcRtsp *rtsp,
	+ AVPacket *pkt)
	+{
	+ EdcRtspConvert *c;
	+ EdcBuf *buf = NULL;
	+ int len, pcm_len;
	+ const uint8_t **dec_data;
	+ int dec_data_nb_samples;
	+ int ret;
	+
	+ c = &rtsp->audio.convert;
	+
	+ dec_data = edc_codec_decode_audio2 (c->decoder,
	+ (char *) pkt->data,
	+ (int) pkt->size,
	+ &dec_data_nb_samples);
	+ if (!dec_data)
	+ {
	+ int error_count;
	+
	+ error_count = edc_codec_get_error_count (c->decoder);
	+ if (error_count <= 5 || (error_count % 100) == 0)
	+ edc_rtsp_log (rtsp,
	+ "failed to decode audio frame(%d) (count:%d)",
	+ pkt->size, error_count);
	+
	+ edc_rtsp_handle_error (rtsp, "Audio Resample");
	+
	+ return NULL;
	+ }
	+
	+ {
	+ AVRational tb = (AVRational) { 1, c->output_sample_rate };
	+ AVFrame *frame;
	+
	+ frame = av_frame_alloc ();
	+ frame->pts = pkt->pts;
	+ frame->pkt_dts = frame->pts;
	+ frame->channel_layout = av_get_default_channel_layout (c->input_channels);
	+ frame->channels = av_get_channel_layout_nb_channels (frame->channel_layout);
	+ frame->sample_rate = c->input_sample_rate;
	+ frame->format = c->input_sample_fmt;
	+ frame->nb_samples = dec_data_nb_samples;
	+
	+ avcodec_fill_audio_frame (frame,
	+ frame->channels,
	+ frame->format,
	+ (const uint8_t *) dec_data[0],
	+ 192000,
	+ 0);
	+
	+ if (0) edc_log ("audio decoded tb(%d/%d) nb_samples:%d sample_rate:%d fmt:%d layout:%ld time:%s pts:%s pkt_dts:%s",
	+ tb.num,
	+ tb.den,
	+ frame->nb_samples,
	+ frame->sample_rate,
	+ frame->format,
	+ frame->channel_layout,
	+ av_ts2timestr (frame->pts, &tb),
	+ av_ts2str (frame->pts),
	+ av_ts2str (frame->pkt_dts));
	+
	+ /* send frame to the audio resampler */
	+ ret = av_buffersrc_add_frame_flags (c->src,
	+ frame,
	+ AV_BUFFERSRC_FLAG_KEEP_REF);
	+ if (ret < 0)
	+ edc_rtsp_debug (rtsp, "failed to add frame to buffersrc: '%s'(%d)",
	+ av_err2str (ret), ret);
	+
	+ av_frame_free (&frame);
	+ }
	+
	+ while (TRUE)
	+ {
	+ AVFrame *frame;
	+
	+ frame = av_frame_alloc ();
	+
	+ ret = av_buffersink_get_frame (c->sink, frame);
	+ if (ret < 0)
	+ {
	+ if (ret != AVERROR (EAGAIN) && ret != AVERROR_EOF)
	+ edc_rtsp_debug (rtsp, "failed to get frame from buffersink: '%s'(%d)",
	+ av_err2str (ret), ret);
	+
	+ av_frame_free (&frame);
	+ break;
	+ }
	+
	+ if (0)
	+ {
	+ AVRational tb;
	+
	+ tb = av_buffersink_get_time_base (c->sink);
	+ edc_log ("audio resampled tb(%d/%d) nb_samples:%d sample_rate:%d fmt:%d layout:%ld time:%s pts:%s pkt_dts:%s",
	+ tb.num, tb.den,
	+ frame->nb_samples,
	+ frame->sample_rate,
	+ frame->format,
	+ frame->channel_layout,
	+ av_ts2timestr (frame->pts, &tb),
	+ av_ts2str (frame->pts),
	+ av_ts2str (frame->pkt_dts));
	+ }
	+
	+ pcm_len = frame->nb_samples * c->output_bytes_per_sample * c->output_channels;
	+ if (pcm_len <= 0)
	+ {
	+ av_frame_free (&frame);
	+ break;
	+ }
	+
	+ /* ADCPM */
	+ buf = edc_buf_new ((pcm_len / 4) + 4 /* header */ + 4 /* dummy */);
	+ len = edc_adpcm_encode (c->encoder,
	+ (short *) frame->extended_data[0],
	+ (unsigned char *) buf->data,
	+ pcm_len);
	+ if (len <= 0)
	+ {
	+ edc_buf_unref (buf);
	+ return NULL;
	+ }
	+
	+ buf->channel = 0;
	+ buf->flags = EDC_BUF_FLAG_AUDIO;
	+ buf->codec_id = EDC_CODEC_ADPCM;
	+ buf->len = len;
	+
	+ av_frame_free (&frame);
	+ break;
	+ }
	+
	+ return buf;
	+}
	+
	static EdcBuf *
	edc_rtsp_make_audio_frame_encoded (EdcRtsp *rtsp,
	AVPacket *pkt)
	@@ -1526,7 +1870,13 @@ edc_rtsp_open_fmt (EdcRtsp *rtsp)
	continue;
	}

	- rtsp->audio.make_frame = edc_rtsp_make_audio_frame_resampled;
	+ if (0)
	+ rtsp->audio.make_frame = edc_rtsp_make_audio_frame_resampled;
	+ else
	+ {
	+ mp4serv_init_audio_resampling_filter (rtsp);
	+ rtsp->audio.make_frame = edc_rtsp_make_audio_frame_resampled2;
	+ }
	}
	else
	{
	@@ -1891,6 +2241,9 @@ edc_rtsp_start (EdcRtsp *rtsp)
	}
	rtsp->started = TRUE;
	rtsp->key_received = FALSE;
	+ rtsp->base_timestamp = 0;
	+ rtsp->video.start_pts = -1;
	+ rtsp->audio.start_pts = -1;

	edc_reset_rel_timestamp (&rtsp->rel_timestamp);