moonpfe/use-rtp-timestamp-and-set-frame-size-to-2040.patch

## use-rtp-timestamp-and-set-frame-size-to-2040.patch
From a860ebd0e3b0555aed30eb7e70920687f3651104 Mon Sep 17 00:00:00 2001
From: InCheol Moon <moonpfe@emstone.com>
Date: Mon, 3 Sep 2018 16:31:07 +0900
Subject: [PATCH] rtsp: use rtp timestamp and set frame size to 2040

---
 edc-rtsp.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 76 insertions(+), 3 deletions(-)

diff --git a/edc-rtsp.c b/edc-rtsp.c
index 161ca75d..ae430826 100644
--- a/edc-rtsp.c
+++ b/edc-rtsp.c
@@ -20,6 +20,7 @@
 #include <libswresample/swresample.h>
 #include <libavutil/random_seed.h>
 #include <libavutil/intreadwrite.h>
+#include <libavutil/timestamp.h>

 #ifndef _
 #if defined(linux) && defined(PACKAGE)
@@ -34,6 +35,7 @@
 #define EDC_RTSP_KEEP_ALIVE_PERIOD  30   /**< seconds */
 #define EDC_RTSP_AUDIO_BUFFER_SIZE  32000
 #define EDC_RTSP_AUDIO_FRAME_SIZE   1600
+#define EDC_RTSP_AUDIO_SAMPLES      2040 /* 2041 */

 typedef struct AVIOInternal
 {
@@ -56,6 +58,13 @@ struct _EdcRtspConvert
     int   nb_samples;
   } r;

+  struct
+  {
+    char *buf;
+    int   len;
+    int   nb_samples;
+  } pcm;
+
   EdcCodec           *decoder;
   EdcCodecID          input_codec_id;
   enum AVSampleFormat input_sample_fmt;
@@ -109,7 +118,6 @@ struct _EdcRtspAudioOut
   uint32_t    ssrc;
 };

-
 /**
  * EdcRtsp object
  */
@@ -120,6 +128,8 @@ struct _EdcRtsp
   EdcRtspFrameRecv *frame_recv;
   EdcRtspErrorFunc  err_func;

+  int64_t base_timestamp;
+
   struct
   {
     EdcRtspCheckExitFunc func;
@@ -200,6 +210,7 @@ struct _EdcRtsp
   unsigned int started: 1;
   unsigned int key_received: 1;
   unsigned int use_https: 1;
+  unsigned int pkt_key_received: 1;
 };

 static int edc_rtsp_debug_level;
@@ -562,6 +573,7 @@ static inline EdcBuf *
 edc_rtsp_read_rtp_real (EdcRtsp  *rtsp,
                         AVPacket *pkt)
 {
+  AVStream *stream;
   EdcBuf *buf;

   if (!pkt || pkt->size <= 0 || !pkt->data)
@@ -581,9 +593,17 @@ edc_rtsp_read_rtp_real (EdcRtsp  *rtsp,
                                        rtsp->fmt.avfctx->rtp_ext_header_len);
     }

+  stream = rtsp->fmt.avfctx->streams[pkt->stream_index];
+
   buf = NULL;
   if (pkt->stream_index == rtsp->fmt.video_stream)
     {
+      if (0) edc_rtsp_log (rtsp,
+                           "video time:%s pts:%ld key:%d",
+                           av_ts2timestr (pkt->pts, &stream->time_base),
+                           pkt->pts,
+                           pkt->flags & AV_PKT_FLAG_KEY ? 1: 0);
+
       if (!rtsp->video.has_stream ||
           !rtsp->use_stream[EDC_RTSP_VIDEO])
         return NULL;
@@ -593,10 +613,20 @@ edc_rtsp_read_rtp_real (EdcRtsp  *rtsp,
         {
           buf->width = rtsp->video.width;
           buf->height = rtsp->video.height;
+          buf->timestamp = rtsp->base_timestamp
+                         + av_rescale_q (pkt->pts,
+                                         stream->time_base,
+                                         AV_TIME_BASE_Q);
         }
     }
   else if (pkt->stream_index == rtsp->fmt.audio_stream)
     {
+      if (0) edc_rtsp_log (rtsp,
+                           "audio time:%s pts:%ld key:%d",
+                           av_ts2timestr (pkt->pts, &stream->time_base),
+                           pkt->pts,
+                           pkt->flags & AV_PKT_FLAG_KEY ? 1 : 0);
+
       if (!rtsp->audio.has_stream ||
           !rtsp->use_stream[EDC_RTSP_AUDIO])
         return NULL;
@@ -605,6 +635,11 @@ edc_rtsp_read_rtp_real (EdcRtsp  *rtsp,
         return NULL;

       buf = rtsp->audio.make_frame (rtsp, pkt);
+      if (buf)
+        buf->timestamp = rtsp->base_timestamp
+                       + av_rescale_q (pkt->pts,
+                                       stream->time_base,
+                                       AV_TIME_BASE_Q);
     }
   else if (pkt->stream_index == rtsp->fmt.metadata_stream)
     {
@@ -706,7 +741,25 @@ edc_rtsp_read_rtp (EdcRtsp *rtsp)
       return NULL;
     }

-  buf = edc_rtsp_read_rtp_real (rtsp, pkt);
+  if (pkt->stream_index == rtsp->fmt.video_stream && pkt->flags & AV_PKT_FLAG_KEY)
+    rtsp->pkt_key_received = TRUE;
+
+  if (rtsp->pkt_key_received)
+    {
+      if (rtsp->base_timestamp == 0)
+        {
+          AVStream *stream = rtsp->fmt.avfctx->streams[pkt->stream_index];
+
+          rtsp->base_timestamp = edc_get_real_time ()
+                               - av_rescale_q (pkt->pts,
+                                               stream->time_base,
+                                               AV_TIME_BASE_Q);
+        }
+
+      buf = edc_rtsp_read_rtp_real (rtsp, pkt);
+    }
+  else
+    buf = NULL;

   av_packet_unref (pkt);

@@ -953,12 +1006,26 @@ edc_rtsp_make_audio_frame_resampled (EdcRtsp  *rtsp,
   if (pcm_len <= 0)
     return NULL;

+  memcpy (c->pcm.buf + c->pcm.len, c->r.data, pcm_len);
+  c->pcm.len += pcm_len;
+  c->pcm.nb_samples += pcm_samples;
+  if (c->pcm.nb_samples < EDC_RTSP_AUDIO_SAMPLES)
+    return NULL;
+
+  pcm_len = EDC_RTSP_AUDIO_SAMPLES * c->output_bytes_per_sample * c->output_channels;
+
   /* ADCPM */
   buf = edc_buf_new ((pcm_len / 4) + 4 /* header */ + 4 /* dummy */);
   len = edc_adpcm_encode (c->encoder,
-                          (short *) c->r.data,
+                          (short *) c->pcm.buf,
                           (unsigned char *) buf->data,
                           pcm_len);
+
+  c->pcm.nb_samples -= EDC_RTSP_AUDIO_SAMPLES;
+  c->pcm.len -= pcm_len;
+  if (c->pcm.len > 0)
+    memcpy (c->pcm.buf, c->pcm.buf + pcm_len, c->pcm.len);
+
   if (len <= 0)
     {
       edc_buf_unref (buf);
@@ -1502,6 +1569,10 @@ edc_rtsp_open_fmt (EdcRtsp *rtsp)
           c->r.data = malloc (c->r.alloc_size + AV_INPUT_BUFFER_PADDING_SIZE);
           c->r.nb_samples = c->r.alloc_size / c->output_bytes_per_sample / c->output_channels;

+          c->pcm.buf = malloc (EDC_CODEC_MAX_AUDIO_FRAME_SIZE * 4 + AV_INPUT_BUFFER_PADDING_SIZE);
+          c->pcm.len = 0;
+          c->pcm.nb_samples = 0;
+
           c->resampler = NULL;
           if (c->input_sample_fmt != c->output_sample_fmt ||
               c->input_sample_rate != c->output_sample_rate ||
@@ -1891,6 +1962,8 @@ edc_rtsp_start (EdcRtsp *rtsp)
     }
   rtsp->started = TRUE;
   rtsp->key_received = FALSE;
+  rtsp->base_timestamp = 0;
+  rtsp->pkt_key_received = FALSE;

   edc_reset_rel_timestamp (&rtsp->rel_timestamp);

--
2.18.0
	From a860ebd0e3b0555aed30eb7e70920687f3651104 Mon Sep 17 00:00:00 2001
	From: InCheol Moon <moonpfe@emstone.com>
	Date: Mon, 3 Sep 2018 16:31:07 +0900
	Subject: [PATCH] rtsp: use rtp timestamp and set frame size to 2040

	---
	edc-rtsp.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++---
	1 file changed, 76 insertions(+), 3 deletions(-)

	diff --git a/edc-rtsp.c b/edc-rtsp.c
	index 161ca75d..ae430826 100644
	--- a/edc-rtsp.c
	+++ b/edc-rtsp.c
	@@ -20,6 +20,7 @@
	#include <libswresample/swresample.h>
	#include <libavutil/random_seed.h>
	#include <libavutil/intreadwrite.h>
	+#include <libavutil/timestamp.h>

	#ifndef _
	#if defined(linux) && defined(PACKAGE)
	@@ -34,6 +35,7 @@
	#define EDC_RTSP_KEEP_ALIVE_PERIOD 30 /**< seconds */
	#define EDC_RTSP_AUDIO_BUFFER_SIZE 32000
	#define EDC_RTSP_AUDIO_FRAME_SIZE 1600
	+#define EDC_RTSP_AUDIO_SAMPLES 2040 /* 2041 */

	typedef struct AVIOInternal
	{
	@@ -56,6 +58,13 @@ struct _EdcRtspConvert
	int nb_samples;
	} r;

	+ struct
	+ {
	+ char *buf;
	+ int len;
	+ int nb_samples;
	+ } pcm;
	+
	EdcCodec *decoder;
	EdcCodecID input_codec_id;
	enum AVSampleFormat input_sample_fmt;
	@@ -109,7 +118,6 @@ struct _EdcRtspAudioOut
	uint32_t ssrc;
	};

	-
	/**
	* EdcRtsp object
	*/
	@@ -120,6 +128,8 @@ struct _EdcRtsp
	EdcRtspFrameRecv *frame_recv;
	EdcRtspErrorFunc err_func;

	+ int64_t base_timestamp;
	+
	struct
	{
	EdcRtspCheckExitFunc func;
	@@ -200,6 +210,7 @@ struct _EdcRtsp
	unsigned int started: 1;
	unsigned int key_received: 1;
	unsigned int use_https: 1;
	+ unsigned int pkt_key_received: 1;
	};

	static int edc_rtsp_debug_level;
	@@ -562,6 +573,7 @@ static inline EdcBuf *
	edc_rtsp_read_rtp_real (EdcRtsp *rtsp,
	AVPacket *pkt)
	{
	+ AVStream *stream;
	EdcBuf *buf;

	if (!pkt || pkt->size <= 0 || !pkt->data)
	@@ -581,9 +593,17 @@ edc_rtsp_read_rtp_real (EdcRtsp *rtsp,
	rtsp->fmt.avfctx->rtp_ext_header_len);
	}

	+ stream = rtsp->fmt.avfctx->streams[pkt->stream_index];
	+
	buf = NULL;
	if (pkt->stream_index == rtsp->fmt.video_stream)
	{
	+ if (0) edc_rtsp_log (rtsp,
	+ "video time:%s pts:%ld key:%d",
	+ av_ts2timestr (pkt->pts, &stream->time_base),
	+ pkt->pts,
	+ pkt->flags & AV_PKT_FLAG_KEY ? 1: 0);
	+
	if (!rtsp->video.has_stream ||
	!rtsp->use_stream[EDC_RTSP_VIDEO])
	return NULL;
	@@ -593,10 +613,20 @@ edc_rtsp_read_rtp_real (EdcRtsp *rtsp,
	{
	buf->width = rtsp->video.width;
	buf->height = rtsp->video.height;
	+ buf->timestamp = rtsp->base_timestamp
	+ + av_rescale_q (pkt->pts,
	+ stream->time_base,
	+ AV_TIME_BASE_Q);
	}
	}
	else if (pkt->stream_index == rtsp->fmt.audio_stream)
	{
	+ if (0) edc_rtsp_log (rtsp,
	+ "audio time:%s pts:%ld key:%d",
	+ av_ts2timestr (pkt->pts, &stream->time_base),
	+ pkt->pts,
	+ pkt->flags & AV_PKT_FLAG_KEY ? 1 : 0);
	+
	if (!rtsp->audio.has_stream ||
	!rtsp->use_stream[EDC_RTSP_AUDIO])
	return NULL;
	@@ -605,6 +635,11 @@ edc_rtsp_read_rtp_real (EdcRtsp *rtsp,
	return NULL;

	buf = rtsp->audio.make_frame (rtsp, pkt);
	+ if (buf)
	+ buf->timestamp = rtsp->base_timestamp
	+ + av_rescale_q (pkt->pts,
	+ stream->time_base,
	+ AV_TIME_BASE_Q);
	}
	else if (pkt->stream_index == rtsp->fmt.metadata_stream)
	{
	@@ -706,7 +741,25 @@ edc_rtsp_read_rtp (EdcRtsp *rtsp)
	return NULL;
	}

	- buf = edc_rtsp_read_rtp_real (rtsp, pkt);
	+ if (pkt->stream_index == rtsp->fmt.video_stream && pkt->flags & AV_PKT_FLAG_KEY)
	+ rtsp->pkt_key_received = TRUE;
	+
	+ if (rtsp->pkt_key_received)
	+ {
	+ if (rtsp->base_timestamp == 0)
	+ {
	+ AVStream *stream = rtsp->fmt.avfctx->streams[pkt->stream_index];
	+
	+ rtsp->base_timestamp = edc_get_real_time ()
	+ - av_rescale_q (pkt->pts,
	+ stream->time_base,
	+ AV_TIME_BASE_Q);
	+ }
	+
	+ buf = edc_rtsp_read_rtp_real (rtsp, pkt);
	+ }
	+ else
	+ buf = NULL;

	av_packet_unref (pkt);

	@@ -953,12 +1006,26 @@ edc_rtsp_make_audio_frame_resampled (EdcRtsp *rtsp,
	if (pcm_len <= 0)
	return NULL;

	+ memcpy (c->pcm.buf + c->pcm.len, c->r.data, pcm_len);
	+ c->pcm.len += pcm_len;
	+ c->pcm.nb_samples += pcm_samples;
	+ if (c->pcm.nb_samples < EDC_RTSP_AUDIO_SAMPLES)
	+ return NULL;
	+
	+ pcm_len = EDC_RTSP_AUDIO_SAMPLES * c->output_bytes_per_sample * c->output_channels;
	+
	/* ADCPM */
	buf = edc_buf_new ((pcm_len / 4) + 4 /* header */ + 4 /* dummy */);
	len = edc_adpcm_encode (c->encoder,
	- (short *) c->r.data,
	+ (short *) c->pcm.buf,
	(unsigned char *) buf->data,
	pcm_len);
	+
	+ c->pcm.nb_samples -= EDC_RTSP_AUDIO_SAMPLES;
	+ c->pcm.len -= pcm_len;
	+ if (c->pcm.len > 0)
	+ memcpy (c->pcm.buf, c->pcm.buf + pcm_len, c->pcm.len);
	+
	if (len <= 0)
	{
	edc_buf_unref (buf);
	@@ -1502,6 +1569,10 @@ edc_rtsp_open_fmt (EdcRtsp *rtsp)
	c->r.data = malloc (c->r.alloc_size + AV_INPUT_BUFFER_PADDING_SIZE);
	c->r.nb_samples = c->r.alloc_size / c->output_bytes_per_sample / c->output_channels;

	+ c->pcm.buf = malloc (EDC_CODEC_MAX_AUDIO_FRAME_SIZE * 4 + AV_INPUT_BUFFER_PADDING_SIZE);
	+ c->pcm.len = 0;
	+ c->pcm.nb_samples = 0;
	+
	c->resampler = NULL;
	if (c->input_sample_fmt != c->output_sample_fmt ||
	c->input_sample_rate != c->output_sample_rate ||
	@@ -1891,6 +1962,8 @@ edc_rtsp_start (EdcRtsp *rtsp)
	}
	rtsp->started = TRUE;
	rtsp->key_received = FALSE;
	+ rtsp->base_timestamp = 0;
	+ rtsp->pkt_key_received = FALSE;

	edc_reset_rel_timestamp (&rtsp->rel_timestamp);

	--
	2.18.0