Suhail/encode.cpp

## encode.cpp
/*
  I am passing the data read from a 12 MB file to encode(uint8_t *, int, void (*)(uint8_t *, int)) and then flushing the encoder. This is the output I get:

Attempting to capture audio from a file...
Initializing encoder...
sample rate: 48000
Read 12292096 bytes from a file
------------------------------------------
Size of in_linesize: 12292096
out_samples: 1
Size of out resampled converted data: 128
Encoding...
You need to keep sending data.
Encoding...
Received new encoded data of size 146 bytes
Received new encoded data of size 6 bytes
You need to keep sending data.

Afterwards the output is written to test.m4a, but the resulting file is only 2 bytes long for some reason.

*/

#include "encode.h"

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavresample/avresample.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/frame.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
}

namespace ffmpeg_encode {

/* Encoder description looked up by init() via avcodec_find_encoder(). */
const AVCodec *codec;
/* Codec context shared by init(), encode() and destroy(); stays NULL until
 * init() allocates it. */
AVCodecContext *c = NULL;

/*
 * Reports whether the encoder lists sample_fmt among its supported sample
 * formats. The list is walked up to its AV_SAMPLE_FMT_NONE terminator.
 * Returns 1 when the format is present and 0 otherwise.
 */
static int check_sample_fmt(const AVCodec *codec,
                            enum AVSampleFormat sample_fmt) {
  for (const enum AVSampleFormat *fmt = codec->sample_fmts;
       *fmt != AV_SAMPLE_FMT_NONE; ++fmt) {
    if (*fmt == sample_fmt) {
      return 1;
    }
  }
  return 0;
}
/*
 * Picks the supported sample rate that lies closest to 44100 Hz; when two
 * rates are equally close the one listed first wins. Falls back to 44100 when
 * the encoder publishes no list of supported rates.
 */
static int select_sample_rate(const AVCodec *codec) {
  const int *rate = codec->supported_samplerates;
  if (!rate) {
    return 44100;
  }

  int closest = 0;
  for (; *rate; ++rate) {
    if (closest == 0 || abs(44100 - *rate) < abs(44100 - closest)) {
      closest = *rate;
    }
  }
  return closest;
}
/*
 * Selects the supported channel layout with the highest channel count; the
 * first such layout wins when several share that count. Falls back to
 * AV_CH_LAYOUT_STEREO when the encoder publishes no list of layouts.
 *
 * The layout is returned as the full 64-bit mask: returning it through an
 * int would truncate (or sign-extend) any layout that uses bit 31 or above.
 */
static uint64_t select_channel_layout(const AVCodec *codec) {
  const uint64_t *layout = codec->channel_layouts;
  if (!layout) {
    return AV_CH_LAYOUT_STEREO;
  }

  uint64_t best_ch_layout = 0;
  int best_nb_channels = 0;
  for (; *layout; ++layout) {
    const int nb_channels = av_get_channel_layout_nb_channels(*layout);
    if (nb_channels > best_nb_channels) {
      best_ch_layout = *layout;
      best_nb_channels = nb_channels;
    }
  }
  return best_ch_layout;
}

/*
 * Sends one frame to the encoder and hands every packet that becomes
 * available to callback(data, size). Passing a NULL frame asks the encoder to
 * flush. A failure while sending or receiving prints a message and terminates
 * the process with exit(1).
 */
static void encode(AVCodecContext *ctx, AVFrame *frame, AVPacket *pkt,
                   void (*callback)(uint8_t *, int)) {
  printf("Encoding...\n");

  /* hand the frame to the encoder */
  if (avcodec_send_frame(ctx, frame) < 0) {
    fprintf(stderr, "Error sending the frame to the encoder\n");
    exit(1);
  }

  /* collect packets until the encoder has nothing more to give; one frame
   * may yield zero, one or several packets */
  for (;;) {
    const int status = avcodec_receive_packet(ctx, pkt);

    if (status == AVERROR(EAGAIN) || status == AVERROR_EOF) {
      printf("You need to keep sending data.\n");
    } else if (status < 0) {
      fprintf(stderr, "Error encoding audio frame\n");
      exit(1);
    } else if (status == 0 && pkt->size > 0) {
      callback(pkt->data, pkt->size);
    }

    av_packet_unref(pkt);

    if (status < 0) {
      break;
    }
  }
}

/*
 * Looks up the AAC encoder, allocates the shared codec context and opens it
 * for planar-float input at 64000 bit/s and 48000 Hz, with the channel layout
 * chosen by select_channel_layout(). Every failure prints a message and
 * terminates the process with exit(1).
 */
void init() {
  printf("Initializing encoder...\n");

  /* register all the codecs before looking one up */
  avcodec_register_all();

  /* find the AAC encoder */
  codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
  if (codec == NULL) {
    printf("Codec not found\n");
    exit(1);
  }

  c = avcodec_alloc_context3(codec);
  if (c == NULL) {
    printf("Could not allocate audio codec context\n");
    exit(1);
  }

  /* target bit rate and input sample format */
  c->bit_rate = 64000;
  c->sample_fmt = AV_SAMPLE_FMT_FLTP;

  /* check that the encoder supports planar float input */
  const int fmt_supported = check_sample_fmt(codec, c->sample_fmt);
  if (!fmt_supported) {
    printf("Encoder does not support sample format %s\n",
           av_get_sample_fmt_name(c->sample_fmt));
    exit(1);
  }

  /* the rate picked from the encoder's list is overridden right away: the
   * context always ends up at 48000 Hz */
  c->sample_rate = select_sample_rate(codec);
  c->sample_rate = 48000;
  printf("sample rate: %d\n", c->sample_rate);

  /* channel layout with the most channels, and the matching channel count */
  const uint64_t layout = select_channel_layout(codec);
  c->channel_layout = layout;
  c->channels = av_get_channel_layout_nb_channels(c->channel_layout);

  /* open the encoder */
  const int opened = avcodec_open2(c, codec, NULL);
  if (opened < 0) {
    fprintf(stderr, "Could not open codec\n");
    exit(1);
  }
}

int encode(uint8_t *data, int size, void (*callback)(uint8_t *, int)) {
  AVFrame *frame = nullptr;
  AVPacket *pkt = nullptr;
  int ret;
  uint16_t *samples = nullptr;

  printf("------------------------------------------\n");

  if (!c) {
    printf("Error: Need to initialize codec context\n");
    return -1;
  }

  if (callback == nullptr) {
    printf("Please provide a callback\n");
    return -1;
  }

  if (data == nullptr) {
    printf("no data\n");
    return -1;
  }

  /* packet for holding encoded output */
  pkt = av_packet_alloc();
  if (!pkt) {
    fprintf(stderr, "could not allocate the packet\n");
    exit(1);
  }

  /* frame containing input raw audio */
  frame = av_frame_alloc();
  if (!frame) {
    fprintf(stderr, "Could not allocate audio frame\n");
    exit(1);
  }
  frame->nb_samples = c->frame_size;
  frame->format = c->sample_fmt;
  frame->channel_layout = c->channel_layout;

  /* allocate the data buffers */
  ret = av_frame_get_buffer(frame, 0);
  if (ret < 0) {
    fprintf(stderr, "Could not allocate audio data buffers\n");
    exit(1);
  }

  ret = av_frame_make_writable(frame);
  if (ret < 0) {
    printf("Error: frame not writable\n");
    exit(1);
  }

  /* Convert from planar to interleaved */
  AVAudioResampleContext *avr = avresample_alloc_context();
  if (!avr) {
    printf("Error: Could not initialize resample context\n");
    return 0;
  }

  AVSampleFormat in_sample_fmt = AV_SAMPLE_FMT_FLT;
  AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLTP;

  av_opt_set_int(avr, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
  av_opt_set_int(avr, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
  av_opt_set_int(avr, "in_sample_rate", 48000, 0);
  av_opt_set_int(avr, "out_sample_rate", 48000, 0);
  av_opt_set_int(avr, "in_sample_fmt", in_sample_fmt, 0);  // Interleaved
  av_opt_set_int(avr, "out_sample_fmt", out_sample_fmt,
                 0);  // Non-interleaved

  if (avresample_open(avr) < 0) {
    printf("Error: Could not open resample context\n");
    return 0;
  }

  uint8_t **input = nullptr;
  int in_linesize = 0, in_samples = 0;
  uint8_t *output = nullptr;
  int out_linesize;

  input = &data;
  in_samples = 1;  // maybe?
  in_linesize = size;
  printf("Size of in_linesize: %d\n", in_linesize);

  int out_samples = avresample_get_out_samples(avr, in_samples);

  av_samples_alloc(&output, &out_linesize, 2, out_samples, out_sample_fmt, 0);

  // TODO: Don't allow in_linesize to be 0 avoiding optimizations
  out_samples = avresample_convert(avr, &output, out_linesize, out_samples,
                                   input, 0, in_samples);

  printf("out_samples: %d\n", out_samples);
  printf("Size of out resampled converted data: %d\n", out_linesize);

  while (avresample_available(avr) > 0) {
    printf("reading...\n");
    avresample_read(avr, &output, out_samples);
  }

  /* encode an audio chunk */
  samples = (uint16_t *)frame->data[0];
  memcpy(samples, output, out_linesize);
  encode(c, frame, pkt, callback);

  /* flush the encoder */
  encode(c, NULL, pkt, callback);

  av_frame_free(&frame);
  av_packet_free(&pkt);

  /* For resampling */
  av_freep(&output);
  if (avr) avresample_close(avr);

  return 0;
}

/* Frees the shared codec context allocated by init(). */
void destroy() {
  avcodec_free_context(&c);
}

}  // namespace ffmpeg_encode
	/*
	I am passing the data read from a 12 MB file to encode(uint8_t *, int, void (*)(uint8_t *, int)) and then flushing the encoder. This is the output I get:

	Attempting to capture audio from a file...
	Initializing encoder...
	sample rate: 48000
	Read 12292096 bytes from a file
	------------------------------------------
	Size of in_linesize: 12292096
	out_samples: 1
	Size of out resampled converted data: 128
	Encoding...
	You need to keep sending data.
	Encoding...
	Received new encoded data of size 146 bytes
	Received new encoded data of size 6 bytes
	You need to keep sending data.

	Afterwards the output is written to test.m4a, but the resulting file is only 2 bytes long for some reason.

	*/

	#include "encode.h"

	extern "C" {
	#include <libavcodec/avcodec.h>
	#include <libavresample/avresample.h>
	#include <libavutil/channel_layout.h>
	#include <libavutil/common.h>
	#include <libavutil/frame.h>
	#include <libavutil/opt.h>
	#include <libavutil/samplefmt.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>
	}

	namespace ffmpeg_encode {

	/* Encoder description looked up by init() via avcodec_find_encoder(). */
	const AVCodec *codec;
	/* Codec context shared by init(), encode() and destroy(); stays NULL until
	 * init() allocates it. */
	AVCodecContext *c = NULL;

	/*
	 * Reports whether the encoder lists sample_fmt among its supported sample
	 * formats. The list is walked up to its AV_SAMPLE_FMT_NONE terminator.
	 * Returns 1 when the format is present and 0 otherwise.
	 */
	static int check_sample_fmt(const AVCodec *codec,
	                            enum AVSampleFormat sample_fmt) {
	  for (const enum AVSampleFormat *fmt = codec->sample_fmts;
	       *fmt != AV_SAMPLE_FMT_NONE; ++fmt) {
	    if (*fmt == sample_fmt) {
	      return 1;
	    }
	  }
	  return 0;
	}
	/*
	 * Picks the supported sample rate that lies closest to 44100 Hz; when two
	 * rates are equally close the one listed first wins. Falls back to 44100
	 * when the encoder publishes no list of supported rates.
	 */
	static int select_sample_rate(const AVCodec *codec) {
	  const int *rate = codec->supported_samplerates;
	  if (!rate) {
	    return 44100;
	  }

	  int closest = 0;
	  for (; *rate; ++rate) {
	    /* plain logical OR: the escaped form left by the paste does not compile */
	    if (closest == 0 || abs(44100 - *rate) < abs(44100 - closest)) {
	      closest = *rate;
	    }
	  }
	  return closest;
	}
	/*
	 * Selects the supported channel layout with the highest channel count; the
	 * first such layout wins when several share that count. Falls back to
	 * AV_CH_LAYOUT_STEREO when the encoder publishes no list of layouts.
	 *
	 * The layout is returned as the full 64-bit mask: returning it through an
	 * int would truncate (or sign-extend) any layout that uses bit 31 or above.
	 */
	static uint64_t select_channel_layout(const AVCodec *codec) {
	  const uint64_t *layout = codec->channel_layouts;
	  if (!layout) {
	    return AV_CH_LAYOUT_STEREO;
	  }

	  uint64_t best_ch_layout = 0;
	  int best_nb_channels = 0;
	  for (; *layout; ++layout) {
	    const int nb_channels = av_get_channel_layout_nb_channels(*layout);
	    if (nb_channels > best_nb_channels) {
	      best_ch_layout = *layout;
	      best_nb_channels = nb_channels;
	    }
	  }
	  return best_ch_layout;
	}

	static void encode(AVCodecContext ctx, AVFrame frame, AVPacket *pkt,
	void (callback)(uint8_t , int)) {
	printf("Encoding...\n");
	int ret;
	/* send the frame for encoding */
	ret = avcodec_send_frame(ctx, frame);
	if (ret < 0) {
	fprintf(stderr, "Error sending the frame to the encoder\n");
	exit(1);
	}
	/* read all the available output packets (in general there may be any
	* number of them */
	while (ret >= 0) {
	ret = avcodec_receive_packet(ctx, pkt);
	if (ret == AVERROR(EAGAIN) \|\| ret == AVERROR_EOF) {
	printf("You need to keep sending data.\n");
	} else if (ret < 0) {
	fprintf(stderr, "Error encoding audio frame\n");
	exit(1);
	} else if (ret == 0 && pkt->size > 0) {
	callback(pkt->data, pkt->size);
	}

	av_packet_unref(pkt);
	}
	}

	/*
	 * Looks up the AAC encoder, allocates the shared codec context and opens it
	 * for planar-float input at 64000 bit/s and 48000 Hz, with the channel
	 * layout chosen by select_channel_layout(). Every failure prints a message
	 * and terminates the process with exit(1).
	 */
	void init() {
	  printf("Initializing encoder...\n");

	  /* register all the codecs before looking one up */
	  avcodec_register_all();

	  /* find the AAC encoder */
	  codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
	  if (codec == NULL) {
	    printf("Codec not found\n");
	    exit(1);
	  }

	  c = avcodec_alloc_context3(codec);
	  if (c == NULL) {
	    printf("Could not allocate audio codec context\n");
	    exit(1);
	  }

	  /* target bit rate and input sample format */
	  c->bit_rate = 64000;
	  c->sample_fmt = AV_SAMPLE_FMT_FLTP;

	  /* check that the encoder supports planar float input */
	  const int fmt_supported = check_sample_fmt(codec, c->sample_fmt);
	  if (!fmt_supported) {
	    printf("Encoder does not support sample format %s\n",
	           av_get_sample_fmt_name(c->sample_fmt));
	    exit(1);
	  }

	  /* the rate picked from the encoder's list is overridden right away: the
	   * context always ends up at 48000 Hz */
	  c->sample_rate = select_sample_rate(codec);
	  c->sample_rate = 48000;
	  printf("sample rate: %d\n", c->sample_rate);

	  /* channel layout with the most channels, and the matching channel count */
	  const uint64_t layout = select_channel_layout(codec);
	  c->channel_layout = layout;
	  c->channels = av_get_channel_layout_nb_channels(c->channel_layout);

	  /* open the encoder */
	  const int opened = avcodec_open2(c, codec, NULL);
	  if (opened < 0) {
	    fprintf(stderr, "Could not open codec\n");
	    exit(1);
	  }
	}

	int encode(uint8_t data, int size, void (callback)(uint8_t *, int)) {
	AVFrame *frame = nullptr;
	AVPacket *pkt = nullptr;
	int ret;
	uint16_t *samples = nullptr;

	printf("------------------------------------------\n");

	if (!c) {
	printf("Error: Need to initialize codec context\n");
	return -1;
	}

	if (callback == nullptr) {
	printf("Please provide a callback\n");
	return -1;
	}

	if (data == nullptr) {
	printf("no data\n");
	return -1;
	}

	/* packet for holding encoded output */
	pkt = av_packet_alloc();
	if (!pkt) {
	fprintf(stderr, "could not allocate the packet\n");
	exit(1);
	}

	/* frame containing input raw audio */
	frame = av_frame_alloc();
	if (!frame) {
	fprintf(stderr, "Could not allocate audio frame\n");
	exit(1);
	}
	frame->nb_samples = c->frame_size;
	frame->format = c->sample_fmt;
	frame->channel_layout = c->channel_layout;

	/* allocate the data buffers */
	ret = av_frame_get_buffer(frame, 0);
	if (ret < 0) {
	fprintf(stderr, "Could not allocate audio data buffers\n");
	exit(1);
	}

	ret = av_frame_make_writable(frame);
	if (ret < 0) {
	printf("Error: frame not writable\n");
	exit(1);
	}

	/* Convert from planar to interleaved */
	AVAudioResampleContext *avr = avresample_alloc_context();
	if (!avr) {
	printf("Error: Could not initialize resample context\n");
	return 0;
	}

	AVSampleFormat in_sample_fmt = AV_SAMPLE_FMT_FLT;
	AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_FLTP;

	av_opt_set_int(avr, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
	av_opt_set_int(avr, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
	av_opt_set_int(avr, "in_sample_rate", 48000, 0);
	av_opt_set_int(avr, "out_sample_rate", 48000, 0);
	av_opt_set_int(avr, "in_sample_fmt", in_sample_fmt, 0); // Interleaved
	av_opt_set_int(avr, "out_sample_fmt", out_sample_fmt,
	0); // Non-interleaved

	if (avresample_open(avr) < 0) {
	printf("Error: Could not open resample context\n");
	return 0;
	}

	uint8_t **input = nullptr;
	int in_linesize = 0, in_samples = 0;
	uint8_t *output = nullptr;
	int out_linesize;

	input = &data;
	in_samples = 1; // maybe?
	in_linesize = size;
	printf("Size of in_linesize: %d\n", in_linesize);

	int out_samples = avresample_get_out_samples(avr, in_samples);

	av_samples_alloc(&output, &out_linesize, 2, out_samples, out_sample_fmt, 0);

	// TODO: Don't allow in_linesize to be 0 avoiding optimizations
	out_samples = avresample_convert(avr, &output, out_linesize, out_samples,
	input, 0, in_samples);

	printf("out_samples: %d\n", out_samples);
	printf("Size of out resampled converted data: %d\n", out_linesize);

	while (avresample_available(avr) > 0) {
	printf("reading...\n");
	avresample_read(avr, &output, out_samples);
	}

	/* encode an audio chunk */
	samples = (uint16_t *)frame->data[0];
	memcpy(samples, output, out_linesize);
	encode(c, frame, pkt, callback);

	/* flush the encoder */
	encode(c, NULL, pkt, callback);

	av_frame_free(&frame);
	av_packet_free(&pkt);

	/* For resampling */
	av_freep(&output);
	if (avr) avresample_close(avr);

	return 0;
	}

	/* Frees the shared codec context allocated by init(). */
	void destroy() {
	  avcodec_free_context(&c);
	}

	} // namespace ffmpeg_encode