Skip to content

Instantly share code, notes, and snippets.

@maxammann
Last active May 21, 2018 01:51
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maxammann/137176f1dcd0e4f596e8 to your computer and use it in GitHub Desktop.
Save maxammann/137176f1dcd0e4f596e8 to your computer and use it in GitHub Desktop.
Example of how to visualize libav audio output as a spectrum
/**
 * Open a media file and set up a decoder for its best audio stream.
 *
 * @param file_path  path/URL handed to avformat_open_input()
 * @param fmt_ctx    receives the demuxer context on success; on failure the
 *                   input is closed again and *fmt_ctx is NULL
 * @param dec_ctx    receives the codec context of the selected stream (it
 *                   belongs to the stream, not to the caller); NULL on failure
 * @return the index of the selected audio stream (>= 0), or -1 on failure
 */
int open_file(char *file_path, AVFormatContext **fmt_ctx, AVCodecContext **dec_ctx) {
    AVCodec *codec = NULL;
    int audio_stream_index;

    *dec_ctx = NULL;

    // avformat_open_input() releases the context itself when it fails,
    // so there is nothing to close on this path.
    if (avformat_open_input(fmt_ctx, file_path, NULL, NULL) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot open input file\n");
        return -1;
    }

    if (avformat_find_stream_info(*fmt_ctx, NULL) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");
        goto fail;
    }

    // Find codec and stream
    audio_stream_index = av_find_best_stream(*fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
    if (audio_stream_index < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot find a audio stream in the input file\n");
        goto fail;
    }

    *dec_ctx = (*fmt_ctx)->streams[audio_stream_index]->codec;

    // Open codec
    if (avcodec_open2(*dec_ctx, codec, NULL) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot open audio decoder\n");
        goto fail;
    }

    return audio_stream_index;

fail:
    // The codec context is owned by the stream, so it dies with the demuxer;
    // clear the out-parameter rather than hand back a dangling pointer.
    *dec_ctx = NULL;
    avformat_close_input(fmt_ctx);
    return -1;
}
/**
 * Create and open a resampler that converts the decoder's output into
 * interleaved signed 16-bit stereo at the decoder's own sample rate.
 *
 * The output layout is fixed to two channels because the consumers in this
 * file allocate the conversion buffer for two channels and read the result as
 * alternating left/right values.
 *
 * @param out_resample  receives the opened context, or NULL on failure
 * @param dec_ctx       opened decoder context describing the input audio
 * @return the output sample format (AV_SAMPLE_FMT_S16) on success,
 *         AV_SAMPLE_FMT_NONE if the context could not be allocated or opened
 */
enum AVSampleFormat init_resampling(AVAudioResampleContext **out_resample, AVCodecContext *dec_ctx) {
    const enum AVSampleFormat output_fmt = AV_SAMPLE_FMT_S16;
    AVAudioResampleContext *resample;
    int64_t in_layout;
    int64_t out_layout;
    int sample_rate;

    *out_resample = NULL;

    resample = avresample_alloc_context();
    if (resample == NULL) {
        av_log(NULL, AV_LOG_ERROR, "Cannot allocate resample context\n");
        return AV_SAMPLE_FMT_NONE;
    }

    in_layout = av_get_default_channel_layout(dec_ctx->channels);
    out_layout = av_get_default_channel_layout(2); // stereo
    sample_rate = dec_ctx->sample_rate;

    av_opt_set_int(resample, "in_channel_layout", in_layout, 0);
    av_opt_set_int(resample, "out_channel_layout", out_layout, 0);
    av_opt_set_int(resample, "in_sample_rate", sample_rate, 0);
    av_opt_set_int(resample, "out_sample_rate", sample_rate, 0);
    av_opt_set_int(resample, "in_sample_fmt", dec_ctx->sample_fmt, 0);
    av_opt_set_int(resample, "out_sample_fmt", output_fmt, 0);

    if (avresample_open(resample) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot open resample context\n");
        avresample_free(&resample);
        return AV_SAMPLE_FMT_NONE;
    }

    *out_resample = resample;
    return output_fmt;
}
/*
 * Resample one decoded frame to interleaved 16-bit audio and hand it to the
 * visualizer. The output buffer is never smaller than one visualizer window
 * and is pre-filled with silence, so a short frame is zero-padded instead of
 * letting buffer_visualize() read past the converted data.
 */
static void visualize_frame(AVAudioResampleContext *resample, AVFrame *frame,
                            enum AVSampleFormat output_fmt, int channels, int window_values) {
    uint8_t *output = NULL;
    int out_linesize = 0;
    int out_samples = avresample_get_out_samples(resample, frame->nb_samples);
    int min_samples = window_values / channels;

    if (out_samples < min_samples) {
        out_samples = min_samples;
    }

    if (av_samples_alloc(&output, &out_linesize, channels, out_samples, output_fmt, 0) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot allocate output samples\n");
        return;
    }
    av_samples_set_silence(&output, 0, out_samples, channels, output_fmt);

    // Normalize the stream by resampling it
    if (avresample_convert(resample, &output, out_linesize, out_samples,
                           frame->data, frame->linesize[0], frame->nb_samples) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Error resampling audio\n");
    } else {
        buffer_visualize((int16_t *) output);
    }

    av_freep(&output);
}

/**
 * Decode the best audio stream of a file and feed every decoded frame to the
 * spectrum visualizer.
 *
 * @param file_path  path/URL of the media file
 * @return 0 once the whole file has been read, the negative value returned by
 *         open_file() if the file could not be opened, or -1 if the frame or
 *         the resampler could not be set up
 */
int audio_play(char *file_path) {
    // 4096 is the default sample size of libav; this is the number of
    // interleaved 16-bit values the visualizer consumes per call.
    const int window_values = (int) (4096 / sizeof(int16_t));
    // The visualizer reads alternating left/right values.
    const int out_channels = 2;

    AVPacket packet;
    AVFrame *frame = NULL;
    AVAudioResampleContext *resample = NULL;
    AVFormatContext *fmt_ctx = NULL;
    AVCodecContext *dec_ctx = NULL;
    enum AVSampleFormat output_fmt;
    int audio_stream_index;
    int decoder_open = 0;
    int status = -1;

    av_init_packet(&packet);

    audio_stream_index = open_file(file_path, &fmt_ctx, &dec_ctx);
    if (audio_stream_index < 0) {
        av_log(NULL, AV_LOG_ERROR, "Error opening file\n");
        status = audio_stream_index;
        goto cleanup;
    }
    decoder_open = 1;

    frame = avcodec_alloc_frame();
    if (frame == NULL) {
        av_log(NULL, AV_LOG_ERROR, "Cannot allocate frame\n");
        goto cleanup;
    }

    // Setup resampling
    output_fmt = init_resampling(&resample, dec_ctx);
    if (resample == NULL) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set up resampling\n");
        goto cleanup;
    }

    visualize_init(window_values);

    while (av_read_frame(fmt_ctx, &packet) >= 0) {
        if (packet.stream_index == audio_stream_index) {
            int got_frame = 0;
            int ret = avcodec_decode_audio4(dec_ctx, frame, &got_frame, &packet);

            if (ret < 0) {
                // A broken packet is skipped; decoding continues with the next one.
                av_log(NULL, AV_LOG_ERROR, "Error decoding audio\n");
            } else if (got_frame) {
                visualize_frame(resample, frame, output_fmt, out_channels, window_values);
            }
        }
        // Every packet returned by av_read_frame() must be released.
        av_free_packet(&packet);
    }
    status = 0;

cleanup:
    if (resample != NULL) {
        avresample_free(&resample);
    }
    if (decoder_open) {
        avcodec_close(dec_ctx);
    }
    if (fmt_ctx != NULL) {
        avformat_close_input(&fmt_ctx);
    }
    if (frame != NULL) {
        avcodec_free_frame(&frame);
    }
    return status;
}
// Factor each band's value is multiplied by before it is stored as int16_t
#define HEIGHT 32
// Number of frequency bands kept per channel (size of the band arrays below)
#define WIDTH 32
static int16_t left_bands[WIDTH]; // Left channel frequency bands, rewritten by every buffer_visualize() call
static int16_t right_bands[WIDTH]; // Right channel frequency bands, rewritten by every buffer_visualize() call
// Real-DFT context created in visualize_init() and used by buffer_visualize()
static RDFTContext *ctx;
// samples: number of interleaved int16_t values read per buffer_visualize() call
// N: samples / 2, i.e. the number of values per channel fed to the transform
static int N, samples; // N and number of samples to process each step
/**
 * Configure the visualizer for buffers holding `samples_` interleaved
 * left/right values and create the matching real-DFT context.
 *
 * Calling this again releases the previously created context first. If
 * av_rdft_init() rejects the size, `ctx` is left NULL.
 *
 * @param samples_  number of interleaved int16_t values per visualized buffer
 */
void visualize_init(int samples_) {
    int nbits = 0;
    int v;

    samples = samples_;
    N = samples_ / 2; // left/right channels

    // floor(log2(N)) in integer arithmetic: well defined for N <= 0 (yields 0)
    // and immune to floating-point rounding just below a power of two.
    for (v = N; v > 1; v >>= 1) {
        nbits++;
    }

    if (ctx != NULL) {
        av_rdft_end(ctx);
        ctx = NULL;
    }
    ctx = av_rdft_init(nbits, DFT_R2C);
}
/* Scale one 16-bit PCM value to [-1, 1] and apply the window weight. */
static float windowed_sample(int16_t pcm, double weight) {
    float value = (float) (weight * (pcm / 32768.0f)); // Convert to float and apply
    // cap values above 1 and below -1
    if (value > 1.0) {
        value = 1;
    } else if (value < -1.0) {
        value = -1;
    }
    return value;
}

/* Narrow a band level to int16_t, saturating instead of invoking undefined
 * behavior on out-of-range or non-finite values (log10(0) yields -inf). */
static int16_t saturate_band(double level) {
    if (level != level) { // NaN
        return 0;
    }
    if (level >= INT16_MAX) {
        return INT16_MAX;
    }
    if (level <= INT16_MIN) {
        return INT16_MIN;
    }
    return (int16_t) level;
}

/**
 * Compute the per-channel frequency bands of one buffer of interleaved
 * left/right 16-bit samples and store them in left_bands / right_bands.
 *
 * Does nothing if `data` is NULL or visualize_init() has not produced a
 * usable transform context.
 *
 * @param data  at least 2 * N interleaved values (left, right, left, ...)
 */
void buffer_visualize(int16_t *data) {
    int k, band;
    int size, step;

    if (data == NULL || ctx == NULL || N < 2) {
        return;
    }

    float left_data[N];
    float right_data[N];

    // De-interleave, convert to float and apply the window. Iterating over N
    // pairs (rather than up to `samples`) never reads past the last complete pair.
    for (k = 0; k < N; k++) {
        double window_modifier = (0.5 * (1 - cos(2 * M_PI * k / (N - 1)))); // Hann (Hanning) window function
        left_data[k] = windowed_sample(data[2 * k], window_modifier);
        right_data[k] = windowed_sample(data[2 * k + 1], window_modifier);
    }

    av_rdft_calc(ctx, left_data);
    av_rdft_calc(ctx, right_data);

    // visualize_init() sizes the transform as 2^floor(log2(N)); only that many
    // floats hold spectrum data (re/im pairs) after av_rdft_calc().
    size = 1;
    while (size * 2 <= N) {
        size *= 2;
    }

    // Distance between sampled bins, kept even so every band starts on a
    // re/im pair, and at least one pair so the loop always advances.
    step = size / WIDTH;
    step -= step % 2;
    if (step < 2) {
        step = 2;
    }

    // NOTE(review): band 0 reads the first pair, which presumably packs the
    // DC and Nyquist terms rather than a true re/im pair — confirm against
    // the av_rdft_calc() output layout.
    // NOTE(review): the left channel stores a linear magnitude while the right
    // channel stores a power level in dB. This mirrors the existing behavior;
    // confirm which scale is intended before unifying them.
    for (band = 0; band < WIDTH; band++) {
        int i = band * step;

        if (i + 1 >= size) {
            // Transform too short to provide this band.
            left_bands[band] = 0;
            right_bands[band] = 0;
            continue;
        }

        float left_re = left_data[i];
        float left_im = left_data[i + 1];
        float right_re = right_data[i];
        float right_im = right_data[i + 1];

        // Visualize magnitude of i-th band
        double left_mag = sqrt(left_re * left_re + left_im * left_im);
        double right_mag = 10 * log10(right_re * right_re + right_im * right_im);

        left_bands[band] = saturate_band(left_mag * HEIGHT);
        right_bands[band] = saturate_band(right_mag * HEIGHT);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment