Skip to content

Instantly share code, notes, and snippets.

Last active June 13, 2023 09:57
Show Gist options
  • Save ayuusweetfish/50764b32880710f9ec8b95de353a18fb to your computer and use it in GitHub Desktop.
Save ayuusweetfish/50764b32880710f9ec8b95de353a18fb to your computer and use it in GitHub Desktop.
bga_compo: BMS (Be-Music Source) background animation to video encoder

BGA Compo

bga_compo produces videos from Be-Music Source (BMS) files. It takes a BMS file, reads its accompanying images and sounds, and outputs the video and audio streams in raw formats that can be easily encoded into common video formats.

To compile:

cc bga_compo.c bmflat.c stb_vorbis.c -O2 -o bga_compo

# Test. Should print a few lines and a few pixels.
./bga_compo -v twinklesky/ | od -N 10

Usage: ./bga_compo -v <BMS file> produces the BGA video stream in raw RGB24 format at their original dimensions, row-major with no padding. ./bga_compo -a <BMS file> produces the audio stream in raw PCM format in signed 16-bit little endian integers at 44.1 kHz stereo.

To output to a video file, use FFmpeg:

ffmpeg \
  -f rawvideo -pixel_format rgb24 -video_size 256x256 -framerate 30 \
    -i <(./bga_compo -v twinklesky/) \
  -f s16le -ar 44.1k -ac 2 \
    -i <(./bga_compo -a twinklesky/) \
  -pix_fmt yuv420p -crf 28 -b:a 64k twinklesky.mp4

Please change "256x256" to the corresponding image dimensions of the specific track being processed. Feel free to change the encoder settings -crf, -b:a, etc.

The track used throughout development is ☆ twinklesky ☆ by fether. It can be obtained here.

Caveat: Only tested on *nix. Should also work on Windows if the path is given with forward slashes /, but this has not been tested.

#include "bmflat.h"
#include "stb_image.h"
#include "stb_vorbis.c"
#include "miniaudio.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Read the entire file at `path` into a newly-allocated, NUL-terminated
// buffer. Returns NULL on open/seek/read/allocation failure.
// The caller owns the returned buffer and must free() it.
char *read_file(const char *path)
{
    FILE *f = fopen(path, "r");
    if (f == NULL) return NULL;
    char *buf = NULL;
    do {
        if (fseek(f, 0, SEEK_END) != 0) break;
        long len = ftell(f);
        if (len < 0) break;
        if (fseek(f, 0, SEEK_SET) != 0) break;
        // One extra byte for the terminator: callers (e.g. bm_load)
        // treat the result as a C string.
        if ((buf = (char *)malloc((size_t)len + 1)) == NULL) break;
        if (len > 0 && fread(buf, (size_t)len, 1, f) != 1) { free(buf); buf = NULL; break; }
        buf[len] = '\0';
    } while (0);
    fclose(f);  // close on every path; the handle was previously leaked
    return buf;
}
// Concatenate s1 and s2 into a newly-allocated string; a NULL s1 is
// treated as an empty prefix. Returns NULL on allocation failure.
// The caller owns (and frees) the result.
char *strdupcat(const char *s1, const char *s2)
{
    // Handle the NULL prefix inline instead of calling strdup(), which is
    // POSIX (not ISO C) and may be undeclared under strict modes.
    size_t len1 = (s1 == NULL ? 0 : strlen(s1));
    size_t len2 = strlen(s2);
    char *buf = (char *)malloc(len1 + len2 + 1);
    if (buf == NULL) return NULL;
    if (len1 > 0) memcpy(buf, s1, len1);
    memcpy(buf + len1, s2, len2);
    buf[len1 + len2] = '\0';
    return buf;
}
// Concatenate s1, s2 and s3 into a newly-allocated string; a NULL s1 is
// treated as an empty prefix. Returns NULL on allocation failure.
// The caller owns (and frees) the result.
char *strdupcat3(const char *s1, const char *s2, const char *s3)
{
    // Self-contained NULL handling keeps this independent of strdupcat().
    size_t len1 = (s1 == NULL ? 0 : strlen(s1));
    size_t len2 = strlen(s2);
    size_t len3 = strlen(s3);
    char *buf = (char *)malloc(len1 + len2 + len3 + 1);
    if (buf == NULL) return NULL;
    if (len1 > 0) memcpy(buf, s1, len1);
    memcpy(buf + len1, s2, len2);
    memcpy(buf + len1 + len2, s3, len3);
    buf[len1 + len2 + len3] = '\0';
    return buf;
}
// Entry point. Parses options (-v = video stream, -a = audio stream; video
// is the default), loads the BMS chart, then writes either raw RGB24 frames
// or raw s16le interleaved PCM to stdout. Diagnostics go to stderr.
// NOTE(review): brace-only lines appear stripped from this capture, so the
// nesting below is not recoverable verbatim.
int main(int argc, char *argv[])
int arg_ptr = 1;
int is_video = 1;
if (arg_ptr < argc && argv[arg_ptr][0] == '-') {
if (argv[arg_ptr][1] == 'a') is_video = 0;
else if (argv[arg_ptr][1] != 'v' && argv[arg_ptr][1] != '-') {
fprintf(stderr, "Unrecognized option: %s", argv[arg_ptr]);
return 1;
int is_audio = !is_video;
// NOTE(review): an `arg_ptr++` after option parsing is presumably missing
// here — confirm against the original gist.
if (arg_ptr + 1 > argc) {
fprintf(stderr, "Usage: %s [-v|-a] <BMS file>\n", argv[0]);
return 0;
const char *bms_path = argv[arg_ptr];
// Extract working directory
char *bms_wdir = strdup(bms_path);
char *dirsep = strrchr(bms_wdir, '/');
if (!dirsep) {
bms_wdir = NULL;
} else {
*(dirsep + 1) = '\0';
fprintf(stderr, "Loading chart\n");
char *src = read_file(bms_path);
if (src == NULL) {
fprintf(stderr, "Cannot read file %s\n", bms_path);
return 1;
struct bm_chart chart;
int msgs = bm_load(&chart, src);
for (int i = 0; i < msgs; i++)
fprintf(stderr, "Log: Line %d: %s\n", bm_logs[i].line, bm_logs[i].message);
// Bitmaps
// All bitmaps must share one resolution; the first loaded image fixes it.
int bitmaps_w = -1, bitmaps_h = -1;
uint8_t *bitmaps_pix[BM_INDEX_MAX];
if (is_video) {
fprintf(stderr, "Loading bitmaps\n");
for (int i = 0; i < BM_INDEX_MAX; i++) if (chart.tables.bmp[i] != NULL) {
char *bmp_path = strdupcat(bms_wdir, chart.tables.bmp[i]);
int w, h;
bitmaps_pix[i] = stbi_load(bmp_path, &w, &h, NULL, 3);
if (bitmaps_pix[i] == NULL) {
fprintf(stderr, "Cannot load bitmap %s\n", chart.tables.bmp[i]);
return 1;
if (bitmaps_w == -1) {
bitmaps_w = w;
bitmaps_h = h;
fprintf(stderr, "Image size is %dx%d\n", w, h);
} else if (w != bitmaps_w || h != bitmaps_h) {
fprintf(stderr, "Bitmap %s has dimensions %dx%d, different from initial\n",
chart.tables.bmp[i], w, h);
return 1;
// Waves
const char *wave_exts[] = {
".ogg", ".wav", ".mp3",
".OGG", ".WAV", ".MP3",
struct wave {
int16_t *pcm;
int len, ptr;
} waves[BM_INDEX_MAX];
int n_ch = 2;
int sample_rate = 44100;
if (is_audio) {
fprintf(stderr, "Loading waves\n");
ma_decoder_config dec_cfg = ma_decoder_config_init(ma_format_s16, n_ch, sample_rate);
fprintf(stderr, "Audio has %d channels at sample rate %d Hz\n", n_ch, sample_rate);
for (int i = 0; i < BM_INDEX_MAX; i++) if (chart.tables.wav[i] != NULL) {
// Remove extension
char *ext = strrchr(chart.tables.wav[i], '.');
if (ext != NULL) *ext = '\0';
// Try different extensions and formats
int succeeded = 0;
for (int j = 0; j < sizeof wave_exts / sizeof wave_exts[0]; j++) {
char *wav_path = strdupcat3(bms_wdir, chart.tables.wav[i], wave_exts[j]);
ma_uint64 len;
ma_result result = ma_decode_file(wav_path, &dec_cfg, &len, (void **)&waves[i].pcm);
if (result == MA_SUCCESS) {
succeeded = 1;
waves[i].len = len;
if (!succeeded) {
fprintf(stderr, "Cannot load wave %s\n", chart.tables.wav[i]);
for (int j = 0; j < sizeof wave_exts / sizeof wave_exts[0]; j++)
printf("Tried: %s%s%s\n", bms_wdir, chart.tables.wav[i], wave_exts[j]);
return 1;
// ptr == -1 marks an idle wave; a note event resets it to 0 to (re)trigger.
for (int i = 0; i < BM_INDEX_MAX; i++) waves[i].ptr = -1;
struct bm_seq seq;
bm_to_seq(&chart, &seq);
// Main sequencing loop: walk events, converting tick deltas to seconds at
// the current tempo (48 ticks per beat), emitting frames/samples in between.
double fps = 30;
int n_frames = 0;
int n_samples = 0;
double time = 0;
double tempo = chart.meta.init_tempo;
int bg = -1;
int fg = -1;
for (int i = 0; i < seq.event_count; i++) {
// NOTE(review): the expressions below look corrupted in this capture —
// presumably `` and `[i - 1]` lost text during scraping.
struct bm_event ev =[i];
int delta_ticks = ev.pos - (i == 0 ? 0 :[i - 1].pos);
int last_time = (int)time;
time += delta_ticks * (60.0 / 48.0 / tempo);
// for (; last_time < (int)time; last_time++)
// fprintf(stderr, "%d:%02d\n", last_time / 60, last_time % 60);
if (is_video) {
while (n_frames < time * fps - 1e-6) {
// Output a frame
for (int i = 0; i < bitmaps_h * bitmaps_w; i++) {
uint8_t pix[3] = { 0 };
if (bg != -1) {
pix[0] = bitmaps_pix[bg][i * 3 + 0];
pix[1] = bitmaps_pix[bg][i * 3 + 1];
pix[2] = bitmaps_pix[bg][i * 3 + 2];
// The layer image is overlaid with pure black (#000000) as the
// transparent colour.
if (fg != -1) {
uint8_t fg_pix[3];
fg_pix[0] = bitmaps_pix[fg][i * 3 + 0];
fg_pix[1] = bitmaps_pix[fg][i * 3 + 1];
fg_pix[2] = bitmaps_pix[fg][i * 3 + 2];
if (fg_pix[0] != 0 || fg_pix[1] != 0 || fg_pix[2] != 0)
memcpy(pix, fg_pix, sizeof pix);
for (int i = 0; i < 3; i++) putchar(pix[i]);
} else if (is_audio) {
int n_new_samples = (int)(time * sample_rate - 1e-6) - n_samples;
if (n_new_samples > 0) {
// Batch process samples, in order to be a bit more efficient
// NOTE(review): `buf` is not freed in the visible text — either a
// leak or a stripped `free(buf);` line.
int32_t *buf = (int32_t *)malloc(sizeof(int32_t) * n_new_samples * n_ch);
memset(buf, 0, sizeof(int32_t) * n_new_samples * n_ch);
for (int i = 0; i < BM_INDEX_MAX; i++) if (waves[i].ptr >= 0) {
int j;
for (j = 0; j < n_new_samples && waves[i].ptr + j < waves[i].len; j++) {
for (int c = 0; c < n_ch; c++)
buf[j * n_ch + c] += waves[i].pcm[(waves[i].ptr + j) * n_ch + c];
waves[i].ptr += j;
if (waves[i].ptr >= waves[i].len) waves[i].ptr = -1;
// Mix down with clamping to the 16-bit range.
for (int i = 0; i < n_new_samples * n_ch; i++) {
int32_t orig_mix = (buf[i] * 2) >> 2;
int16_t sample = (orig_mix > INT16_MAX ? INT16_MAX :
(orig_mix < INT16_MIN ? INT16_MIN : orig_mix));
// Little-endian
putchar((uint8_t)(sample & 0xff));
putchar((uint8_t)((sample >> 8) & 0xff));
n_samples += n_new_samples;
// Apply the event itself after catching up the output streams.
if (ev.type == BM_TEMPO_CHANGE) tempo = ev.value_f;
else if (ev.type == BM_BGA_BASE_CHANGE) bg = ev.value;
else if (ev.type == BM_BGA_LAYER_CHANGE) fg = ev.value;
else if (ev.type == BM_NOTE || ev.type == BM_NOTE_LONG) waves[ev.value].ptr = 0;
return 0;
To the extent possible under law, the author(s) have dedicated all copyright and related and neighbouring rights to this software to the public domain worldwide.
For more details on the CC0 Public Domain Dedication, please refer to <https://creativecommons.org/publicdomain/zero/1.0/>.
Note: this only applies to this file and README. The libraries bundled have their own legal codes.
#include "bmflat.h"
#include <ctype.h>
#include <errno.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Global log storage: bm_logs holds log_ptr entries; log_cap is the
// currently allocated capacity (grown geometrically by ensure_log_cap).
struct bm_log *bm_logs = NULL;
static int log_cap, log_ptr;
static void reset_logs()
if (bm_logs) free(bm_logs);
bm_logs = NULL;
log_cap = log_ptr = 0;
static void ensure_log_cap()
if (log_cap <= log_ptr) {
log_cap = (log_cap == 0 ? 8 : (log_cap << 1));
bm_logs = (struct bm_log *)
realloc(bm_logs, log_cap * sizeof(struct bm_log));
// Append a printf-style formatted message, tagged with source line `_line`,
// to the global log array, growing it as needed. Statement macro
// (do/while(0)) so it composes safely with if/else.
#define emit_log(_line, ...) do { \
ensure_log_cap(); \
bm_logs[log_ptr].line = _line; \
snprintf(bm_logs[log_ptr].message, BM_MSG_LEN, __VA_ARGS__); \
log_ptr++; \
} while (0)
// True for characters that terminate a line during scanning: CR, LF, or the
// string terminator. (Despite the name, a plain space does not qualify.)
static inline int is_space_or_linebreak(char ch)
{
    return ch == '\r' || ch == '\n' || ch == '\0';
}
// True when ch is a valid base-36 digit as used by BMS: 0-9 or uppercase A-Z.
static inline int isbase36(char ch)
{
    return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z');
}
// Decode a two-character base-36 index (c1 = high digit, c2 = low digit)
// into an integer in [0, 1295].
static inline int base36(char c1, char c2)
{
    // Assumes isbase36(c1) and isbase36(c2) are true
    return
        (c1 <= '9' ? c1 - '0' : c1 - 'A' + 10) * 36 +
        (c2 <= '9' ? c2 - '0' : c2 - 'A' + 10);
}
static inline void add_note(struct bm_track *track, short bar, float beat, short value)
if (track->note_cap <= track->note_count) {
track->note_cap = (track->note_cap == 0 ? 8 : (track->note_cap << 1));
track->notes = (struct bm_note *)
realloc(track->notes, track->note_cap * sizeof(struct bm_note));
track->notes[track->note_count].bar = bar;
track->notes[track->note_count].hold = false;
track->notes[track->note_count].beat = beat;
track->notes[track->note_count++].value = value;
// Parse one channel-message payload `s` (the text after "#xxxyy:") into
// `track`: pairs of base-36 digits, each pair one slot, slots evenly spaced
// across bar `bar`. `line` is the 1-based source line for diagnostics.
// NOTE(review): loop-control statements (continue/break after the emit_log
// calls, and the increment of the slot ordinal `i`) appear to be missing
// from this capture.
static inline void parse_track(int line, char *s, struct bm_track *track, short bar)
int count = 0;
// First pass: count non-space characters; two characters form one slot.
for (char *p = s; *p != '\0'; p++) count += (!isspace(*p));
count /= 2;
// Second pass: p/q index the two digits of each pair; i is the slot ordinal.
for (int p = 0, q, i = 0; s[p] != '\0'; p = q + 1) {
while (isspace(s[p]) && s[p] != '\0') p++;
if (s[p] == '\0') break;
q = p + 1;
while (isspace(s[q]) && s[q] != '\0') q++;
if (s[q] == '\0') {
emit_log(line, "Extraneous trailing character %c, ignoring", s[p]);
if (!isbase36(s[p]) || !isbase36(s[q])) {
emit_log(line, "Invalid base-36 index %c%c at column %d, ignoring",
s[p], s[q], p + 8);
int value = base36(s[p], s[q]);
// A pair of 00 denotes a rest; only nonzero values become notes.
if (value != 0) add_note(track, bar, (float)i / count, value);
// qsort comparator ordering notes by absolute time (bar + fractional beat),
// with a 1e-6 tolerance so near-coincident notes compare equal.
static int note_time_compare(const void *_lhs, const void *_rhs)
{
    const struct bm_note *lhs = (const struct bm_note *)_lhs;
    const struct bm_note *rhs = (const struct bm_note *)_rhs;
    float diff = (lhs->bar - rhs->bar) + (lhs->beat - rhs->beat);
    return (diff < -1e-6 ? -1 : (diff > +1e-6 ? +1 : 0));
}
static inline void sort_track(struct bm_track *track, int *max_bars)
// A stable sorting algorithm
qsort(track->notes, track->note_count,
sizeof(struct bm_note), note_time_compare);
// Remove duplicates
int p, q;
float last_time = -1;
for (p = 0, q = -1; p < track->note_count; p++) {
float cur_time = track->notes[p].bar + track->notes[p].beat;
if (cur_time - last_time > 1e-6) q++;
if (p != q) track->notes[q] = track->notes[p];
last_time = cur_time;
track->note_count = q + 1;
// Update maximum bar number
if (track->note_count > 0 &&
*max_bars < track->notes[track->note_count - 1].bar)
*max_bars = track->notes[track->note_count - 1].bar;
// Parse BMS source text into `chart`. Returns the number of diagnostic
// messages accumulated in the global `bm_logs` array (parse warnings plus
// defaulting notices).
// NOTE(review): this capture is missing structural lines — brace-only lines
// and, notably, the first line of each checked_parse_int/float/strdup
// invocation (the one naming the target field). The text is preserved
// verbatim below.
int bm_load(struct bm_chart *chart, const char *_source)
char *source = strdup(_source);
// Initialize all metadata to sentinel values (-1 / NULL = "not yet seen").
chart->meta.player_num = -1;
chart->meta.genre = NULL;
chart->meta.title = NULL;
chart->meta.artist = NULL;
chart->meta.subartist = NULL;
chart->meta.init_tempo = -1;
chart->meta.play_level = -1;
chart->meta.judge_rank = -1;
chart->meta.gauge_total = -1;
chart->meta.difficulty = -1; // Omissible
chart->meta.stage_file = NULL;
chart->meta.banner = NULL;
chart->meta.back_bmp = NULL;
memset(&chart->tables.wav, 0, sizeof chart->tables.wav);
memset(&chart->tables.bmp, 0, sizeof chart->tables.bmp);
for (int i = 0; i < BM_INDEX_MAX; i++) chart->tables.tempo[i] = -1;
memset(&chart->tables.stop, -1, sizeof chart->tables.stop);
memset(&chart->tracks, 0, sizeof chart->tracks);
int len = strlen(source);
int ptr = 0, next = 0, line = 1;
// Temporary storage
int bg_index[BM_BARS_COUNT] = { 0 };
bool track_appeared[BM_BARS_COUNT][60] = { false };
int lnobj = -1;
// Line-by-line scan: [ptr, next) delimits the current line.
for (; ptr != len; ptr = ++next, line++) {
// Advance to the next line break
while (!is_space_or_linebreak(source[next])) next++;
if (source[next] == '\r' && next + 1 < len && source[next + 1] == '\n') next++;
// Trim at both ends
while (ptr < next && isspace(source[ptr])) ptr++;
int end = next;
while (end >= ptr && isspace(source[end])) end--;
source[++end] = '\0';
// Comment
if (source[ptr] != '#') continue;
// Skip the # character
char *s = source + ptr + 1;
int line_len = end - ptr - 1;
// Channel message: "#bbbtt:data" (bar number, track/channel, payload).
if (line_len >= 6 && isdigit(s[0]) && isdigit(s[1]) && isdigit(s[2]) &&
isdigit(s[3]) && isdigit(s[4]) && s[5] == ':')
// Track data
int bar = s[0] * 100 + s[1] * 10 + s[2] - '0' * 111;
int track = s[3] * 10 + s[4] - '0' * 11;
if (track >= 3 && track <= 69 && track != 5 && track % 10 != 0 &&
emit_log(line, "Track %02d already defined previously, "
"merging all notes", track);
track_appeared[bar][track] = true;
if (track == 2) {
// Time signature
errno = 0;
float x = strtof(s + 6, NULL);
if (errno != EINVAL && x >= 0.25 && x <= 63.75) {
int y = (int)(x * 4 + 0.5);
if (fabs(y - x * 4) >= 1e-3)
emit_log(line, "Inaccurate time signature, treating as %d/4", y);
if (chart->tracks.time_sig[bar] != 0)
emit_log(line, "Time signature for bar %03d "
"defined multiple times, overwriting", bar);
chart->tracks.time_sig[bar] = y;
} else {
emit_log(line, "Invalid time signature, should be a "
"multiple of 0.25 between 0.25 and 63.75 (inclusive)");
} else if (track == 3) {
// Tempo change
parse_track(line, s + 6, &chart->tracks.tempo, bar);
} else if (track == 4) {
// BGA
parse_track(line, s + 6, &chart->tracks.bga_base, bar);
} else if (track == 6) {
// BGA poor
parse_track(line, s + 6, &chart->tracks.bga_poor, bar);
} else if (track == 7) {
// BGA layer
parse_track(line, s + 6, &chart->tracks.bga_layer, bar);
} else if (track == 8) {
// Extended tempo change
parse_track(line, s + 6, &chart->tracks.ex_tempo, bar);
} else if (track == 9) {
// Stop
parse_track(line, s + 6, &chart->tracks.stop, bar);
} else if (track >= 10 && track <= 69 && track % 10 != 0) {
// Fixed
parse_track(line, s + 6, &chart->tracks.object[track - 10], bar);
} else if (track == 1) {
if (bg_index[bar] == BM_BGM_TRACKS) {
emit_log(line, "Too many background tracks (more than %d) "
"for bar %03d, ignoring", BM_BGM_TRACKS, bar);
} else {
parse_track(line, s + 6, &chart->tracks.background[bg_index[bar]], bar);
if (chart->tracks.background_count < bg_index[bar])
chart->tracks.background_count = bg_index[bar];
} else {
emit_log(line, "Unknown track %c%c, ignoring", s[3], s[4]);
} else {
// Command
// Split "#NAME arg..." at the first run of whitespace; `arg` indexes
// the argument text within s.
int arg = 0;
while (arg < line_len && !isspace(s[arg])) arg++;
s[arg++] = '\0';
while (arg < line_len && isspace(s[arg])) arg++;
if (arg >= line_len) {
emit_log(line, "Command requires non-empty arguments, ignoring");
#define checked_parse_int(_var, _min, _max, ...) do { \
errno = 0; \
long x = strtol(s + arg, NULL, 10); \
if (errno != EINVAL && x >= (_min) && x <= (_max)) { \
if ((_var) != -1) emit_log(line, __VA_ARGS__); \
(_var) = x; \
} else { \
emit_log(line, "Invalid integral value, should be " \
"between %d and %d (inclusive)", (_min) ,(_max)); \
} \
} while (0)
#define checked_parse_float(_var, _min, _max, ...) do { \
errno = 0; \
float x = strtof(s + arg, NULL); \
if (errno != EINVAL && x >= (_min) && x <= (_max)) { \
if ((_var) != -1) emit_log(line, __VA_ARGS__); \
(_var) = x; \
} else { \
emit_log(line, "Invalid integral value, should be " \
"between %g and %g (inclusive)", (_min) ,(_max)); \
} \
} while (0)
#define checked_strdup(_var, ...) do { \
char *x = strdup(s + arg); \
/* TODO: Handle cases of memory exhaustion? */ \
if (x != NULL) { \
if ((_var) != NULL) { free(_var); emit_log(line, __VA_ARGS__); } \
(_var) = x; \
} \
} while (0)
// Command dispatch. NOTE(review): the macro invocation opening each
// branch (e.g. `checked_parse_int(chart->meta.player_num,`) is missing
// from this capture; only continuation lines remain below.
if (strcmp(s, "PLAYER") == 0) {
1, 3,
"Multiple PLAYER commands, overwritten");
} else if (strcmp(s, "GENRE") == 0) {
"Multiple GENRE commands, overwritten");
} else if (strcmp(s, "TITLE") == 0) {
"Multiple TITLE commands, overwritten");
} else if (strcmp(s, "ARTIST") == 0) {
"Multiple ARTIST commands, overwritten");
} else if (strcmp(s, "SUBARTIST") == 0) {
"Multiple SUBARTIST commands, overwritten");
} else if (strcmp(s, "BPM") == 0) {
1.0, 999.0,
"Multiple BPM commands, overwritten");
} else if (strcmp(s, "PLAYLEVEL") == 0) {
1, 999,
"Multiple PLAYLEVEL commands, overwritten");
} else if (strcmp(s, "RANK") == 0) {
0, 3,
"Multiple RANK commands, overwritten");
} else if (strcmp(s, "TOTAL") == 0) {
1, 9999,
"Multiple TOTAL commands, overwritten");
} else if (strcmp(s, "DIFFICULTY") == 0) {
1, 5,
"Multiple DIFFICULTY commands, overwritten");
} else if (strcmp(s, "STAGEFILE") == 0) {
"Multiple STAGEFILE commands, overwritten");
} else if (strcmp(s, "BANNER") == 0) {
"Multiple BANNER commands, overwritten");
} else if (strcmp(s, "BACKBMP") == 0) {
"Multiple BACKBMP commands, overwritten");
} else if (memcmp(s, "WAV", 3) == 0 && isbase36(s[3]) && isbase36(s[4])) {
int index = base36(s[3], s[4]);
"Wave %c%c specified multiple times, overwritten", s[3], s[4]);
} else if (memcmp(s, "BMP", 3) == 0 && isbase36(s[3]) && isbase36(s[4])) {
int index = base36(s[3], s[4]);
"Bitmap %c%c specified multiple times, overwritten", s[3], s[4]);
} else if (memcmp(s, "BPM", 3) == 0 && isbase36(s[3]) && isbase36(s[4])) {
int index = base36(s[3], s[4]);
1.0, 999.0,
"Tempo %c%c specified multiple times, overwritten", s[3], s[4]);
} else if (memcmp(s, "STOP", 4) == 0 && isbase36(s[4]) && isbase36(s[5])) {
int index = base36(s[4], s[5]);
0, 32767,
"Stop %c%c specified multiple times, overwritten", s[4], s[5]);
} else if (strcmp(s, "LNOBJ") == 0) {
if (isbase36(s[arg]) && isbase36(s[arg + 1])) {
if (lnobj != -1)
emit_log(line, "Multiple LNOBJ commands, overwritten");
lnobj = base36(s[arg], s[arg + 1]);
} else {
emit_log(line, "Invalid base-36 index %c%c, ignoring",
s[arg], s[arg + 1]);
} else {
emit_log(line, "Unrecognized command %s, ignoring", s);
// Postprocessing
// Reinterpret base-36 as base-16
for (int i = 0; i < chart->tracks.tempo.note_count; i++) {
int x = chart->tracks.tempo.notes[i].value;
chart->tracks.tempo.notes[i].value = (x / 36) * 16 + (x % 36);
// Sort notes and handle coincident overwrites
// Also keep track of the maximum bar number
int max_bars = 0;
for (int i = 0; i < 60; i++) sort_track(&chart->tracks.object[i], &max_bars);
sort_track(&chart->tracks.tempo, &max_bars);
sort_track(&chart->tracks.bga_base, &max_bars);
sort_track(&chart->tracks.bga_layer, &max_bars);
sort_track(&chart->tracks.bga_poor, &max_bars);
sort_track(&chart->tracks.ex_tempo, &max_bars);
sort_track(&chart->tracks.stop, &max_bars);
// Handle long notes
// NOTE: #LNTYPE is not supported and is object to LNTYPE 1
for (int i = 0; i < 20; i++) // Indices 11-29
for (int j = 1; j < chart->tracks.object[i].note_count; j++) {
if (chart->tracks.object[i].notes[j].value == lnobj &&
chart->tracks.object[i].notes[j - 1].value != -1)
chart->tracks.object[i].notes[j].value = -1;
chart->tracks.object[i].notes[j - 1].hold = true;
for (int i = 40; i < 60; i++) // Indices 51-69
for (int j = 1; j < chart->tracks.object[i].note_count; j++) {
if (chart->tracks.object[i].notes[j].value ==
chart->tracks.object[i].notes[j - 1].value)
chart->tracks.object[i].notes[j].value = -1;
chart->tracks.object[i].notes[j - 1].hold = true;
// Fill in missing time signatures
for (int i = 0; i <= max_bars; i++)
if (chart->tracks.time_sig[i] == 0)
chart->tracks.time_sig[i] = 4;
#define check_default(_var, _name, _initial, _val) do { \
if ((_var) == (_initial)) { \
emit_log(-1, "Command " _name " did not appear, defaulting to " #_val); \
(_var) = (_val); \
} \
} while (0)
#define check_default_no_log(_var, _name, _initial, _val) do { \
if ((_var) == (_initial)) (_var) = (_val); \
} while (0)
check_default(chart->meta.player_num, "PLAYER", -1, 1);
check_default(chart->meta.genre, "GENRE", NULL, strdup("(unknown)"));
check_default(chart->meta.title, "TITLE", NULL, strdup("(unknown)"));
check_default(chart->meta.artist, "ARTIST", NULL, strdup("(unknown)"));
check_default_no_log(chart->meta.subartist, "SUBARTIST", NULL, strdup("(unknown)"));
check_default(chart->meta.init_tempo, "BPM", -1, 130);
check_default(chart->meta.play_level, "LEVEL", -1, 3);
check_default_no_log(chart->meta.judge_rank, "RANK", -1, 3);
check_default_no_log(chart->meta.gauge_total, "TOTAL", -1, 160);
check_default_no_log(chart->meta.stage_file, "STAGEFILE", NULL, strdup("(none)"));
check_default_no_log(chart->meta.banner, "BANNER", NULL, strdup("(none)"));
check_default_no_log(chart->meta.back_bmp, "BACKBMP", NULL, strdup("(none)"));
return log_ptr;
static inline void add_event_arr(
struct bm_event **arr, struct bm_event *event, int *size, int *cap)
// XXX: More DRY
if (*cap <= *size) {
*cap = (*cap == 0 ? 8 : (*cap << 1));
*arr = (struct bm_event *)
realloc(*arr, (*cap) * sizeof(struct bm_event));
(*arr)[(*size)++] = *event;
// qsort comparator: order events by position, breaking ties by event type
// (so e.g. tempo changes at a position sort consistently against notes).
static inline int event_pos_type_compare(const void *_lhs, const void *_rhs)
{
    struct bm_event *lhs = (struct bm_event *)_lhs;
    struct bm_event *rhs = (struct bm_event *)_rhs;
    int diff = lhs->pos - rhs->pos;
    return (diff == 0 ? lhs->type - rhs->type : diff);
}
// Flatten a parsed chart into a time-ordered event sequence in *seq.
// Positions are in 48ths of a beat (see bm_event.pos).
// NOTE(review): the `add_event();` call that should follow each event
// initialization below, plus brace-only lines, appear to be missing from
// this capture; the text is preserved verbatim.
void bm_to_seq(struct bm_chart *chart, struct bm_seq *seq)
memset(seq, 0, sizeof(struct bm_seq));
int cap = 0;
// bar_start[i] = cumulative beat count at the start of bar i.
int bar_start[BM_BARS_COUNT];
struct bm_event event;
#define add_event() add_event_arr(&seq->events, &event, &seq->event_count, &cap)
// Bar lines
for (int i = 0, beats = 0; i < BM_BARS_COUNT; i++) {
bar_start[i] = beats;
event.pos = beats * 48;
event.type = BM_BARLINE;
event.track = 0;
event.value = i;
event.value_a = chart->tracks.time_sig[i];
beats += chart->tracks.time_sig[i];
if (chart->tracks.time_sig[i] == 0) break;
struct bm_note *note;
#define track_each(_track) \
(int j = 0; j < (_track).note_count && (note = (_track).notes + j); j++)
#define pos(_note) (bar_start[(_note)->bar] * 48 + \
(int)((_note)->beat * chart->tracks.time_sig[(_note)->bar] * 48))
// Tempo changes
// Track 03
for track_each(chart->tracks.tempo) {
event.pos = pos(note);
event.type = BM_TEMPO_CHANGE;
event.track = 3;
event.value_f = note->value;
// Track 08
for track_each(chart->tracks.ex_tempo) {
event.pos = pos(note);
event.type = BM_TEMPO_CHANGE;
event.track = 8;
event.value_f = chart->tables.tempo[note->value];
// BGA changes
// Track 04: base
for track_each(chart->tracks.bga_base) {
event.pos = pos(note);
event.type = BM_BGA_BASE_CHANGE;
event.track = 4;
event.value = note->value;
// Track 07: layer
for track_each(chart->tracks.bga_layer) {
event.pos = pos(note);
event.type = BM_BGA_LAYER_CHANGE;
event.track = 7;
event.value = note->value;
// Track 06: poor
for track_each(chart->tracks.bga_poor) {
event.pos = pos(note);
event.type = BM_BGA_POOR_CHANGE;
event.track = 6;
event.value = note->value;
// Stops
for track_each(chart->tracks.stop) {
event.pos = pos(note);
event.type = BM_STOP;
event.track = 9;
event.value = chart->tables.stop[note->value];
// Object tracks
// Backgrounds
for (int i = 0; i < chart->tracks.background_count; i++)
for track_each(chart->tracks.background[i]) {
// No long notes in background tracks
event.pos = pos(note);
event.type = BM_NOTE;
event.track = -i;
event.value = note->value;
// Objects
for (int i = 0; i < 60; i++)
for track_each(chart->tracks.object[i]) {
if (note->value == -1) {
// Release of a long note
event.type = BM_NOTE_LONG;
event.value_a = pos(note) - event.pos;
// Add a pair of events to simplify time-range queries
event.pos = pos(note);
event.type = BM_NOTE_OFF;
} else {
event.pos = pos(note);
event.track = i + 10;
if (event.track >= 50) event.track -= 40;
event.value = note->value;
if (!note->hold) {
// Normal note
event.type = BM_NOTE;
// With a stable sorting algorithm only positions need to be compared
qsort(seq->events, seq->event_count,
sizeof(struct bm_event), event_pos_type_compare);
// Collect long notes
cap = 0;
for (int i = 0; i < seq->event_count; i++)
if (seq->events[i].type == BM_NOTE_LONG) {
add_event_arr(&seq->long_notes, &seq->events[i],
&seq->long_note_count, &cap);
// Free a pointer and reset it to NULL; evaluates to 0 when _p was NULL.
#define sfree(_p) (((_p) != NULL) && (free(_p), (_p) = NULL))
// Release heap storage owned by a parsed chart (name tables, track notes).
// NOTE(review): the bodies of the last two loops (freeing background/object
// track note arrays) appear to be missing from this capture.
void bm_close_chart(struct bm_chart *chart)
for (int i = 0; i < BM_INDEX_MAX; i++) sfree(chart->tables.wav[i]);
for (int i = 0; i < BM_INDEX_MAX; i++) sfree(chart->tables.bmp[i]);
for (int i = 0; i < chart->tracks.background_count; i++)
for (int i = 0; i < 60; i++)
// Release heap storage owned by a flattened sequence.
// NOTE(review): the function body is not visible in this capture.
void bm_close_seq(struct bm_seq *seq)
Copyright (c) 2019 Ayu
bmflat is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at: http://license.coscl.org.cn/MulanPSL2
See the Mulan PSL v2 for more details.
// bmflat public interface: BMS chart model (metadata, resource tables,
// note tracks) and the flattened event sequence produced by bm_to_seq.
// NOTE(review): the closing braces/semicolons of the struct and enum
// declarations, and the trailing #endif / extern "C" close, appear to be
// stripped from this capture; declarations are preserved verbatim.
#ifndef _BMFLAT_H_
#define _BMFLAT_H_
#ifdef __cplusplus
extern "C" {
// Header (#XXX command) metadata; -1 / NULL marks fields left at defaults.
struct bm_metadata {
int player_num;
char *genre;
char *title;
char *artist;
char *subartist;
float init_tempo;
int play_level;
int judge_rank;
int gauge_total;
int difficulty;
char *stage_file;
char *banner;
char *back_bmp;
// Indexed resource tables (#WAVxx, #BMPxx, #BPMxx, #STOPxx).
struct bm_tables {
#define BM_INDEX_MAX 1296
char *wav[BM_INDEX_MAX];
char *bmp[BM_INDEX_MAX];
float tempo[BM_INDEX_MAX];
short stop[BM_INDEX_MAX];
// A single note: position within its bar plus a base-36 value.
struct bm_note {
float beat; // In fractions of the bar
short bar:15;
short hold:1;
short value;
// A growable array of notes for one channel.
struct bm_track {
int note_count, note_cap;
struct bm_note *notes;
struct bm_tracks {
#define BM_BARS_COUNT 1000
unsigned char time_sig[BM_BARS_COUNT];
#define BM_BGM_TRACKS 64
int background_count;
struct bm_track background[BM_BGM_TRACKS];
struct bm_track object[60];
struct bm_track tempo;
struct bm_track bga_base;
struct bm_track bga_layer;
struct bm_track bga_poor;
struct bm_track ex_tempo;
struct bm_track stop;
struct bm_chart {
struct bm_metadata meta;
struct bm_tables tables;
struct bm_tracks tracks;
enum bm_event_type {
BM_BARLINE = 0, // value = index, value_a = time signature
BM_TEMPO_CHANGE, // value_f = BPM
BM_BGA_BASE_CHANGE, // value = index
BM_BGA_LAYER_CHANGE, // value = index
BM_BGA_POOR_CHANGE, // value = index
BM_STOP, // value = duration
BM_NOTE, // value = index
BM_NOTE_LONG, // value = index, value_a = duration
BM_NOTE_OFF, // value = index, value_a = duration
struct bm_event {
int pos; // beat * 48 + fraction in 48ths of a beat (192ths of a whole note)
enum bm_event_type type:8;
signed char track; // non-positive for backgrounds; 11 - 59 for objects
union {
struct {
short value;
short value_a;
float value_f;
// Flattened, time-sorted event stream; long notes are duplicated into
// `long_notes` for range queries.
struct bm_seq {
int event_count;
struct bm_event *events;
int long_note_count;
struct bm_event *long_notes;
#define BM_MSG_LEN 128
struct bm_log {
int line;
char message[BM_MSG_LEN];
extern struct bm_log *bm_logs;
int bm_load(struct bm_chart *chart, const char *source);
void bm_to_seq(struct bm_chart *chart, struct bm_seq *seq);
void bm_close_chart(struct bm_chart *chart);
void bm_close_seq(struct bm_seq *seq);
#ifdef __cplusplus
Copyright (c) 2019 Ayu
bmflat is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at: http://license.coscl.org.cn/MulanPSL2
See the Mulan PSL v2 for more details.
This file has been truncated, but you can view the full file.
Audio playback and capture library. Choice of public domain or MIT-0. See license statements at the end of this file.
miniaudio - v0.11.17 - 2023-05-27
David Reid - mackron@gmail.com
1. Introduction
miniaudio is a single file library for audio playback and capture. To use it, do the following in
one .c file:
#include "miniaudio.h"
You can do `#include "miniaudio.h"` in other parts of the program just like any other header.
miniaudio includes both low level and high level APIs. The low level API is good for those who want
to do all of their mixing themselves and only require a light weight interface to the underlying
audio device. The high level API is good for those who have complex mixing and effect requirements.
In miniaudio, objects are transparent structures. Unlike many other libraries, there are no handles
to opaque objects which means you need to allocate memory for objects yourself. In the examples
presented in this documentation you will often see objects declared on the stack. You need to be
careful when translating these examples to your own code so that you don't accidentally declare
your objects on the stack and then cause them to become invalid once the function returns. In
addition, you must ensure the memory address of your objects remain the same throughout their
lifetime. You therefore cannot be making copies of your objects.
A config/init pattern is used throughout the entire library. The idea is that you set up a config
object and pass that into the initialization routine. The advantage to this system is that the
config object can be initialized with logical defaults and new properties added to it without
breaking the API. The config object can be allocated on the stack and does not need to be
maintained after initialization of the corresponding object.
1.1. Low Level API
The low level API gives you access to the raw audio data of an audio device. It supports playback,
capture, full-duplex and loopback (WASAPI only). You can enumerate over devices to determine which
physical device(s) you want to connect to.
The low level API uses the concept of a "device" as the abstraction for physical devices. The idea
is that you choose a physical device to emit or capture audio from, and then move data to/from the
device when miniaudio tells you to. Data is delivered to and from devices asynchronously via a
callback which you specify when initializing the device.
When initializing the device you first need to configure it. The device configuration allows you to
specify things like the format of the data delivered via the callback, the size of the internal
buffer and the ID of the device you want to emit or capture audio from.
Once you have the device configuration set up you can initialize the device. When initializing a
device you need to allocate memory for the device object beforehand. This gives the application
complete control over how the memory is allocated. In the example below we initialize a playback
device on the stack, but you could allocate it on the heap if that suits your situation better.
void data_callback(ma_device* pDevice, void* pOutput, const void* pInput, ma_uint32 frameCount)
// In playback mode copy data to pOutput. In capture mode read data from pInput. In full-duplex mode, both
// pOutput and pInput will be valid and you can move data from pInput into pOutput. Never process more than
// frameCount frames.
int main()
ma_device_config config = ma_device_config_init(ma_device_type_playback);
config.playback.format = ma_format_f32; // Set to ma_format_unknown to use the device's native format.
config.playback.channels = 2; // Set to 0 to use the device's native channel count.
config.sampleRate = 48000; // Set to 0 to use the device's native sample rate.
config.dataCallback = data_callback; // This function will be called when miniaudio needs more data.
config.pUserData = pMyCustomData; // Can be accessed from the device object (device.pUserData).
ma_device device;
if (ma_device_init(NULL, &config, &device) != MA_SUCCESS) {
return -1; // Failed to initialize the device.
ma_device_start(&device); // The device is sleeping by default so you'll need to start it manually.
// Do something here. Probably your program's main loop.
ma_device_uninit(&device); // This will stop the device so no need to do that manually.
return 0;
In the example above, `data_callback()` is where audio data is written and read from the device.
The idea is in playback mode you cause sound to be emitted from the speakers by writing audio data
to the output buffer (`pOutput` in the example). In capture mode you read data from the input
buffer (`pInput`) to extract sound captured by the microphone. The `frameCount` parameter tells you
how many frames can be written to the output buffer and read from the input buffer. A "frame" is
one sample for each channel. For example, in a stereo stream (2 channels), one frame is 2
samples: one for the left, one for the right. The channel count is defined by the device config.
The size in bytes of an individual sample is defined by the sample format which is also specified
in the device config. Multi-channel audio data is always interleaved, which means the samples for
each frame are stored next to each other in memory. For example, in a stereo stream the first pair
of samples will be the left and right samples for the first frame, the second pair of samples will
be the left and right samples for the second frame, etc.
The configuration of the device is defined by the `ma_device_config` structure. The config object
is always initialized with `ma_device_config_init()`. It's important to always initialize the
config with this function as it initializes it with logical defaults and ensures your program
doesn't break when new members are added to the `ma_device_config` structure. The example above
uses a fairly simple and standard device configuration. The call to `ma_device_config_init()` takes
a single parameter, which is whether or not the device is a playback, capture, duplex or loopback
device (loopback devices are not supported on all backends). The `config.playback.format` member
sets the sample format which can be one of the following (all formats are native-endian):
| Symbol | Description | Range |
| ma_format_f32 | 32-bit floating point | [-1, 1] |
| ma_format_s16 | 16-bit signed integer | [-32768, 32767] |
| ma_format_s24 | 24-bit signed integer (tightly packed) | [-8388608, 8388607] |
| ma_format_s32 | 32-bit signed integer | [-2147483648, 2147483647] |
| ma_format_u8 | 8-bit unsigned integer | [0, 255] |
The `config.playback.channels` member sets the number of channels to use with the device. The
channel count cannot exceed MA_MAX_CHANNELS. The `config.sampleRate` member sets the sample rate
(which must be the same for both playback and capture in full-duplex configurations). This is
usually set to 44100 or 48000, but can be set to anything. It's recommended to keep this between
8000 and 384000, however.
Note that leaving the format, channel count and/or sample rate at their default values will result
in the internal device's native configuration being used which is useful if you want to avoid the
overhead of miniaudio's automatic data conversion.
In addition to the sample format, channel count and sample rate, the data callback and user data
pointer are also set via the config. The user data pointer is not passed into the callback as a
parameter, but is instead set to the `pUserData` member of `ma_device` which you can access
directly since all miniaudio structures are transparent.
Initializing the device is done with `ma_device_init()`. This will return a result code telling you
what went wrong, if anything. On success it will return `MA_SUCCESS`. After initialization is
complete the device will be in a stopped state. To start it, use `ma_device_start()`.
Uninitializing the device will stop it, which is what the example above does, but you can also stop
the device with `ma_device_stop()`. To resume the device simply call `ma_device_start()` again.
Note that it's important to never stop or start the device from inside the callback. This will
result in a deadlock. Instead you set a variable or signal an event indicating that the device
needs to stop and handle it in a different thread. The following APIs must never be called inside
the callback:
You must never try uninitializing and reinitializing a device inside the callback. You must also
never try to stop and start it from inside the callback. There are a few other things you shouldn't
do in the callback depending on your requirements, however this isn't so much a thread-safety
thing, but rather a real-time processing thing which is beyond the scope of this introduction.
The example above demonstrates the initialization of a playback device, but it works exactly the
same for capture. All you need to do is change the device type from `ma_device_type_playback` to
`ma_device_type_capture` when setting up the config, like so:
ma_device_config config = ma_device_config_init(ma_device_type_capture);
config.capture.format = MY_FORMAT;
config.capture.channels = MY_CHANNEL_COUNT;
In the data callback you just read from the input buffer (`pInput` in the example above) and leave
the output buffer alone (it will be set to NULL when the device type is set to `ma_device_type_capture`).
These are the available device types and how you should handle the buffers in the callback:
| Device Type | Callback Behavior |
| ma_device_type_playback | Write to output buffer, leave input buffer untouched. |
| ma_device_type_capture | Read from input buffer, leave output buffer untouched. |
| ma_device_type_duplex | Read from input buffer, write to output buffer. |
| ma_device_type_loopback | Read from input buffer, leave output buffer untouched. |
You will notice in the example above that the sample format and channel count is specified
separately for playback and capture. This is to support different data formats between the playback
and capture devices in a full-duplex system. An example may be that you want to capture audio data
as a monaural stream (one channel), but output sound to a stereo speaker system. Note that if you
use different formats between playback and capture in a full-duplex configuration you will need to
convert the data yourself. There are functions available to help you do this which will be
explained later.
The example above did not specify a physical device to connect to which means it will use the
operating system's default device. If you have multiple physical devices connected and you want to
use a specific one you will need to specify the device ID in the configuration, like so:
config.playback.pDeviceID = pMyPlaybackDeviceID; // Only if requesting a playback or duplex device.
config.capture.pDeviceID = pMyCaptureDeviceID; // Only if requesting a capture, duplex or loopback device.
To retrieve the device ID you will need to perform device enumeration, however this requires the
use of a new concept called the "context". Conceptually speaking the context sits above the device.
There is one context to many devices. The purpose of the context is to represent the backend at a
more global level and to perform operations outside the scope of an individual device. Mainly it is
used for performing run-time linking against backend libraries, initializing backends and
enumerating devices. The example below shows how to enumerate devices.
ma_context context;
if (ma_context_init(NULL, 0, NULL, &context) != MA_SUCCESS) {
// Error.
ma_device_info* pPlaybackInfos;
ma_uint32 playbackCount;
ma_device_info* pCaptureInfos;
ma_uint32 captureCount;
if (ma_context_get_devices(&context, &pPlaybackInfos, &playbackCount, &pCaptureInfos, &captureCount) != MA_SUCCESS) {
// Error.
// Loop over each device info and do something with it. Here we just print the name with their index. You may want
// to give the user the opportunity to choose which device they'd prefer.
for (ma_uint32 iDevice = 0; iDevice < playbackCount; iDevice += 1) {
printf("%d - %s\n", iDevice, pPlaybackInfos[iDevice].name);
ma_device_config config = ma_device_config_init(ma_device_type_playback);
config.playback.pDeviceID = &pPlaybackInfos[chosenPlaybackDeviceIndex].id;
config.playback.format = MY_FORMAT;
config.playback.channels = MY_CHANNEL_COUNT;
config.sampleRate = MY_SAMPLE_RATE;
config.dataCallback = data_callback;
config.pUserData = pMyCustomData;
ma_device device;
if (ma_device_init(&context, &config, &device) != MA_SUCCESS) {
// Error
The first thing we do in this example is initialize a `ma_context` object with `ma_context_init()`.
The first parameter is a pointer to a list of `ma_backend` values which are used to override the
default backend priorities. When this is NULL, as in this example, miniaudio's default priorities
are used. The second parameter is the number of backends listed in the array pointed to by the
first parameter. The third parameter is a pointer to a `ma_context_config` object which can be
NULL, in which case defaults are used. The context configuration is used for setting the logging
callback, custom memory allocation callbacks, user-defined data and some backend-specific configurations.
Once the context has been initialized you can enumerate devices. In the example above we use the
simpler `ma_context_get_devices()`, however you can also use a callback for handling devices by
using `ma_context_enumerate_devices()`. When using `ma_context_get_devices()` you provide a pointer
to a pointer that will, upon output, be set to a pointer to a buffer containing a list of
`ma_device_info` structures. You also provide a pointer to an unsigned integer that will receive
the number of items in the returned buffer. Do not free the returned buffers as their memory is
managed internally by miniaudio.
The `ma_device_info` structure contains an `id` member which is the ID you pass to the device
config. It also contains the name of the device which is useful for presenting a list of devices
to the user via the UI.
When creating your own context you will want to pass it to `ma_device_init()` when initializing the
device. Passing in NULL, like we do in the first example, will result in miniaudio creating the
context for you, which you don't want to do since you've already created a context. Note that
internally the context is only tracked by its pointer which means you must not change the location
of the `ma_context` object. If this is an issue, consider using `malloc()` to allocate memory for
the context.
1.2. High Level API
The high level API consists of three main parts:
* Resource management for loading and streaming sounds.
* A node graph for advanced mixing and effect processing.
* A high level "engine" that wraps around the resource manager and node graph.
The resource manager (`ma_resource_manager`) is used for loading sounds. It supports loading sounds
fully into memory and also streaming. It will also deal with reference counting for you which
avoids the same sound being loaded multiple times.
The node graph is used for mixing and effect processing. The idea is that you connect a number of
nodes into the graph by connecting each node's outputs to another node's inputs. Each node can
implement its own effect. By chaining nodes together, advanced mixing and effect processing can
be achieved.
The engine encapsulates both the resource manager and the node graph to create a simple, easy to
use high level API. The resource manager and node graph APIs are covered in more later sections of
this manual.
The code below shows how you can initialize an engine using its default configuration.
ma_result result;
ma_engine engine;
result = ma_engine_init(NULL, &engine);
if (result != MA_SUCCESS) {
return result; // Failed to initialize the engine.
This creates an engine instance which will initialize a device internally which you can access with
`ma_engine_get_device()`. It will also initialize a resource manager for you which can be accessed
with `ma_engine_get_resource_manager()`. The engine itself is a node graph (`ma_node_graph`) which
means you can pass a pointer to the engine object into any of the `ma_node_graph` APIs (with a
cast). Alternatively, you can use `ma_engine_get_node_graph()` instead of a cast.
Note that all objects in miniaudio, including the `ma_engine` object in the example above, are
transparent structures. There are no handles to opaque structures in miniaudio which means you need
to be mindful of how you declare them. In the example above we are declaring it on the stack, but
this will result in the struct being invalidated once the function encapsulating it returns. If
allocating the engine on the heap is more appropriate, you can easily do so with a standard call
to `malloc()` or whatever heap allocation routine you like:
ma_engine* pEngine = malloc(sizeof(*pEngine));
The `ma_engine` API uses the same config/init pattern used all throughout miniaudio. To configure
an engine, you can fill out a `ma_engine_config` object and pass it into the first parameter of `ma_engine_init()`:
ma_result result;
ma_engine engine;
ma_engine_config engineConfig;
engineConfig = ma_engine_config_init();
engineConfig.pResourceManager = &myCustomResourceManager; // <-- Initialized as some earlier stage.
result = ma_engine_init(&engineConfig, &engine);
if (result != MA_SUCCESS) {
return result;
This creates an engine instance using a custom config. In this particular example it's showing how
you can specify a custom resource manager rather than having the engine initialize one internally.
This is particularly useful if you want to have multiple engines share the same resource manager.
The engine must be uninitialized with `ma_engine_uninit()` when it's no longer needed.
By default the engine will be started, but nothing will be playing because no sounds have been
initialized. The easiest but least flexible way of playing a sound is like so:
ma_engine_play_sound(&engine, "my_sound.wav", NULL);
This plays what miniaudio calls an "inline" sound. It plays the sound once, and then puts the
internal sound up for recycling. The last parameter is used to specify which sound group the sound
should be associated with which will be explained later. This particular way of playing a sound is
simple, but lacks flexibility and features. A more flexible way of playing a sound is to first
initialize a sound:
ma_result result;
ma_sound sound;
result = ma_sound_init_from_file(&engine, "my_sound.wav", 0, NULL, NULL, &sound);
if (result != MA_SUCCESS) {
return result;
This returns a `ma_sound` object which represents a single instance of the specified sound file. If
you want to play the same file multiple times simultaneously, you need to create one sound for each instance.
Sounds should be uninitialized with `ma_sound_uninit()`.
Sounds are not started by default. Start a sound with `ma_sound_start()` and stop it with
`ma_sound_stop()`. When a sound is stopped, it is not rewound to the start. Use
`ma_sound_seek_to_pcm_frame(&sound, 0)` to seek back to the start of a sound. By default, starting
and stopping sounds happens immediately, but sometimes it might be convenient to schedule the sound
to be started and/or stopped at a specific time. This can be done with the following functions:
The start/stop time needs to be specified based on the absolute timer which is controlled by the
engine. The current global time in PCM frames can be retrieved with
`ma_engine_get_time_in_pcm_frames()`. The engine's global time can be changed with
`ma_engine_set_time_in_pcm_frames()` for synchronization purposes if required. Note that scheduling
a start time still requires an explicit call to `ma_sound_start()` before anything will play:
ma_sound_set_start_time_in_pcm_frames(&sound, ma_engine_get_time_in_pcm_frames(&engine) + (ma_engine_get_sample_rate(&engine) * 2));
The third parameter of `ma_sound_init_from_file()` is a set of flags that control how the sound should be
loaded and a few options on which features should be enabled for that sound. By default, the sound
is synchronously loaded fully into memory straight from the file system without any kind of
decoding. If you want to decode the sound before storing it in memory, you need to specify the
`MA_SOUND_FLAG_DECODE` flag. This is useful if you want to incur the cost of decoding at an earlier
stage, such as a loading stage. Without this option, decoding will happen dynamically at mixing
time which might be too expensive on the audio thread.
If you want to load the sound asynchronously, you can specify the `MA_SOUND_FLAG_ASYNC` flag. This
will result in `ma_sound_init_from_file()` returning quickly, but the sound will not start playing
until the sound has had some audio decoded.
The fourth parameter is a pointer to sound group. A sound group is used as a mechanism to organise
sounds into groups which have their own effect processing and volume control. An example is a game
which might have separate groups for sfx, voice and music. Each of these groups have their own
independent volume control. Use `ma_sound_group_init()` or `ma_sound_group_init_ex()` to initialize
a sound group.
Sounds and sound groups are nodes in the engine's node graph and can be plugged into any `ma_node`
API. This makes it possible to connect sounds and sound groups to effect nodes to produce complex
effect chains.
A sound can have its volume changed with `ma_sound_set_volume()`. If you prefer decibel volume
control you can use `ma_volume_db_to_linear()` to convert from decibel representation to linear.
Panning and pitching is supported with `ma_sound_set_pan()` and `ma_sound_set_pitch()`. If you know
a sound will never have its pitch changed with `ma_sound_set_pitch()` or via the doppler effect,
you can specify the `MA_SOUND_FLAG_NO_PITCH` flag when initializing the sound for an optimization.
By default, sounds and sound groups have spatialization enabled. If you don't ever want to
spatialize your sounds, initialize the sound with the `MA_SOUND_FLAG_NO_SPATIALIZATION` flag. The
spatialization model is fairly simple and is roughly on feature parity with OpenAL. HRTF and
environmental occlusion are not currently supported, but planned for the future. The supported
features include:
* Sound and listener positioning and orientation with cones
* Attenuation models: none, inverse, linear and exponential
* Doppler effect
Sounds can be faded in and out with `ma_sound_set_fade_in_pcm_frames()`.
To check if a sound is currently playing, you can use `ma_sound_is_playing()`. To check if a sound
is at the end, use `ma_sound_at_end()`. Looping of a sound can be controlled with
`ma_sound_set_looping()`. Use `ma_sound_is_looping()` to check whether or not the sound is looping.
2. Building
miniaudio should work cleanly out of the box without the need to download or install any
dependencies. See below for platform-specific details.
Note that GCC and Clang require `-msse2`, `-mavx2`, etc. for SIMD optimizations.
If you get errors about undefined references to `__sync_val_compare_and_swap_8`, `__atomic_load_8`,
etc. you need to link with `-latomic`.
2.1. Windows
The Windows build should compile cleanly on all popular compilers without the need to configure any
include paths nor link to any libraries.
The UWP build may require linking to mmdevapi.lib if you get errors about an unresolved external
symbol for `ActivateAudioInterfaceAsync()`.
2.2. macOS and iOS
The macOS build should compile cleanly without the need to download any dependencies nor link to
any libraries or frameworks. The iOS build needs to be compiled as Objective-C and will need to
link the relevant frameworks but should compile cleanly out of the box with Xcode. Compiling
through the command line requires linking to `-lpthread` and `-lm`.
Due to the way miniaudio links to frameworks at runtime, your application may not pass Apple's
notarization process. To fix this there are two options. The first is to use the
`MA_NO_RUNTIME_LINKING` option, like so:
#ifdef __APPLE__
#include "miniaudio.h"
This will require linking with `-framework CoreFoundation -framework CoreAudio -framework AudioToolbox`.
If you get errors about AudioToolbox, try with `-framework AudioUnit` instead. You may get this when
using older versions of iOS. Alternatively, if you would rather keep using runtime linking you can
add the following to your entitlements.xcent file:
See this discussion for more info:
2.3. Linux
The Linux build only requires linking to `-ldl`, `-lpthread` and `-lm`. You do not need any
development packages. You may need to link with `-latomic` if you're compiling for 32-bit ARM.
2.4. BSD
The BSD build only requires linking to `-lpthread` and `-lm`. NetBSD uses audio(4), OpenBSD uses
sndio and FreeBSD uses OSS. You may need to link with `-latomic` if you're compiling for 32-bit
2.5. Android
AAudio is the highest priority backend on Android. This should work out of the box without needing
any kind of compiler configuration. Support for AAudio starts with Android 8 which means older
versions will fall back to OpenSL|ES which requires API level 16+.
There have been reports that the OpenSL|ES backend fails to initialize on some Android based
devices due to `dlopen()` failing to open "libOpenSLES.so". If this happens on your platform
you'll need to disable run-time linking with `MA_NO_RUNTIME_LINKING` and link with -lOpenSLES.
2.6. Emscripten
The Emscripten build emits Web Audio JavaScript directly and should compile cleanly out of the box.
You cannot use `-std=c*` compiler flags, nor `-ansi`.
2.7. Build Options
`#define` these options before including miniaudio.h.
| Option | Description |
| MA_NO_WASAPI | Disables the WASAPI backend. |
| MA_NO_DSOUND | Disables the DirectSound backend. |
| MA_NO_WINMM | Disables the WinMM backend. |
| MA_NO_ALSA | Disables the ALSA backend. |
| MA_NO_PULSEAUDIO | Disables the PulseAudio backend. |
| MA_NO_JACK | Disables the JACK backend. |
| MA_NO_COREAUDIO | Disables the Core Audio backend. |
| MA_NO_SNDIO | Disables the sndio backend. |
| MA_NO_AUDIO4 | Disables the audio(4) backend. |
| MA_NO_OSS | Disables the OSS backend. |
| MA_NO_AAUDIO | Disables the AAudio backend. |
| MA_NO_OPENSL | Disables the OpenSL|ES backend. |
| MA_NO_WEBAUDIO | Disables the Web Audio backend. |
| MA_NO_NULL | Disables the null backend. |
| MA_ENABLE_ONLY_SPECIFIC_BACKENDS | Disables all backends by default and requires `MA_ENABLE_*` to |
| | enable specific backends. |
| | enable the WASAPI backend. |
| | enable the DirectSound backend. |
| | enable the WinMM backend. |
| | enable the ALSA backend. |
| | enable the PulseAudio backend. |
| | enable the JACK backend. |
| | enable the Core Audio backend. |
| | enable the sndio backend. |
| | enable the audio(4) backend. |
| MA_ENABLE_OSS | Used in conjunction with MA_ENABLE_ONLY_SPECIFIC_BACKENDS to |
| | enable the OSS backend. |
| | enable the AAudio backend. |
| | enable the OpenSL|ES backend. |
| | enable the Web Audio backend. |
| | enable the null backend. |
| MA_NO_DECODING | Disables decoding APIs. |
| MA_NO_ENCODING | Disables encoding APIs. |
| MA_NO_WAV | Disables the built-in WAV decoder and encoder. |
| MA_NO_FLAC | Disables the built-in FLAC decoder. |
| MA_NO_MP3 | Disables the built-in MP3 decoder. |
| MA_NO_DEVICE_IO | Disables playback and recording. This will disable `ma_context` |
| | and `ma_device` APIs. This is useful if you only want to use |
| | miniaudio's data conversion and/or decoding APIs. |
| MA_NO_RESOURCE_MANAGER | Disables the resource manager. When using the engine this will |
| | also disable the following functions: |
| | |
| | ``` |
| | ma_sound_init_from_file() |
| | ma_sound_init_from_file_w() |
| | ma_sound_init_copy() |
| | ma_engine_play_sound_ex() |
| | ma_engine_play_sound() |
| | ``` |
| | |
| | The only way to initialize a `ma_sound` object is to initialize it |
| | from a data source. |
| MA_NO_NODE_GRAPH | Disables the node graph API. This will also disable the engine API |
| | because it depends on the node graph. |
| MA_NO_ENGINE | Disables the engine API. |
| MA_NO_THREADING | Disables the `ma_thread`, `ma_mutex`, `ma_semaphore` and |
| | `ma_event` APIs. This option is useful if you only need to use |
| | miniaudio for data conversion, decoding and/or encoding. Some |
| | families of APIs require threading which means the following |
| | options must also be set: |
| | |
| | ``` |
| | ``` |
| MA_NO_GENERATION | Disables generation APIs such as `ma_waveform` and `ma_noise`. |
| MA_NO_SSE2 | Disables SSE2 optimizations. |
| MA_NO_AVX2 | Disables AVX2 optimizations. |
| MA_NO_NEON | Disables NEON optimizations. |
| MA_NO_RUNTIME_LINKING | Disables runtime linking. This is useful for passing Apple's |
| | notarization process. When enabling this, you may need to avoid |
| | using `-std=c89` or `-std=c99` on Linux builds or else you may end |
| | up with compilation errors due to conflicts with `timespec` and |
| | `timeval` data types. |
| | |
| | You may need to enable this if your target platform does not allow |
| | runtime linking via `dlopen()`. |
| MA_DEBUG_OUTPUT | Enable `printf()` output of debug logs (`MA_LOG_LEVEL_DEBUG`). |
| MA_COINIT_VALUE | Windows only. The value to pass to internal calls to |
| | `CoInitializeEx()`. Defaults to `COINIT_MULTITHREADED`. |
| MA_API | Controls how public APIs should be decorated. Default is `extern`. |
3. Definitions
This section defines common terms used throughout miniaudio. Unfortunately there is often ambiguity
in the use of terms throughout the audio space, so this section is intended to clarify how miniaudio
uses each term.
3.1. Sample
A sample is a single unit of audio data. If the sample format is f32, then one sample is one 32-bit
floating point number.
3.2. Frame / PCM Frame
A frame is a group of samples equal to the number of channels. For a stereo stream a frame is 2
samples, a mono frame is 1 sample, a 5.1 surround sound frame is 6 samples, etc. The terms "frame"
and "PCM frame" are the same thing in miniaudio. Note that this is different to a compressed frame.
If ever miniaudio needs to refer to a compressed frame, such as a FLAC frame, it will always
clarify what it's referring to with something like "FLAC frame".
3.3. Channel
A stream of monaural audio that is emitted from an individual speaker in a speaker system, or
received from an individual microphone in a microphone system. A stereo stream has two channels (a
left channel, and a right channel), a 5.1 surround sound system has 6 channels, etc. Some audio
systems refer to a channel as a complex audio stream that's mixed with other channels to produce
the final mix - this is completely different to miniaudio's use of the term "channel" and should
not be confused.
3.4. Sample Rate
The sample rate in miniaudio is always expressed in Hz, such as 44100, 48000, etc. It's the number
of PCM frames that are processed per second.
3.5. Formats
Throughout miniaudio you will see references to different sample formats:
| Symbol | Description | Range |
| ma_format_f32 | 32-bit floating point | [-1, 1] |
| ma_format_s16 | 16-bit signed integer | [-32768, 32767] |
| ma_format_s24 | 24-bit signed integer (tightly packed) | [-8388608, 8388607] |
| ma_format_s32 | 32-bit signed integer | [-2147483648, 2147483647] |
| ma_format_u8 | 8-bit unsigned integer | [0, 255] |
All formats are native-endian.
4. Data Sources
The data source abstraction in miniaudio is used for retrieving audio data from some source. A few
examples include `ma_decoder`, `ma_noise` and `ma_waveform`. You will need to be familiar with data
sources in order to make sense of some of the higher level concepts in miniaudio.
The `ma_data_source` API is a generic interface for reading from a data source. Any object that
implements the data source interface can be plugged into any `ma_data_source` function.
To read data from a data source:
ma_result result;
ma_uint64 framesRead;
result = ma_data_source_read_pcm_frames(pDataSource, pFramesOut, frameCount, &framesRead);
if (result != MA_SUCCESS) {
return result; // Failed to read data from the data source.
If you don't need the number of frames that were successfully read you can pass in `NULL` to the
`pFramesRead` parameter. If this returns a value less than the number of frames requested it means
the end of the file has been reached. `MA_AT_END` will be returned only when the number of frames
read is 0.
When calling any data source function, with the exception of `ma_data_source_init()` and
`ma_data_source_uninit()`, you can pass in any object that implements a data source. For example,
you could plug in a decoder like so:
ma_result result;
ma_uint64 framesRead;
ma_decoder decoder; // <-- This would be initialized with `ma_decoder_init_*()`.
result = ma_data_source_read_pcm_frames(&decoder, pFramesOut, frameCount, &framesRead);
if (result != MA_SUCCESS) {
return result; // Failed to read data from the decoder.
If you want to seek forward you can pass in `NULL` to the `pFramesOut` parameter. Alternatively you
can use `ma_data_source_seek_pcm_frames()`.
To seek to a specific PCM frame:
result = ma_data_source_seek_to_pcm_frame(pDataSource, frameIndex);
if (result != MA_SUCCESS) {
return result; // Failed to seek to PCM frame.
You can retrieve the total length of a data source in PCM frames, but note that some data sources
may not have the notion of a length, such as noise and waveforms, and others may just not have a
way of determining the length such as some decoders. To retrieve the length:
ma_uint64 length;
result = ma_data_source_get_length_in_pcm_frames(pDataSource, &length);
if (result != MA_SUCCESS) {
return result; // Failed to retrieve the length.
Care should be taken when retrieving the length of a data source where the underlying decoder is
pulling data from a data stream with an undefined length, such as internet radio or some kind of
broadcast. If you do this, `ma_data_source_get_length_in_pcm_frames()` may never return.
The current position of the cursor in PCM frames can also be retrieved:
ma_uint64 cursor;
result = ma_data_source_get_cursor_in_pcm_frames(pDataSource, &cursor);
if (result != MA_SUCCESS) {
return result; // Failed to retrieve the cursor.
You will often need to know the data format that will be returned after reading. This can be
retrieved like so:
ma_format format;
ma_uint32 channels;
ma_uint32 sampleRate;
ma_channel channelMap[MA_MAX_CHANNELS];
result = ma_data_source_get_data_format(pDataSource, &format, &channels, &sampleRate, channelMap, MA_MAX_CHANNELS);
if (result != MA_SUCCESS) {
return result; // Failed to retrieve data format.
If you do not need a specific data format property, just pass in NULL to the respective parameter.
There may be cases where you want to implement something like a sound bank where you only want to
read data within a certain range of the underlying data. To do this you can use a range:
result = ma_data_source_set_range_in_pcm_frames(pDataSource, rangeBegInFrames, rangeEndInFrames);
if (result != MA_SUCCESS) {
return result; // Failed to set the range.
This is useful if you have a sound bank where many sounds are stored in the same file and you want
the data source to only play one of those sub-sounds. Note that once the range is set, everything
that takes a position, such as cursors and loop points, should always be relative to the start of
the range. When the range is set, any previously defined loop point will be reset.
Custom loop points can also be used with data sources. By default, data sources will loop after
they reach the end of the data source, but if you need to loop at a specific location, you can do
the following:
result = ma_data_source_set_loop_point_in_pcm_frames(pDataSource, loopBegInFrames, loopEndInFrames);
if (result != MA_SUCCESS) {
return result; // Failed to set the loop point.
The loop point is relative to the current range.
It's sometimes useful to chain data sources together so that a seamless transition can be achieved.
To do this, you can use chaining:
ma_decoder decoder1;
ma_decoder decoder2;
// ... initialize decoders with ma_decoder_init_*() ...
result = ma_data_source_set_next(&decoder1, &decoder2);
if (result != MA_SUCCESS) {
return result; // Failed to set the next data source.
result = ma_data_source_read_pcm_frames(&decoder1, pFramesOut, frameCount, pFramesRead);
if (result != MA_SUCCESS) {
return result; // Failed to read from the decoder.
In the example above we're using decoders. When reading from a chain, you always want to read from
the top level data source in the chain. In the example above, `decoder1` is the top level data
source in the chain. When `decoder1` reaches the end, `decoder2` will start seamlessly without any
Note that when looping is enabled, only the current data source will be looped. You can loop the
entire chain by linking in a loop like so:
ma_data_source_set_next(&decoder1, &decoder2); // decoder1 -> decoder2
ma_data_source_set_next(&decoder2, &decoder1); // decoder2 -> decoder1 (loop back to the start).
Note that setting up chaining is not thread safe, so care needs to be taken if you're dynamically
changing links while the audio thread is in the middle of reading.
Do not use `ma_decoder_seek_to_pcm_frame()` as a means to reuse a data source to play multiple
instances of the same sound simultaneously. This can be extremely inefficient depending on the type
of data source and can result in glitching due to subtle changes to the state of internal filters.
Instead, initialize multiple data sources for each instance.
4.1. Custom Data Sources
You can implement a custom data source by implementing the functions in `ma_data_source_vtable`.
Your custom object must have `ma_data_source_base` as its first member:
struct my_data_source
ma_data_source_base base;
In your initialization routine, you need to call `ma_data_source_init()` in order to set up the
base object (`ma_data_source_base`):
static ma_result my_data_source_read(ma_data_source* pDataSource, void* pFramesOut, ma_uint64 frameCount, ma_uint64* pFramesRead)
// Read data here. Output in the same format returned by my_data_source_get_data_format().
static ma_result my_data_source_seek(ma_data_source* pDataSource, ma_uint64 frameIndex)
// Seek to a specific PCM frame here. Return MA_NOT_IMPLEMENTED if seeking is not supported.
static ma_result my_data_source_get_data_format(ma_data_source* pDataSource, ma_format* pFormat, ma_uint32* pChannels, ma_uint32* pSampleRate, ma_channel* pChannelMap, size_t channelMapCap)
// Return the format of the data here.
static ma_result my_data_source_get_cursor(ma_data_source* pDataSource, ma_uint64* pCursor)
// Retrieve the current position of the cursor here. Return MA_NOT_IMPLEMENTED and set *pCursor to 0 if there is no notion of a cursor.
static ma_result my_data_source_get_length(ma_data_source* pDataSource, ma_uint64* pLength)
// Retrieve the length in PCM frames here. Return MA_NOT_IMPLEMENTED and set *pLength to 0 if there is no notion of a length or if the length is unknown.
static ma_data_source_vtable g_my_data_source_vtable =
ma_result my_data_source_init(my_data_source* pMyDataSource)
ma_result result;
ma_data_source_config baseConfig;
baseConfig = ma_data_source_config_init();
baseConfig.vtable = &g_my_data_source_vtable;
result = ma_data_source_init(&baseConfig, &pMyDataSource->base);
if (result != MA_SUCCESS) {
return result;
// ... do the initialization of your custom data source here ...
return MA_SUCCESS;
void my_data_source_uninit(my_data_source* pMyDataSource)
// ... do the uninitialization of your custom data source here ...
// You must uninitialize the base data source.
Note that `ma_data_source_init()` and `ma_data_source_uninit()` are never called directly outside
of the custom data source. It's up to the custom data source itself to call these within their own
init/uninit functions.
5. Engine
The `ma_engine` API is a high level API for managing and mixing sounds and effect processing. The
`ma_engine` object encapsulates a resource manager and a node graph, both of which will be
explained in more detail later.
Sounds are called `ma_sound` and are created from an engine. Sounds can be associated with a mixing
group called `ma_sound_group` which are also created from the engine. Both `ma_sound` and
`ma_sound_group` objects are nodes within the engine's node graph.
When the engine is initialized, it will normally create a device internally. If you would rather
manage the device yourself, you can do so and just pass a pointer to it via the engine config when
you initialize the engine. You can also just use the engine without a device, which again can be
configured via the engine config.
The most basic way to initialize the engine is with a default config, like so:
ma_result result;
ma_engine engine;
result = ma_engine_init(NULL, &engine);
if (result != MA_SUCCESS) {
return result; // Failed to initialize the engine.
This will result in the engine initializing a playback device using the operating system's default
device. This will be sufficient for many use cases, but if you need more flexibility you'll want to
configure the engine with an engine config:
ma_result result;
ma_engine engine;
ma_engine_config engineConfig;
engineConfig = ma_engine_config_init();
engineConfig.pDevice = &myDevice;
result = ma_engine_init(&engineConfig, &engine);
if (result != MA_SUCCESS) {
return result; // Failed to initialize the engine.
In the example above we're passing in a pre-initialized device. Since the caller is the one in
control of the device's data callback, it's their responsibility to manually call
`ma_engine_read_pcm_frames()` from inside their data callback:
void playback_data_callback(ma_device* pDevice, void* pOutput, const void* pInput, ma_uint32 frameCount)
ma_engine_read_pcm_frames(&g_Engine, pOutput, frameCount, NULL);
You can also use the engine independent of a device entirely:
ma_result result;
ma_engine engine;
ma_engine_config engineConfig;
engineConfig = ma_engine_config_init();
engineConfig.noDevice = MA_TRUE;
engineConfig.channels = 2; // Must be set when not using a device.
engineConfig.sampleRate = 48000; // Must be set when not using a device.
result = ma_engine_init(&engineConfig, &engine);
if (result != MA_SUCCESS) {
return result; // Failed to initialize the engine.
Note that when you're not using a device, you must set the channel count and sample rate in the
config or else miniaudio won't know what to use (miniaudio will use the device to determine this
normally). When not using a device, you need to use `ma_engine_read_pcm_frames()` to process audio
data from the engine. This kind of setup is useful if you want to do something like offline
processing or want to use a different audio system for playback such as SDL.
When a sound is loaded it goes through a resource manager. By default the engine will initialize a
resource manager internally, but you can also specify a pre-initialized resource manager:
ma_result result;
ma_engine engine1;
ma_engine engine2;
ma_engine_config engineConfig;
engineConfig = ma_engine_config_init();
engineConfig.pResourceManager = &myResourceManager;
ma_engine_init(&engineConfig, &engine1);
ma_engine_init(&engineConfig, &engine2);
In this example we are initializing two engines, both of which are sharing the same resource
manager. This is especially useful for saving memory when loading the same file across multiple
engines. If you were not to use a shared resource manager, each engine instance would use their own
which would result in any sounds that are used between both engines being loaded twice. By using
a shared resource manager, it would only be loaded once. Using multiple engines is useful when you
need to output to multiple playback devices, such as in a local multiplayer game where each player
is using their own set of headphones.
By default an engine will be in a started state. To make it so the engine is not automatically
started you can configure it as such:
engineConfig.noAutoStart = MA_TRUE;
// The engine will need to be started manually.
// Later on the engine can be stopped with ma_engine_stop().
The concept of starting or stopping an engine is only relevant when using the engine with a
device. Attempting to start or stop an engine that is not associated with a device will result in
The master volume of the engine can be controlled with `ma_engine_set_volume()` which takes a
linear scale, with 0 resulting in silence and anything above 1 resulting in amplification. If you
prefer decibel based volume control, use `ma_volume_db_to_linear()` to convert from dB to linear.
When a sound is spatialized, it is done so relative to a listener. An engine can be configured to
have multiple listeners which can be configured via the config:
engineConfig.listenerCount = 2;
The maximum number of listeners is restricted to `MA_ENGINE_MAX_LISTENERS`. By default, when a
sound is spatialized, it will be done so relative to the closest listener. You can also pin a sound
to a specific listener which will be explained later. Listeners have a position, direction, cone,
and velocity (for doppler effect). A listener is referenced by an index, the meaning of which is up
to the caller (the index is 0 based and cannot go beyond the listener count, minus 1). The
position, direction and velocity are all specified in absolute terms:
ma_engine_listener_set_position(&engine, listenerIndex, worldPosX, worldPosY, worldPosZ);
The direction of the listener represents its forward vector. The listener's up vector can also be
specified and defaults to +1 on the Y axis.
ma_engine_listener_set_direction(&engine, listenerIndex, forwardX, forwardY, forwardZ);
ma_engine_listener_set_world_up(&engine, listenerIndex, 0, 1, 0);
The engine supports directional attenuation. The listener can have a cone that controls how sound is
attenuated based on the listener's direction. When a sound is between the inner and outer cones, it
will be attenuated between 1 and the cone's outer gain:
ma_engine_listener_set_cone(&engine, listenerIndex, innerAngleInRadians, outerAngleInRadians, outerGain);
When a sound is inside the inner cone, no directional attenuation is applied. When the sound is
outside of the outer cone, the attenuation will be set to `outerGain` in the example above. When
the sound is in between the inner and outer cones, the attenuation will be interpolated between 1
and the outer gain.
The engine's coordinate system follows the OpenGL coordinate system where positive X points right,
positive Y points up and negative Z points forward.
The simplest and least flexible way to play a sound is like so:
ma_engine_play_sound(&engine, "my_sound.wav", pGroup);
This is a "fire and forget" style of function. The engine will manage the `ma_sound` object
internally. When the sound finishes playing, it'll be put up for recycling. For more flexibility
you'll want to initialize a sound object:
ma_sound sound;
result = ma_sound_init_from_file(&engine, "my_sound.wav", flags, pGroup, NULL, &sound);
if (result != MA_SUCCESS) {
return result; // Failed to load sound.
Sounds need to be uninitialized with `ma_sound_uninit()`.
The example above loads a sound from a file. If the resource manager has been disabled you will not
be able to use this function and instead you'll need to initialize a sound directly from a data
ma_sound sound;
result = ma_sound_init_from_data_source(&engine, &dataSource, flags, pGroup, &sound);
if (result != MA_SUCCESS) {
return result;
Each `ma_sound` object represents a single instance of the sound. If you want to play the same
sound multiple times at the same time, you need to initialize a separate `ma_sound` object.
For the most flexibility when initializing sounds, use `ma_sound_init_ex()`. This uses miniaudio's
standard config/init pattern:
ma_sound sound;
ma_sound_config soundConfig;
soundConfig = ma_sound_config_init();
soundConfig.pFilePath = NULL; // Set this to load from a file path.
soundConfig.pDataSource = NULL; // Set this to initialize from an existing data source.
soundConfig.pInitialAttachment = &someNodeInTheNodeGraph;
soundConfig.initialAttachmentInputBusIndex = 0;
soundConfig.channelsIn = 1;
soundConfig.channelsOut = 0; // Set to 0 to use the engine's native channel count.
result = ma_sound_init_ex(&soundConfig, &sound);
if (result != MA_SUCCESS) {
return result;
In the example above, the sound is being initialized without a file nor a data source. This is
valid, in which case the sound acts as a node in the middle of the node graph. This means you can
connect other sounds to this sound and allow it to act like a sound group. Indeed, this is exactly
what a `ma_sound_group` is.
When loading a sound, you specify a set of flags that control how the sound is loaded and what
features are enabled for that sound. When no flags are set, the sound will be fully loaded into
memory in exactly the same format as how it's stored on the file system. The resource manager will
allocate a block of memory and then load the file directly into it. When reading audio data, it
will be decoded dynamically on the fly. In order to save processing time on the audio thread, it
might be beneficial to pre-decode the sound. You can do this with the `MA_SOUND_FLAG_DECODE` flag:
ma_sound_init_from_file(&engine, "my_sound.wav", MA_SOUND_FLAG_DECODE, pGroup, NULL, &sound);
By default, sounds will be loaded synchronously, meaning `ma_sound_init_*()` will not return until
the sound has been fully loaded. If this is prohibitive you can instead load sounds asynchronously
by specifying the `MA_SOUND_FLAG_ASYNC` flag:
ma_sound_init_from_file(&engine, "my_sound.wav", MA_SOUND_FLAG_DECODE | MA_SOUND_FLAG_ASYNC, pGroup, NULL, &sound);
This will result in `ma_sound_init_*()` returning quickly, but the sound won't yet have been fully
loaded. When you start the sound, it won't output anything until some sound is available. The sound
will start outputting audio before the sound has been fully decoded when the `MA_SOUND_FLAG_DECODE`
is specified.
If you need to wait for an asynchronously loaded sound to be fully loaded, you can use a fence. A
fence in miniaudio is a simple synchronization mechanism which simply blocks until its internal
counter hits zero. You can specify a fence like so:
ma_result result;
ma_fence fence;
ma_sound sounds[4];
result = ma_fence_init(&fence);
if (result != MA_SUCCESS) {
return result;
// Load some sounds asynchronously.
for (int iSound = 0; iSound < 4; iSound += 1) {
ma_sound_init_from_file(&engine, mySoundFilesPaths[iSound], MA_SOUND_FLAG_DECODE | MA_SOUND_FLAG_ASYNC, pGroup, &fence, &sounds[iSound]);
// ... do some other stuff here in the mean time ...
// Wait for all sounds to finish loading.
If loading the entire sound into memory is prohibitive, you can also configure the engine to stream
the audio data:
ma_sound_init_from_file(&engine, "my_sound.wav", MA_SOUND_FLAG_STREAM, pGroup, NULL, &sound);
When streaming sounds, 2 seconds worth of audio data is stored in memory. Although it should work
fine, it's inefficient to use streaming for short sounds. Streaming is useful for things like music
tracks in games.
When loading a sound from a file path, the engine will reference count the file to prevent it from
being loaded if it's already in memory. When you uninitialize a sound, the reference count will be
decremented, and if it hits zero, the sound will be unloaded from memory. This reference counting
system is not used for streams. The engine will use a 64-bit hash of the file name when comparing
file paths which means there's a small chance you might encounter a name collision. If this is an
issue, you'll need to use a different name for one of the colliding file paths, or just not load
from files and instead load from a data source.
You can use `ma_sound_init_copy()` to initialize a copy of another sound. Note, however, that this
only works for sounds that were initialized with `ma_sound_init_from_file()` and without the
When you initialize a sound, if you specify a sound group the sound will be attached to that group
automatically. If you set it to NULL, it will be automatically attached to the engine's endpoint.
If you would instead rather leave the sound unattached by default, you can specify the
`MA_SOUND_FLAG_NO_DEFAULT_ATTACHMENT` flag. This is useful if you want to set up a complex node
Sounds are not started by default. To start a sound, use `ma_sound_start()`. Stop a sound with
Sounds can have their volume controlled with `ma_sound_set_volume()` in the same way as the
engine's master volume.
Sounds support stereo panning and pitching. Set the pan with `ma_sound_set_pan()`. Setting the pan
to 0 will result in an unpanned sound. Setting it to -1 will shift everything to the left, whereas
+1 will shift it to the right. The pitch can be controlled with `ma_sound_set_pitch()`. A larger
value will result in a higher pitch. The pitch must be greater than 0.
The engine supports 3D spatialization of sounds. By default sounds will have spatialization
enabled, but if a sound does not need to be spatialized it's best to disable it. There are two ways
to disable spatialization of a sound:
// Disable spatialization at initialization time via a flag:
ma_sound_init_from_file(&engine, "my_sound.wav", MA_SOUND_FLAG_NO_SPATIALIZATION, NULL, NULL, &sound);
// Dynamically disable or enable spatialization post-initialization:
ma_sound_set_spatialization_enabled(&sound, isSpatializationEnabled);
By default sounds will be spatialized based on the closest listener. If a sound should always be
spatialized relative to a specific listener it can be pinned to one:
ma_sound_set_pinned_listener_index(&sound, listenerIndex);
Like listeners, sounds have a position. By default, the position of a sound is in absolute space,
but it can be changed to be relative to a listener:
ma_sound_set_positioning(&sound, ma_positioning_relative);
Note that relative positioning of a sound only makes sense if there is either only one listener, or
the sound is pinned to a specific listener. To set the position of a sound:
ma_sound_set_position(&sound, posX, posY, posZ);
The direction works the same way as a listener and represents the sound's forward direction:
ma_sound_set_direction(&sound, forwardX, forwardY, forwardZ);
Sounds also have a cone for controlling directional attenuation. This works exactly the same as
ma_sound_set_cone(&sound, innerAngleInRadians, outerAngleInRadians, outerGain);
The velocity of a sound is used for doppler effect and can be set as such:
ma_sound_set_velocity(&sound, velocityX, velocityY, velocityZ);
The engine supports different attenuation models which can be configured on a per-sound basis. By
default the attenuation model is set to `ma_attenuation_model_inverse` which is the equivalent to
OpenAL's `AL_INVERSE_DISTANCE_CLAMPED`. Configure the attenuation model like so:
ma_sound_set_attenuation_model(&sound, ma_attenuation_model_inverse);
The supported attenuation models include the following:
| ma_attenuation_model_none | No distance attenuation. |
| ma_attenuation_model_inverse | Equivalent to `AL_INVERSE_DISTANCE_CLAMPED`. |
| ma_attenuation_model_linear | Linear attenuation. |
| ma_attenuation_model_exponential | Exponential attenuation. |
To control how quickly a sound rolls off as it moves away from the listener, you need to configure
the rolloff:
ma_sound_set_rolloff(&sound, rolloff);
You can control the minimum and maximum gain to apply from spatialization:
ma_sound_set_min_gain(&sound, minGain);
ma_sound_set_max_gain(&sound, maxGain);
Likewise, in the calculation of attenuation, you can control the minimum and maximum distances for
the attenuation calculation. This is useful if you want to ensure sounds don't drop below a certain
volume after the listener moves further away and to have sounds play a maximum volume when the
listener is within a certain distance:
ma_sound_set_min_distance(&sound, minDistance);
ma_sound_set_max_distance(&sound, maxDistance);
The engine's spatialization system supports doppler effect. The doppler factor can be configured on
a per-sound basis like so:
ma_sound_set_doppler_factor(&sound, dopplerFactor);
You can fade sounds in and out with `ma_sound_set_fade_in_pcm_frames()` and
`ma_sound_set_fade_in_milliseconds()`. Set the volume to -1 to use the current volume as the
starting volume:
// Fade in over 1 second.
ma_sound_set_fade_in_milliseconds(&sound, 0, 1, 1000);
// ... sometime later ...
// Fade out over 1 second, starting from the current volume.
ma_sound_set_fade_in_milliseconds(&sound, -1, 0, 1000);
By default sounds will start immediately, but sometimes for timing and synchronization purposes it
can be useful to schedule a sound to start or stop:
// Start the sound in 1 second from now.
ma_sound_set_start_time_in_pcm_frames(&sound, ma_engine_get_time_in_pcm_frames(&engine) + (ma_engine_get_sample_rate(&engine) * 1));
// Stop the sound in 2 seconds from now.
ma_sound_set_stop_time_in_pcm_frames(&sound, ma_engine_get_time_in_pcm_frames(&engine) + (ma_engine_get_sample_rate(&engine) * 2));
Note that scheduling a start time still requires an explicit call to `ma_sound_start()` before
anything will play.
The time is specified in global time which is controlled by the engine. You can get the engine's
current time with `ma_engine_get_time_in_pcm_frames()`. The engine's global time is incremented
automatically as audio data is read, but it can be reset with `ma_engine_set_time_in_pcm_frames()`
in case it needs to be resynchronized for some reason.
To determine whether or not a sound is currently playing, use `ma_sound_is_playing()`. This will
take the scheduled start and stop times into account.
Whether or not a sound should loop can be controlled with `ma_sound_set_looping()`. Sounds will not
be looping by default. Use `ma_sound_is_looping()` to determine whether or not a sound is looping.
Use `ma_sound_at_end()` to determine whether or not a sound is currently at the end. For a looping
sound this should never return true. Alternatively, you can configure a callback that will be fired
when the sound reaches the end. Note that the callback is fired from the audio thread which means
you cannot be uninitializing sound from the callback. To set the callback you can use
`ma_sound_set_end_callback()`. Alternatively, if you're using `ma_sound_init_ex()`, you can pass it
into the config like so:
soundConfig.endCallback = my_end_callback;
soundConfig.pEndCallbackUserData = pMyEndCallbackUserData;
The end callback is declared like so:
void my_end_callback(void* pUserData, ma_sound* pSound)
Internally a sound wraps around a data source. Some APIs exist to control the underlying data
source, mainly for convenience:
ma_sound_seek_to_pcm_frame(&sound, frameIndex);
ma_sound_get_data_format(&sound, &format, &channels, &sampleRate, pChannelMap, channelMapCapacity);
ma_sound_get_cursor_in_pcm_frames(&sound, &cursor);
ma_sound_get_length_in_pcm_frames(&sound, &length);
Sound groups have the same API as sounds, only they are called `ma_sound_group`, and since they do
not have any notion of a data source, anything relating to a data source is unavailable.
Internally, sound data is loaded via the `ma_decoder` API which means by default it only supports
file formats that have built-in support in miniaudio. You can extend this to support any kind of
file format through the use of custom decoders. To do this you'll need to use a self-managed
resource manager and configure it appropriately. See the "Resource Management" section below for
details on how to set this up.
6. Resource Management
Many programs will want to manage sound resources for things such as reference counting and
streaming. This is supported by miniaudio via the `ma_resource_manager` API.
The resource manager is mainly responsible for the following:
* Loading of sound files into memory with reference counting.
* Streaming of sound data.
When loading a sound file, the resource manager will give you back a `ma_data_source` compatible
object called `ma_resource_manager_data_source`. This object can be passed into any
`ma_data_source` API which is how you can read and seek audio data. When loading a sound file, you
specify whether or not you want the sound to be fully loaded into memory (and optionally
pre-decoded) or streamed. When loading into memory, you can also specify whether or not you want
the data to be loaded asynchronously.
The example below is how you can initialize a resource manager using its default configuration:
ma_resource_manager_config config;
ma_resource_manager resourceManager;
config = ma_resource_manager_config_init();
result = ma_resource_manager_init(&config, &resourceManager);
if (result != MA_SUCCESS) {
printf("Failed to initialize the resource manager.");
return -1;
You can configure the format, channels and sample rate of the decoded audio data. By default it
will use the file's native data format, but you can configure it to use a consistent format. This
is useful for offloading the cost of data conversion to load time rather than dynamically
converting at mixing time. To do this, you configure the decoded format, channels and sample rate
like the code below:
config = ma_resource_manager_config_init();
config.decodedFormat = device.playback.format;
config.decodedChannels = device.playback.channels;
config.decodedSampleRate = device.sampleRate;
In the code above, the resource manager will be configured so that any decoded audio data will be
pre-converted at load time to the device's native data format. If instead you used defaults and
the data format of the file did not match the device's data format, you would need to convert the
data at mixing time which may be prohibitive in high-performance and large scale scenarios like
Internally the resource manager uses the `ma_decoder` API to load sounds. This means by default it
only supports decoders that are built into miniaudio. It's possible to support additional encoding
formats through the use of custom decoders. To do so, pass in your `ma_decoding_backend_vtable`
vtables into the resource manager config:
ma_decoding_backend_vtable* pCustomBackendVTables[] =
resourceManagerConfig.ppCustomDecodingBackendVTables = pCustomBackendVTables;
resourceManagerConfig.customDecodingBackendCount = sizeof(pCustomBackendVTables) / sizeof(pCustomBackendVTables[0]);
resourceManagerConfig.pCustomDecodingBackendUserData = NULL;
This system can allow you to support any kind of file format. See the "Decoding" section for
details on how to implement custom decoders. The miniaudio repository includes examples for Opus
via libopus and libopusfile and Vorbis via libvorbis and libvorbisfile.
Asynchronicity is achieved via a job system. When an operation needs to be performed, such as the
decoding of a page, a job will be posted to a queue which will then be processed by a job thread.
By default there will be only one job thread running, but this can be configured, like so:
config = ma_resource_manager_config_init();
config.jobThreadCount = MY_JOB_THREAD_COUNT;
By default job threads are managed internally by the resource manager, however you can also self
manage your job threads if, for example, you want to integrate the job processing into your
existing job infrastructure, or if you simply don't like the way the resource manager does it. To
do this, just set the job thread count to 0 and process jobs manually. To process jobs, you first
need to retrieve a job using `ma_resource_manager_next_job()` and then process it using
config = ma_resource_manager_config_init();
config.jobThreadCount = 0; // Don't manage any job threads internally.
config.flags = MA_RESOURCE_MANAGER_FLAG_NON_BLOCKING; // Optional. Makes `ma_resource_manager_next_job()` non-blocking.
// ... Initialize your custom job threads ...
void my_custom_job_thread(...)
for (;;) {
ma_job job;
ma_result result = ma_resource_manager_next_job(pMyResourceManager, &job);
if (result != MA_SUCCESS) {
if (result == MA_NO_DATA_AVAILABLE) {
// No jobs are available. Keep going. Will only get this if the resource manager was initialized
} else if (result == MA_CANCELLED) {
// MA_JOB_TYPE_QUIT was posted. Exit.
} else {
// Some other error occurred.
In the example above, the `MA_JOB_TYPE_QUIT` event is used as the termination
indicator, but you can use whatever you would like to terminate the thread. The call to
`ma_resource_manager_next_job()` is blocking by default, but can be configured to be non-blocking
by initializing the resource manager with the `MA_RESOURCE_MANAGER_FLAG_NON_BLOCKING` configuration
flag. Note that the `MA_JOB_TYPE_QUIT` will never be removed from the job queue. This
is to give every thread the opportunity to catch the event and terminate naturally.
When loading a file, it's sometimes convenient to be able to customize how files are opened and
read instead of using standard `fopen()`, `fclose()`, etc. which is what miniaudio will use by
default. This can be done by setting the `pVFS` member of the resource manager's config:
// Initialize your custom VFS object. See documentation for VFS for information on how to do this.
my_custom_vfs vfs = my_custom_vfs_init();
config = ma_resource_manager_config_init();
config.pVFS = &vfs;
This is particularly useful in programs like games where you want to read straight from an archive
rather than the normal file system. If you do not specify a custom VFS, the resource manager will
use the operating system's normal file operations.
To load a sound file and create a data source, call `ma_resource_manager_data_source_init()`. When
loading a sound you need to specify the file path and options for how the sounds should be loaded.
By default a sound will be loaded synchronously. The returned data source is owned by the caller
which means the caller is responsible for the allocation and freeing of the data source. Below is
an example for initializing a data source:
ma_resource_manager_data_source dataSource;
ma_result result = ma_resource_manager_data_source_init(pResourceManager, pFilePath, flags, &dataSource);
if (result != MA_SUCCESS) {
// Error.
// ...
// A ma_resource_manager_data_source object is compatible with the `ma_data_source` API. To read data, just call
// the `ma_data_source_read_pcm_frames()` like you would with any normal data source.
result = ma_data_source_read_pcm_frames(&dataSource, pDecodedData, frameCount, &framesRead);
if (result != MA_SUCCESS) {
// Failed to read PCM frames.
// ...
ma_resource_manager_data_source_uninit(pResourceManager, &dataSource);
The `flags` parameter specifies how you want to perform loading of the sound file. It can be a
combination of the following flags:
When no flags are specified (set to 0), the sound will be fully loaded into memory, but not
decoded, meaning the raw file data will be stored in memory, and then dynamically decoded when
`ma_data_source_read_pcm_frames()` is called. To instead decode the audio data before storing it in
memory, use the `MA_RESOURCE_MANAGER_DATA_SOURCE_FLAG_DECODE` flag. By default, the sound file will
be loaded synchronously, meaning `ma_resource_manager_data_source_init()` will only return after
the entire file has been loaded. This is good for simplicity, but can be prohibitively slow. You
can instead load the sound asynchronously using the `MA_RESOURCE_MANAGER_DATA_SOURCE_FLAG_ASYNC` flag.
This will result in `ma_resource_manager_data_source_init()` returning quickly, but no data will be
returned by `ma_data_source_read_pcm_frames()` until some data is available. When no data is
available because the asynchronous decoding hasn't caught up, `MA_BUSY` will be returned by `ma_data_source_read_pcm_frames()`.
For large sounds, it's often prohibitive to store the entire file in memory. To mitigate this, you
can instead stream audio data which you can do by specifying the
`MA_RESOURCE_MANAGER_DATA_SOURCE_FLAG_STREAM` flag. When streaming, data will be decoded in 1
second pages. When a new page needs to be decoded, a job will be posted to the job queue and then
subsequently processed in a job thread.
For in-memory sounds, reference counting is used to ensure the data is loaded only once. This means
multiple calls to `ma_resource_manager_data_source_init()` with the same file path will result in
the file data only being loaded once. Each call to `ma_resource_manager_data_source_init()` must be
matched up with a call to `ma_resource_manager_data_source_uninit()`. Sometimes it can be useful
for a program to register self-managed raw audio data and associate it with a file path. Use the
`ma_resource_manager_register_*()` and `ma_resource_manager_unregister_*()` APIs to do this.
`ma_resource_manager_register_decoded_data()` is used to associate a pointer to raw, self-managed
decoded audio data in the specified data format with the specified name. Likewise,
`ma_resource_manager_register_encoded_data()` is used to associate a pointer to raw self-managed
encoded audio data (the raw file data) with the specified name. Note that these names need not be
actual file paths. When `ma_resource_manager_data_source_init()` is called (without the
`MA_RESOURCE_MANAGER_DATA_SOURCE_FLAG_STREAM` flag), the resource manager will look for these
explicitly registered data buffers and, if found, will use it as the backing data for the data
source. Note that the resource manager does *not* make a copy of this data so it is up to the
caller to ensure the pointer stays valid for its lifetime. Use
`ma_resource_manager_unregister_data()` to unregister the self-managed data. You can also use
`ma_resource_manager_register_file()` and `ma_resource_manager_unregister_file()` to register and
unregister a file. It does not make sense to use the `MA_RESOURCE_MANAGER_DATA_SOURCE_FLAG_STREAM`
flag with a self-managed data pointer.
6.1. Asynchronous Loading and Synchronization
When loading asynchronously, it can be useful to poll whether or not loading has finished. Use
`ma_resource_manager_data_source_result()` to determine this. For in-memory sounds, this will
return `MA_SUCCESS` when the file has been *entirely* decoded. If the sound is still being decoded,
`MA_BUSY` will be returned. Otherwise, some other error code will be returned if the sound failed
to load. For streaming data sources, `MA_SUCCESS` will be returned when the first page has been
decoded and the sound is ready to be played. If the first page is still being decoded, `MA_BUSY`
will be returned. Otherwise, some other error code will be returned if the sound failed to load.
In addition to polling, you can also use a simple synchronization object called a "fence" to wait
for asynchronously loaded sounds to finish. This is called `ma_fence`. The advantage to using a
fence is that it can be used to wait for a group of sounds to finish loading rather than waiting
for sounds on an individual basis. There are two stages to loading a sound:
* Initialization of the internal decoder; and
* Completion of decoding of the file (the file is fully decoded)
You can specify separate fences for each of the different stages. Waiting for the initialization
of the internal decoder is important for when you need to know the sample format, channels and
sample rate of the file.
The example below shows how you could use a fence when loading a number of sounds:
// This fence will be released when all sounds are finished loading entirely.
ma_fence fence;
// This will be passed into the initialization routine for each sound.
ma_resource_manager_pipeline_notifications notifications = ma_resource_manager_pipeline_notifications_init();
notifications.done.pFence = &fence;
// Now load a bunch of sounds:
for (iSound = 0; iSound < soundCount; iSound += 1) {
ma_resource_manager_data_source_init(pResourceManager, pSoundFilePaths[iSound], flags, &notifications, &pSoundSources[iSound]);
// Wait for loading of sounds to finish.
In the example above we used a fence for waiting until the entire file has been fully decoded. If
you only need to wait for the initialization of the internal decoder to complete, you can use the
`init` member of the `ma_resource_manager_pipeline_notifications` object:
notifications.init.pFence = &fence;
If a fence is not appropriate for your situation, you can instead use a callback that is fired on
an individual sound basis. This is done in a very similar way to fences:
typedef struct
ma_async_notification_callbacks cb;
void* pMyData;
} my_notification;
void my_notification_callback(ma_async_notification* pNotification)
my_notification* pMyNotification = (my_notification*)pNotification;
// Do something in response to the sound finishing loading.
my_notification myCallback;
myCallback.cb.onSignal = my_notification_callback;
myCallback.pMyData = pMyData;
ma_resource_manager_pipeline_notifications notifications = ma_resource_manager_pipeline_notifications_init();
notifications.done.pNotification = &myCallback;
ma_resource_manager_data_source_init(pResourceManager, "my_sound.wav", flags, &notifications, &mySound);
In the example above we just extend the `ma_async_notification_callbacks` object and pass an
instantiation into the `ma_resource_manager_pipeline_notifications` in the same way as we did with
the fence, only we set `pNotification` instead of `pFence`. You can set both of these at the same
time and they should both work as expected. If using the `pNotification` system, you need to ensure
your `ma_async_notification_callbacks` object stays valid.
6.2. Resource Manager Implementation Details
Resources are managed in two main ways:
* By storing the entire sound inside an in-memory buffer (referred to as a data buffer)
* By streaming audio data on the fly (referred to as a data stream)
A resource managed data source (`ma_resource_manager_data_source`) encapsulates a data buffer or
data stream, depending on whether or not the data source was initialized with the
`MA_RESOURCE_MANAGER_DATA_SOURCE_FLAG_STREAM` flag. If so, it will make use of a
`ma_resource_manager_data_stream` object. Otherwise it will use a `ma_resource_manager_data_buffer`
object. Both of these objects are data sources which means they can be used with any
`ma_data_source_*()` API.
Another major feature of the resource manager is the ability to asynchronously decode audio files.
This relieves the audio thread of time-consuming decoding which can negatively affect scalability
due to the audio thread needing to complete its work extremely quickly to avoid glitching.
Asynchronous decoding is achieved through a job system. There is a central multi-producer,
multi-consumer, fixed-capacity job queue. When some asynchronous work needs to be done, a job is
posted to the queue which is then read by a job thread. The number of job threads can be
configured for improved scalability, and job threads can all run in parallel without needing to
worry about the order of execution (how this is achieved is explained below).
When a sound is being loaded asynchronously, playback can begin before the sound has been fully
decoded. This enables the application to start playback of the sound quickly, while at the same
time allowing the resource manager to keep loading in the background. Since there may be fewer
threads than the number of sounds being loaded at a given time, a simple scheduling system is used
to keep decoding time balanced and fair. The resource manager solves this by splitting decoding
into chunks called pages. By default, each page is 1 second long. When a page has been decoded, a
new job will be posted to start decoding the next page. By dividing up decoding into pages, an
individual sound shouldn't ever delay every other sound from having their first page decoded. Of
course, when loading many sounds at the same time, there will always be an amount of time required
to process jobs in the queue so in heavy load situations there will still be some delay. To
determine if a data source is ready to have some frames read, use
`ma_resource_manager_data_source_get_available_frames()`. This will return the number of frames
available starting from the current position.
6.2.1. Job Queue
The resource manager uses a job queue which is multi-producer, multi-consumer, and fixed-capacity.
This job queue is not currently lock-free, and instead uses a spinlock to achieve thread-safety.
Only a fixed number of jobs can be allocated and inserted into the queue which is done through a
lock-free data structure for allocating an index into a fixed sized array, with reference counting
for mitigation of the ABA problem. The reference count is 32-bit.
For many types of jobs it's important that they execute in a specific order. In these cases, jobs
are executed serially. For the resource manager, serial execution of jobs is only required on a
per-object basis (per data buffer or per data stream). Each of these objects stores an execution
counter. When a job is posted it is associated with an execution counter. When the job is
processed, it checks if the execution counter of the job equals the execution counter of the
owning object and if so, processes the job. If the counters are not equal, the job will be posted
back onto the job queue for later processing. When the job finishes processing the execution order
of the main object is incremented. This system means that no matter how many job threads are
executing, decoding of an individual sound will always get processed serially. The advantage to
having multiple threads comes into play when loading multiple sounds at the same time.
The resource manager's job queue is not 100% lock-free and will use a spinlock to achieve
thread-safety for a very small section of code. This is only relevant when the resource manager
uses more than one job thread. If only using a single job thread, which is the default, the
lock should never actually wait in practice. The amount of time spent locking should be quite
short, but it's something to be aware of for those who have pedantic lock-free requirements and
need to use more than one job thread. There are plans to remove this lock in a future version.
In addition, posting a job will release a semaphore, which on Win32 is implemented with
`ReleaseSemaphore` and on POSIX platforms via a condition variable:
pSemaphore->value += 1;
Again, this is relevant for those with strict lock-free requirements in the audio thread. To avoid
this, you can use non-blocking mode (via the `MA_JOB_QUEUE_FLAG_NON_BLOCKING`
flag) and implement your own job processing routine (see the "Resource Manager" section above for
details on how to do this).
6.2.2. Data Buffers
When the `MA_RESOURCE_MANAGER_DATA_SOURCE_FLAG_STREAM` flag is excluded at initialization time, the
resource manager will try to load the data into an in-memory data buffer. Before doing so, however,
it will first check if the specified file is already loaded. If so, it will increment a reference
counter and just use the already loaded data. This saves both time and memory. When the data buffer
is uninitialized, the reference counter will be decremented. If the counter hits zero, the file
will be unloaded. This is a detail to keep in mind because it could result in excessive loading and
unloading of a sound. For example, the following sequence will result in a file being loaded twice,
once after the other:
ma_resource_manager_data_source_init(pResourceManager, "my_file", ..., &myDataBuffer0); // Refcount = 1. Initial load.
ma_resource_manager_data_source_uninit(pResourceManager, &myDataBuffer0); // Refcount = 0. Unloaded.
ma_resource_manager_data_source_init(pResourceManager, "my_file", ..., &myDataBuffer1); // Refcount = 1. Reloaded because previous uninit() unloaded it.
ma_resource_manager_data_source_uninit(pResourceManager, &myDataBuffer1); // Refcount = 0. Unloaded.
A binary search tree (BST) is used for storing data buffers as it has good balance between
efficiency and simplicity. The key of the BST is a 64-bit hash of the file path that was passed
into `ma_resource_manager_data_source_init()`. The advantage of using a hash is that it saves
memory over storing the entire path, has faster comparisons, and results in a mostly balanced BST
due to the random nature of the hash. The disadvantages are that file names are case-sensitive and
there's a small chance of name collisions. If case-sensitivity is an issue, you should normalize
your file names to upper- or lower-case before initializing your data sources. If name collisions
become an issue, you'll need to change the name of one of the colliding names or just not use the
resource manager.
When a sound file has not already been loaded and the `MA_RESOURCE_MANAGER_DATA_SOURCE_FLAG_ASYNC`
flag is excluded, the file will be decoded synchronously by the calling thread. There are two
options for controlling how the audio is stored in the data buffer - encoded or decoded. When the
`MA_RESOURCE_MANAGER_DATA_SOURCE_FLAG_DECODE` option is excluded, the raw file data will be stored
in memory. Otherwise the sound will be decoded before storing it in memory. Synchronous loading is
a very simple and standard process of simply adding an item to the BST, allocating a block of
memory and then decoding (if `MA_RESOURCE_MANAGER_DATA_SOURCE_FLAG_DECODE` is specified).
When the `MA_RESOURCE_MANAGER_DATA_SOURCE_FLAG_ASYNC` flag is specified, loading of the data buffer
is done asynchronously. In this case, a job is posted to the queue to start loading and then the
function immediately returns, setting an internal result code to `MA_BUSY`. This result code is
returned when the program calls `ma_resource_manager_data_source_result()`. When decoding has fully
completed `MA_SUCCESS` will be returned. This can be used to know if loading has fully completed.
When loading asynchronously, a single job is posted to the queue of the type
`MA_JOB_TYPE_RESOURCE_MANAGER_LOAD_DATA_BUFFER_NODE`. This involves making a copy of the file path and
associating it with the job. When the job is processed by the job thread, it will first load the file
using the VFS associated with the resource manager. When using a custom VFS, it's important that it
be completely thread-safe because it will be used from one or more job threads at the same time.
Individual files should only ever be accessed by one thread at a time, however. After opening the
file via the VFS, the job will determine whether or not the file is being decoded. If not, it
simply allocates a block of memory and loads the raw file contents into it and returns. On the
other hand, when the file is being decoded, it will first allocate a decoder on the heap and
initialize it. Then it will check if the length of the file is known. If so it will allocate a
block of memory to store the decoded output and initialize it to silence. If the size is unknown,
it will allocate room for one page. After memory has been allocated, the first page will be
decoded. If the sound is shorter than a page, the result code will be set to `MA_SUCCESS` and the
completion event will be signalled and loading is now complete. If, however, there is more to
decode, a job with the code `MA_JOB_TYPE_RESOURCE_MANAGER_PAGE_DATA_BUFFER_NODE` is posted. This job
will decode the next page and perform the same process if it reaches the end. If there is more to
decode, the job will post another `MA_JOB_TYPE_RESOURCE_MANAGER_PAGE_DATA_BUFFER_NODE` job which will
keep on happening until the sound has been fully decoded. For sounds of an unknown length, each
page will be linked together as a linked list. Internally this is implemented via the
`ma_paged_audio_buffer` object.
6.2.3. Data Streams
Data streams only ever store two pages worth of data for each instance. They are most useful for
large sounds like music tracks in games that would consume too much memory if fully decoded in
memory. After every frame from a page has been read, a job will be posted to load the next page
which is done from the VFS.
For data streams, the `MA_RESOURCE_MANAGER_DATA_SOURCE_FLAG_ASYNC` flag will determine whether or
not initialization of the data source waits until the two pages have been decoded. When unset,
`ma_resource_manager_data_source_init()` will wait until the two pages have been loaded, otherwise
it will return immediately.
When frames are read from a data stream using `ma_resource_manager_data_source_read_pcm_frames()`,
`MA_BUSY` will be returned if there are no frames available. If there are some frames available,
but less than the number requested, `MA_SUCCESS` will be returned, but the actual number of frames
read will be less than the number requested. Due to the asynchronous nature of data streams,
seeking is also asynchronous. If the data stream is in the middle of a seek, `MA_BUSY` will be
returned when trying to read frames.
When `ma_resource_manager_data_source_read_pcm_frames()` results in a page getting fully consumed
a job is posted to load the next page. This will be posted from the same thread that called `ma_resource_manager_data_source_read_pcm_frames()`.
Data streams are uninitialized by posting a job to the queue, but the function won't return until
that job has been processed. The reason for this is that the caller owns the data stream object and
therefore miniaudio needs to ensure everything completes before handing back control to the caller.
Also, if the data stream is uninitialized while pages are in the middle of decoding, they must
complete before destroying any underlying object and the job system handles this cleanly.
Note that when a new page needs to be loaded, a job will be posted to the resource manager's job
thread from the audio thread. You must keep in mind the details mentioned in the "Job Queue"
section above regarding locking when posting an event if you require a strictly lock-free audio thread.
7. Node Graph
miniaudio's routing infrastructure follows a node graph paradigm. The idea is that you create a
node whose outputs are attached to inputs of another node, thereby creating a graph. There are
different types of nodes, with each node in the graph processing input data to produce output,
which is then fed through the chain. Each node in the graph can apply their own custom effects. At
the start of the graph will usually be one or more data source nodes which have no inputs and
instead pull their data from a data source. At the end of the graph is an endpoint which represents
the end of the chain and is where the final output is ultimately extracted from.
Each node has a number of input buses and a number of output buses. An output bus from a node is
attached to an input bus of another. Multiple nodes can connect their output buses to another
node's input bus, in which case their outputs will be mixed before processing by the node. Below is
a diagram that illustrates a hypothetical node graph setup:
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Data flows left to right >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+---------------+ +-----------------+
| Data Source 1 =----+ +----------+ +----= Low Pass Filter =----+
+---------------+ | | =----+ +-----------------+ | +----------+
+----= Splitter | +----= ENDPOINT |
+---------------+ | | =----+ +-----------------+ | +----------+
| Data Source 2 =----+ +----------+ +----= Echo / Delay =----+
+---------------+ +-----------------+
In the above graph, it starts with two data sources whose outputs are attached to the input of a
splitter node. It's at this point that the two data sources are mixed. After mixing, the splitter
performs its processing routine and produces two outputs which are simply a duplication of the
input stream. One output is attached to a low pass filter, whereas the other output is attached to
an echo/delay. The outputs of the low pass filter and the echo are attached to the endpoint, and
since they're both connected to the same input bus, they'll be mixed.
Each input bus must be configured to accept the same number of channels, but the number of channels
used by input buses can be different to the number of channels for output buses in which case
miniaudio will automatically convert the input data to the output channel count before processing.
The number of channels of an output bus of one node must match the channel count of the input bus
it's attached to. The channel counts cannot be changed after the node has been initialized. If you
attempt to attach an output bus to an input bus with a different channel count, attachment will fail.
To use a node graph, you first need to initialize a `ma_node_graph` object. This is essentially a
container around the entire graph. The `ma_node_graph` object is required for some thread-safety
issues which will be explained later. A `ma_node_graph` object is initialized using miniaudio's
standard config/init system:
ma_node_graph_config nodeGraphConfig = ma_node_graph_config_init(myChannelCount);
result = ma_node_graph_init(&nodeGraphConfig, NULL, &nodeGraph); // Second parameter is a pointer to allocation callbacks.
if (result != MA_SUCCESS) {
// Failed to initialize node graph.
When you initialize the node graph, you're specifying the channel count of the endpoint. The
endpoint is a special node which has one input bus and one output bus, both of which have the
same channel count, which is specified in the config. Any nodes that connect directly to the
endpoint must be configured such that their output buses have the same channel count. When you read
audio data from the node graph, it'll have the channel count you specified in the config. To read
data from the graph:
ma_uint32 framesRead;
result = ma_node_graph_read_pcm_frames(&nodeGraph, pFramesOut, frameCount, &framesRead);
if (result != MA_SUCCESS) {
// Failed to read data from the node graph.
When you read audio data, miniaudio starts at the node graph's endpoint node which then pulls in
data from its input attachments, which in turn recursively pull in data from their inputs, and so
on. At the start of the graph there will be some kind of data source node which will have zero
inputs and will instead read directly from a data source. The base nodes don't literally need to
read from a `ma_data_source` object, but they will always have some kind of underlying object that