Skip to content

Instantly share code, notes, and snippets.

@srikumarks
Last active January 27, 2024 05:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save srikumarks/6180450 to your computer and use it in GitHub Desktop.
Save srikumarks/6180450 to your computer and use it in GitHub Desktop.
A simple C program that measures data throughput between a parent and a child process connected by a pair of pipes. The purpose is to estimate how many typical audio buffers of 4096 float32 samples can be exchanged between two processes in duplex mode. Note: the timing code uses the macOS-specific mach_absolute_time(), so it will need to be adapted for other OSes.
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
// Code from http://stackoverflow.com/questions/5167269/clock-gettime-alternative-in-mac-os-x
#include <mach/mach_time.h>
// Scale factor applied to the final result of orwl_gettime() (1e-9).
#define ORWL_NANO (+1.0E-9)
// One billion as a 64-bit unsigned constant; not referenced by the code visible in this file.
#define ORWL_GIGA UINT64_C(1000000000)
// Ratio numer / denom reported by mach_timebase_info(); filled in on the first orwl_gettime() call.
static double orwl_timebase = 0.0;
// mach_absolute_time() reading taken on the first orwl_gettime() call; 0 means "not initialised yet".
static uint64_t orwl_timestart = 0;
/*
 * Returns the time elapsed since the first call to this function, computed as
 * (mach_absolute_time() delta) * (timebase numer / denom) * ORWL_NANO.
 *
 * The first call lazily records the timebase ratio and the starting tick
 * count in the file-scope statics, so it returns a value close to zero.
 * The lazy initialisation is not synchronised; take extra care before
 * calling this from more than one thread.
 */
double orwl_gettime(void) {
    if (orwl_timestart == 0) {
        mach_timebase_info_data_t info = { 0 };
        mach_timebase_info(&info);
        orwl_timebase = info.numer;
        orwl_timebase /= info.denom;
        orwl_timestart = mach_absolute_time();
    }

    uint64_t elapsed_ticks = mach_absolute_time() - orwl_timestart;
    return elapsed_ticks * orwl_timebase * ORWL_NANO;
}
/*
 * Parses `text` as a base-10 integer in the range [min_value, INT_MAX].
 * On success stores the value in *out and returns 0. On failure prints a
 * diagnostic mentioning `what` to stderr and returns -1.
 */
static int parse_int_arg(const char *text, const char *what, int min_value, int *out) {
    char *end = NULL;
    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        value < min_value || value > INT_MAX) {
        fprintf(stderr, "invalid %s: '%s' (expected an integer >= %d)\n",
                what, text, min_value);
        return -1;
    }
    *out = (int)value;
    return 0;
}

/*
 * Writes exactly `length` bytes from `data` to `fd`, retrying short writes
 * and EINTR. Returns 0 on success, -1 on error (errno is set).
 */
static int write_all(int fd, const unsigned char *data, size_t length) {
    size_t done = 0;
    while (done < length) {
        ssize_t n = write(fd, data + done, length - done);
        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        if (n == 0) {
            // No progress on a non-empty write: report it rather than spin forever.
            errno = EIO;
            return -1;
        }
        done += (size_t)n;
    }
    return 0;
}

/*
 * Reads exactly `length` bytes from `fd` into `data`, retrying short reads
 * and EINTR. Returns 1 when the buffer was filled, 0 if end-of-file was hit
 * before that, and -1 on error (errno is set).
 */
static int read_all(int fd, unsigned char *data, size_t length) {
    size_t done = 0;
    while (done < length) {
        ssize_t n = read(fd, data + done, length - done);
        if (n == 0) {
            return 0;
        }
        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        done += (size_t)n;
    }
    return 1;
}

/*
 * Child side: echoes every `byte_length`-sized buffer received on `in_fd`
 * back on `out_fd` until the parent closes its end of the pipe.
 * Returns the exit status the child process should terminate with.
 */
static int run_child(int in_fd, int out_fd, size_t byte_length) {
    unsigned char *buffer = malloc(byte_length);
    if (buffer == NULL) {
        perror("child: malloc");
        return EXIT_FAILURE;
    }

    int status = EXIT_SUCCESS;
    for (;;) {
        int got = read_all(in_fd, buffer, byte_length);
        if (got == 0) {
            break; // Parent closed the pipe: normal shutdown.
        }
        if (got < 0) {
            perror("child: read");
            status = EXIT_FAILURE;
            break;
        }
        if (write_all(out_fd, buffer, byte_length) != 0) {
            perror("child: write");
            status = EXIT_FAILURE;
            break;
        }
    }

    free(buffer);
    return status;
}

/*
 * Parent side: sends `repeat_count` buffers of `buffer_length` float samples
 * to the child over `out_fd` and waits for each one to come back on `in_fd`.
 * When `always_malloc` is non-zero the buffer is allocated and freed on every
 * iteration, otherwise it is allocated once. When `fill_buffer` is non-zero
 * the buffer is filled with a ramp in [-0.5, 0.5) before each send.
 * Returns 0 on success, -1 after printing a diagnostic on failure.
 */
static int run_parent(int out_fd, int in_fd, unsigned int buffer_length,
                      int repeat_count, int always_malloc, int fill_buffer) {
    const size_t byte_length = (size_t)buffer_length * sizeof(float);
    unsigned char *buffer = NULL;
    int status = 0;

    for (int r = 0; r < repeat_count; ++r) {
        // NULL on the first pass, and on every pass when always_malloc is set.
        if (buffer == NULL) {
            buffer = malloc(byte_length);
            if (buffer == NULL) {
                perror("parent: malloc");
                status = -1;
                break;
            }
        }

        if (fill_buffer) {
            float *samples = (float *)buffer;
            for (unsigned int i = 0; i < buffer_length; ++i) {
                samples[i] = (i * 1.0f / buffer_length) - 0.5f;
            }
        }

        if (write_all(out_fd, buffer, byte_length) != 0) {
            perror("parent: write");
            status = -1;
            break;
        }

        int got = read_all(in_fd, buffer, byte_length);
        if (got <= 0) {
            if (got == 0) {
                fprintf(stderr, "parent: child closed the pipe unexpectedly\n");
            } else {
                perror("parent: read");
            }
            status = -1;
            break;
        }

        if (always_malloc) {
            free(buffer);
            buffer = NULL;
        }
    }

    free(buffer);
    return status;
}

/*
 * Entry point.
 *
 * Usage: prog buffer_length number_of_iterations always_malloc fill_buffer
 *
 * Forks a child connected through two pipes, bounces a float32 buffer back
 * and forth number_of_iterations times, and prints the elapsed time and the
 * derived throughput figures. Returns 0 on success (and after printing the
 * usage text), EXIT_FAILURE on invalid arguments or any system-call failure.
 */
int main(int argc, const char *argv[]) {
    if (argc < 5) {
        printf(
            "Usage: %s buffer_length number_of_iterations always_malloc fill_buffer\n"
            "\n"
            "This program sends a buffer_length sample (float32) audio buffer back and\n"
            "forth with a child process and reports the time taken.\n"
            "\n"
            "When always_malloc == 0, the parent will allocate the buffer\n"
            "only once for all iterations. When it is 1, it will allocate it\n"
            "fresh for every iteration.\n"
            "\n"
            "When fill_buffer == 1, the buffer will be filled with a signal\n"
            "every time it is sent to the child. If this is 0, it will not be\n"
            "filled, which perhaps helps measure the raw I/O throughput.\n",
            argv[0]
        );
        return 0;
    }

    int length_arg = 0;
    int repeat_count = 0;
    int always_malloc = 0;
    int fill_buffer = 0;
    if (parse_int_arg(argv[1], "buffer_length", 1, &length_arg) != 0 ||
        parse_int_arg(argv[2], "number_of_iterations", 1, &repeat_count) != 0 ||
        parse_int_arg(argv[3], "always_malloc", 0, &always_malloc) != 0 ||
        parse_int_arg(argv[4], "fill_buffer", 0, &fill_buffer) != 0) {
        return EXIT_FAILURE;
    }

    // Guard the sample-count -> byte-count multiplication against overflow.
    if ((size_t)length_arg > SIZE_MAX / sizeof(float)) {
        fprintf(stderr, "buffer_length %d is too large\n", length_arg);
        return EXIT_FAILURE;
    }
    const unsigned int buffer_length = (unsigned int)length_arg;
    const size_t buffer_byte_length = (size_t)buffer_length * sizeof(float);

    int parent_to_child[2];
    int child_to_parent[2];
    if (pipe(parent_to_child) != 0) {
        perror("pipe");
        return EXIT_FAILURE;
    }
    if (pipe(child_to_parent) != 0) {
        perror("pipe");
        close(parent_to_child[0]);
        close(parent_to_child[1]);
        return EXIT_FAILURE;
    }

    pid_t child = fork();
    if (child < 0) {
        perror("fork");
        close(parent_to_child[0]);
        close(parent_to_child[1]);
        close(child_to_parent[0]);
        close(child_to_parent[1]);
        return EXIT_FAILURE;
    }

    if (child == 0) {
        // CHILD: keep only the read end of parent->child and the write end of child->parent.
        close(parent_to_child[1]);
        close(child_to_parent[0]);
        // _exit so the child never flushes stdio state inherited from the parent.
        _exit(run_child(parent_to_child[0], child_to_parent[1], buffer_byte_length));
    }

    // PARENT: keep only the write end of parent->child and the read end of child->parent.
    close(parent_to_child[0]);
    close(child_to_parent[1]);
    printf("always_malloc = %s, fill_buffer = %s\n", always_malloc ? "yes" : "no", fill_buffer ? "yes" : "no");

    double start = orwl_gettime();
    int run_status = run_parent(parent_to_child[1], child_to_parent[0],
                                buffer_length, repeat_count, always_malloc, fill_buffer);
    double stop = orwl_gettime();

    // Closing the write end delivers EOF to the child, which then exits.
    close(parent_to_child[1]);
    close(child_to_parent[0]);
    while (waitpid(child, NULL, 0) < 0) {
        if (errno != EINTR) {
            perror("waitpid");
            break;
        }
    }

    if (run_status != 0) {
        return EXIT_FAILURE;
    }

    const double elapsed = stop - start;
    printf("time taken for %d buffers of length %u float32 samples = %.1f ms\n", repeat_count, buffer_length, elapsed * 1000);
    printf("time to send and receive one buffer = %.1f ns\n", elapsed * 1000000000.0f / repeat_count);
    // Multiply in double so repeat_count * buffer_length cannot wrap around.
    printf("Effective stream over sampling rate (relative to 48KHz) = %.1f\n", (double)repeat_count * buffer_length / (48000 * elapsed));
    printf("peak duplex data rate = %.0f MB/sec\n", repeat_count * 1.0 * buffer_byte_length / (1024 * 1024 * elapsed));
    return 0;
}
@srikumarks
Copy link
Author

Some initial results -

Machine: 1.7GHz core i5, MacBook Air 4GB RAM (1333MHz DDR3), running MacOS 10.8.4
clang --version

Apple LLVM version 4.2 (clang-425.0.28) (based on LLVM 3.2svn)
Target: x86_64-apple-darwin12.4.0
Thread model: posix

Program compiled using "clang -O3 audio_data_duplex_throughput.c"

always_malloc = yes, fill_buffer = yes
time taken for 100000 buffers of length 128 float32 samples = 1039.4 ms
time to send and receive one buffer = 10394.1 ns
Effective stream over sampling rate (relative to 48KHz) = 256.6
peak duplex data rate = 47 MB/sec

always_malloc = yes, fill_buffer = yes
time taken for 100000 buffers of length 256 float32 samples = 1041.5 ms
time to send and receive one buffer = 10415.4 ns
Effective stream over sampling rate (relative to 48KHz) = 512.1
peak duplex data rate = 94 MB/sec

always_malloc = yes, fill_buffer = yes
time taken for 100000 buffers of length 512 float32 samples = 1202.8 ms
time to send and receive one buffer = 12027.9 ns
Effective stream over sampling rate (relative to 48KHz) = 886.8
peak duplex data rate = 162 MB/sec

always_malloc = yes, fill_buffer = yes
time taken for 100000 buffers of length 768 float32 samples = 1475.6 ms
time to send and receive one buffer = 14756.0 ns
Effective stream over sampling rate (relative to 48KHz) = 1084.3
peak duplex data rate = 199 MB/sec

always_malloc = yes, fill_buffer = yes
time taken for 100000 buffers of length 1024 float32 samples = 1534.6 ms
time to send and receive one buffer = 15346.0 ns
Effective stream over sampling rate (relative to 48KHz) = 1390.2
peak duplex data rate = 255 MB/sec

always_malloc = yes, fill_buffer = yes
time taken for 100000 buffers of length 2048 float32 samples = 2130.2 ms
time to send and receive one buffer = 21301.7 ns
Effective stream over sampling rate (relative to 48KHz) = 2003.0
peak duplex data rate = 367 MB/sec

always_malloc = yes, fill_buffer = yes
time taken for 100000 buffers of length 4096 float32 samples = 3636.6 ms
time to send and receive one buffer = 36366.2 ns
Effective stream over sampling rate (relative to 48KHz) = 2346.5
peak duplex data rate = 430 MB/sec

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment