Skip to content

Instantly share code, notes, and snippets.

@kuzux
Last active February 17, 2024 14:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kuzux/fa502dbcb1ed1d8a950d38c3aa13a2a0 to your computer and use it in GitHub Desktop.
Save kuzux/fa502dbcb1ed1d8a950d38c3aa13a2a0 to your computer and use it in GitHub Desktop.
Getting a bunch of properties about the video out of an mp4 file
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <stdbool.h>
#include <arpa/inet.h>
#include <sys/stat.h>
#include <assert.h>
/*
 * Writes the command-line synopsis to stderr, with `prog` substituted for the
 * program name, and then terminates the process with exit status 1.
 * This function never returns to its caller.
 */
void print_usage(const char* prog)
{
    static const char synopsis[] = "Usage: %s FILENAME\n";

    fprintf(stderr, synopsis, prog);
    exit(1);
}
/*
 * Fatal-error helper for failed system calls: perror() prints `msg` together
 * with the description of the current errno on stderr, after which the
 * process exits with status 1. This function never returns.
 */
void die(const char* msg)
{
    enum { FAILURE_STATUS = 1 };

    perror(msg);
    exit(FAILURE_STATUS);
}
/*
 * Header found at the start of every atom.
 *
 * size     - 32-bit size field in host byte order once read_atom_header() has
 *            filled the struct. 1 means the real size is in ext_size; 0 makes
 *            read_atom_header() report that parsing should stop.
 * type     - four-character type code; NOT NUL-terminated, so print it with
 *            "%.4s" and compare it with strncmp()/memcmp().
 * ext_size - 64-bit size in host byte order; only meaningful when size == 1.
 */
struct atom_header {
    uint32_t size;
    char type[4];
    uint64_t ext_size;
};

/*
 * Reads exactly `len` bytes from `fd` into `buf`, retrying after short reads
 * and EINTR. Returns false on end-of-file or on any other read error.
 */
static bool read_atom_bytes(int fd, uint8_t* buf, size_t len)
{
    size_t got = 0;
    while(got < len) {
        ssize_t rc = read(fd, buf + got, len - got);
        if(rc < 0 && errno == EINTR)
            continue;
        if(rc <= 0)
            return false;
        got += (size_t)rc;
    }
    return true;
}

// returns true if we should continue reading atoms
/*
 * Reads one atom header from the current position of `fd` into `header`.
 *
 * The 8-byte basic header is always consumed; when its size field is 1 the
 * following 8-byte extended size is consumed as well. Both sizes are stored
 * big-endian in the file and are decoded byte by byte, so the result does not
 * depend on host endianness, on struct padding, or on the non-standard
 * ntohll().
 *
 * Returns false when the size field is 0 or when the header could not be read
 * completely (end of file or I/O error). In the failure case `header` is left
 * zeroed, so callers that print it see size 0 and an empty type rather than
 * leftover data.
 */
bool read_atom_header(int fd, struct atom_header* header)
{
    uint8_t raw[8];

    memset(header, 0, sizeof *header);

    if(!read_atom_bytes(fd, raw, sizeof raw))
        return false;

    header->size = ((uint32_t)raw[0] << 24) | ((uint32_t)raw[1] << 16)
                 | ((uint32_t)raw[2] << 8)  |  (uint32_t)raw[3];
    memcpy(header->type, raw + 4, sizeof header->type);

    if(header->size == 1) {
        if(!read_atom_bytes(fd, raw, sizeof raw)) {
            // truncated extended size: report "stop" with a clean header
            memset(header, 0, sizeof *header);
            return false;
        }
        uint64_t ext_size = 0;
        for(size_t i = 0; i < sizeof raw; i++)
            ext_size = (ext_size << 8) | raw[i];
        header->ext_size = ext_size;
    }
    return (header->size != 0);
}
/*
 * Returns the size recorded for the atom described by `header`: the 64-bit
 * ext_size when the 32-bit size field holds the marker value 1, otherwise the
 * 32-bit size field itself. The parsers in this file add this value to an
 * atom's start offset to reach the next atom.
 */
uint64_t atom_size(struct atom_header header)
{
    return (header.size == 1) ? header.ext_size : (uint64_t)header.size;
}
/*
 * Returns how many bytes the header of this atom occupies in the file:
 * 16 when the size field is the marker value 1 (an 8-byte extended size
 * follows the basic header), 8 otherwise.
 */
int atom_header_size(struct atom_header header)
{
    const bool has_extended_size = (header.size == 1);

    return has_extended_size ? 16 : 8;
}
// Start of a 'tkhd' atom's payload. parse_tkhd fills this struct with a single
// read() straight from the file, so the order and sizes of the fields mirror
// the bytes on disk and must not be rearranged.
struct tkhd_data {
uint8_t dont_care[76]; // leading 76 bytes that this program never inspects
uint32_t width; // left in file byte order; parse_tkhd only tests it against 0
uint32_t height; // left in file byte order; parse_tkhd only tests it against 0
};
// Payload of an 'mdhd' atom as parse_mdhd reads it: the struct is filled with
// a single read() straight from the file, so field order and sizes mirror the
// bytes on disk.
struct mdhd_data {
uint8_t dont_care[12]; // leading 12 bytes that this program never inspects
uint32_t timescale; // units per second
uint32_t duration; // in units of timescale
uint32_t dont_care_end; // trailing 4 bytes that this program never inspects
};
// Common prefix of the stco, stsc, stsd, stss and stts payloads: each of those
// parsers reads this struct from the file first and the table records after it.
struct table_header {
uint32_t flags; // actually version + flags
uint32_t n_entries; // record count; arrives in file byte order and is converted with ntohl() by the parsers
// followed by n entries
};
// Prefix of an 'stsz' (sample sizes) payload; parse_stsz reads this struct
// from the file before the size table itself.
struct stsz_header {
uint32_t flags; // actually version + flags
uint32_t uniform_size; // 0 if not all frames have the same size
uint32_t n_entries; // number of frames; arrives in file byte order and is converted with ntohl() by parse_stsz
// followed by n entries
};
// Fixed-size start of one sample description, as parse_stsd reads it from the
// file right after the table header.
struct stsd_entry {
uint32_t size; // entry size; parse_stsd converts it with ntohl() and prints it
char format[4]; // four-character format code, not NUL-terminated; copied into mp4_video_info.format
uint8_t reserved[6]; // never inspected by this program
uint16_t data_reference_index; // never inspected by this program; stays in file byte order
};
// One record of the stsc (sample => chunk) table. parse_stsc reads the records
// in bulk from the file and converts the first two fields to host byte order.
struct stsc_entry {
uint32_t first_chunk; // chunk number at which this record starts to apply
uint32_t samples_per_chunk; // number of samples in each chunk covered by this record
uint32_t sample_description_id; // no need to convert to host
};
// One record of the stts (timing) table. seconds_to_frame treats each record
// as a run of sample_count consecutive frames that each last sample_duration.
struct stts_entry {
uint32_t sample_count; // number of frames in this run (host byte order after parse_stts)
uint32_t sample_duration; // duration of one frame in the run, compared against seconds * timescale
};
// Everything the parsers collect about the video track. The struct starts out
// zeroed (see empty_context) and each parse_* function fills in its own part.
// The table pointers are heap allocations made by the corresponding parser.
struct mp4_video_info {
int fd; // descriptor of the opened file; main() reuses it to read frame data
char format[4]; // four-character sample format from stsd, not NUL-terminated
uint32_t timescale; // from mdhd: units per second
uint32_t duration; // from mdhd: in units of timescale
uint32_t width; // NOTE(review): no parser shown here assigns this; it stays 0
uint32_t height; // NOTE(review): no parser shown here assigns this; it stays 0
uint32_t length_in_bytes; // sum of all entries of frame_size_table (computed in parse_stsz)
size_t timing_table_length; // number of records in timing_table
struct stts_entry* timing_table; // stts records, host byte order
size_t chunk_offset_table_length; // number of entries in chunk_offset_table
uint32_t* chunk_offset_table; // stco entries, host byte order; used as byte offsets into the file
size_t chunk_mapping_table_length; // number of records in chunk_mapping_table
struct stsc_entry* chunk_mapping_table; // stsc records
size_t keyframes_table_length; // number of entries in keyframes_table
uint32_t* keyframes_table; // stss entries, host byte order; get_nearest_keyframe subtracts 1 to get a frame index
size_t frame_size_table_length; // number of entries in frame_size_table
uint32_t* frame_size_table; // stsz entries, host byte order: size in bytes of each frame
};
// State handed (by value) to every parse_* function: where in the file the
// atom being parsed lives and where the results go. Copies share the same
// `info` pointer, so results written by nested parsers are visible to all.
struct parse_context {
int fd; // descriptor the atoms are read from
uint32_t curr_offset; // file offset at which the content of the current atom starts
uint32_t max_offset; // max for the current atom being parsed
struct mp4_video_info* info; // shared result record, allocated in empty_context
};
struct parse_context empty_context(int fd, uint32_t file_length)
{
struct parse_context res;
res.fd = fd;
res.curr_offset = 0;
res.max_offset = file_length;
res.info = malloc(sizeof(struct mp4_video_info));
memset(res.info, 0, sizeof(struct mp4_video_info));
res.info->fd = fd;
return res;
}
/*
 * Derives the context for a nested atom: identical to `ctx` (same descriptor,
 * same shared result record) except that it covers the byte range
 * [start_offset, end_offset) instead of the parent's range.
 */
struct parse_context child_context(struct parse_context ctx, uint32_t start_offset, uint32_t end_offset)
{
    // `ctx` is already a private by-value copy, so it can be narrowed in place
    ctx.curr_offset = start_offset;
    ctx.max_offset = end_offset;
    return ctx;
}
// chunk offsets table
/*
 * Parses the payload of an 'stco' atom from the current file position and
 * stores the chunk offsets, converted to host byte order, in
 * ctx.info->chunk_offset_table (length in chunk_offset_table_length).
 *
 * A truncated header or table, or a failed allocation, is reported on stderr
 * and ends the process with status 1; nothing is stored from a partial read.
 * A table stored by an earlier stco atom is freed before being replaced.
 */
void parse_stco(struct parse_context ctx)
{
    struct table_header header;
    if(read(ctx.fd, &header, sizeof header) != (ssize_t)sizeof header) {
        fprintf(stderr, "stco: truncated table header\n");
        exit(1);
    }
    header.n_entries = ntohl(header.n_entries);
    printf(" stco table size %u\n", header.n_entries);

    uint32_t* table = NULL;
    if(header.n_entries > 0) {
        // calloc refuses an element count whose byte size would overflow size_t
        table = calloc(header.n_entries, sizeof *table);
        if(table == NULL) {
            perror("calloc");
            exit(1);
        }
        size_t byte_count = (size_t)header.n_entries * sizeof *table;
        if(read(ctx.fd, table, byte_count) != (ssize_t)byte_count) {
            fprintf(stderr, "stco: truncated table\n");
            exit(1);
        }
        for(size_t i = 0; i < header.n_entries; i++)
            table[i] = ntohl(table[i]);
    }

    // free(NULL) is a no-op, so this is safe for the first stco atom too
    free(ctx.info->chunk_offset_table);
    ctx.info->chunk_offset_table = table;
    ctx.info->chunk_offset_table_length = header.n_entries;
}
// sample => chunk table
void parse_stsc(struct parse_context ctx)
{
struct table_header header;
read(ctx.fd, &header, sizeof(struct table_header));
header.n_entries = ntohl(header.n_entries);
printf(" stsc table size %u\n", header.n_entries);
ctx.info->chunk_mapping_table_length = header.n_entries;
size_t byte_count = header.n_entries * sizeof(struct stsc_entry);
ctx.info->chunk_mapping_table = malloc(byte_count);
read(ctx.fd, ctx.info->chunk_mapping_table, byte_count);
for(uint32_t i=0; i<header.n_entries; i++) {
ctx.info->chunk_mapping_table[i].first_chunk = ntohl(ctx.info->chunk_mapping_table[i].first_chunk);
ctx.info->chunk_mapping_table[i].samples_per_chunk = ntohl(ctx.info->chunk_mapping_table[i].samples_per_chunk);
}
}
// sample definition table
/*
 * Parses the payload of an 'stsd' atom from the current file position and
 * copies the four-character format code of its single sample description
 * into ctx.info->format.
 *
 * Only tables with exactly one entry are supported. Any other count, or a
 * truncated header/entry, is reported on stderr and ends the process with
 * status 1. The count is checked at run time rather than with assert() because
 * it comes from the file and the check must survive NDEBUG builds.
 */
void parse_stsd(struct parse_context ctx)
{
    struct table_header header;
    if(read(ctx.fd, &header, sizeof header) != (ssize_t)sizeof header) {
        fprintf(stderr, "stsd: truncated table header\n");
        exit(1);
    }
    header.n_entries = ntohl(header.n_entries);
    printf(" stsd table size %u\n", header.n_entries);
    if(header.n_entries != 1) {
        fprintf(stderr, "stsd: expected exactly 1 sample description, found %u\n", header.n_entries);
        exit(1);
    }

    struct stsd_entry entry;
    if(read(ctx.fd, &entry, sizeof entry) != (ssize_t)sizeof entry) {
        fprintf(stderr, "stsd: truncated sample description\n");
        exit(1);
    }
    entry.size = ntohl(entry.size);
    // format is not NUL-terminated, hence the explicit %.4s precision
    printf(" stsd entry size: %u, format: %.4s\n", entry.size, entry.format);
    memcpy(ctx.info->format, entry.format, 4);
}
// keyframes table
/*
 * Parses the payload of an 'stss' atom from the current file position, prints
 * every entry, and stores the entries (host byte order) in
 * ctx.info->keyframes_table (length in keyframes_table_length).
 *
 * A truncated header or table, or a failed allocation, is reported on stderr
 * and ends the process with status 1. A table stored by an earlier stss atom
 * is freed before being replaced.
 */
void parse_stss(struct parse_context ctx)
{
    struct table_header header;
    if(read(ctx.fd, &header, sizeof header) != (ssize_t)sizeof header) {
        fprintf(stderr, "stss: truncated table header\n");
        exit(1);
    }
    header.n_entries = ntohl(header.n_entries);
    printf(" stss table (# of keyframes) size %u\n", header.n_entries);

    uint32_t* keyframes_table = NULL;
    if(header.n_entries > 0) {
        // calloc refuses an element count whose byte size would overflow size_t
        keyframes_table = calloc(header.n_entries, sizeof *keyframes_table);
        if(keyframes_table == NULL) {
            perror("calloc");
            exit(1);
        }
        size_t byte_count = (size_t)header.n_entries * sizeof *keyframes_table;
        if(read(ctx.fd, keyframes_table, byte_count) != (ssize_t)byte_count) {
            fprintf(stderr, "stss: truncated table\n");
            exit(1);
        }
        for(size_t i = 0; i < header.n_entries; i++) {
            keyframes_table[i] = ntohl(keyframes_table[i]);
            printf(" frame %u is a keyframe\n", keyframes_table[i]);
        }
    }

    // free(NULL) is a no-op, so this is safe for the first stss atom too
    free(ctx.info->keyframes_table);
    ctx.info->keyframes_table_length = header.n_entries;
    ctx.info->keyframes_table = keyframes_table;
}
// timing information table
/*
 * Parses the payload of an 'stts' atom from the current file position, prints
 * every record, and stores the records (host byte order) in
 * ctx.info->timing_table (length in timing_table_length).
 *
 * A truncated header or table, or a failed allocation, is reported on stderr
 * and ends the process with status 1. A table stored by an earlier stts atom
 * is freed before being replaced.
 */
void parse_stts(struct parse_context ctx)
{
    struct table_header header;
    if(read(ctx.fd, &header, sizeof header) != (ssize_t)sizeof header) {
        fprintf(stderr, "stts: truncated table header\n");
        exit(1);
    }
    header.n_entries = ntohl(header.n_entries);
    printf(" stts table size %u\n", header.n_entries);

    struct stts_entry* table = NULL;
    if(header.n_entries > 0) {
        // calloc refuses an element count whose byte size would overflow size_t
        table = calloc(header.n_entries, sizeof *table);
        if(table == NULL) {
            perror("calloc");
            exit(1);
        }
        size_t byte_count = (size_t)header.n_entries * sizeof *table;
        if(read(ctx.fd, table, byte_count) != (ssize_t)byte_count) {
            fprintf(stderr, "stts: truncated table\n");
            exit(1);
        }
        for(size_t i = 0; i < header.n_entries; i++) {
            table[i].sample_count = ntohl(table[i].sample_count);
            table[i].sample_duration = ntohl(table[i].sample_duration);
            printf(" count: %u, duration: %u\n", table[i].sample_count, table[i].sample_duration);
        }
    }

    // free(NULL) is a no-op, so this is safe for the first stts atom too
    free(ctx.info->timing_table);
    ctx.info->timing_table_length = header.n_entries;
    ctx.info->timing_table = table;
}
// sample sizes table
/*
 * Parses the payload of an 'stsz' atom from the current file position and
 * stores one size per frame (host byte order) in ctx.info->frame_size_table
 * (length in frame_size_table_length), plus their sum in length_in_bytes.
 *
 * When the header's uniform_size is non-zero every frame has that size and the
 * file carries no per-frame table, so the table is synthesised instead of
 * being read; reading would pick up whatever bytes follow the atom.
 *
 * The sum is kept in 32 bits because that is the width of length_in_bytes, so
 * it wraps for tracks of 4 GiB or more. A truncated header or table, or a
 * failed allocation, is reported on stderr and ends the process with status 1.
 * A table stored by an earlier stsz atom is freed before being replaced.
 */
void parse_stsz(struct parse_context ctx)
{
    struct stsz_header header;
    if(read(ctx.fd, &header, sizeof header) != (ssize_t)sizeof header) {
        fprintf(stderr, "stsz: truncated table header\n");
        exit(1);
    }
    header.uniform_size = ntohl(header.uniform_size);
    header.n_entries = ntohl(header.n_entries);
    printf(" stsz table size (# of frames) %u\n", header.n_entries);

    uint32_t* table = NULL;
    uint32_t total_bytes = 0;
    if(header.n_entries > 0) {
        // calloc refuses an element count whose byte size would overflow size_t
        table = calloc(header.n_entries, sizeof *table);
        if(table == NULL) {
            perror("calloc");
            exit(1);
        }
        if(header.uniform_size != 0) {
            for(size_t i = 0; i < header.n_entries; i++) {
                table[i] = header.uniform_size;
                total_bytes += header.uniform_size;
            }
        } else {
            size_t byte_count = (size_t)header.n_entries * sizeof *table;
            if(read(ctx.fd, table, byte_count) != (ssize_t)byte_count) {
                fprintf(stderr, "stsz: truncated table\n");
                exit(1);
            }
            for(size_t i = 0; i < header.n_entries; i++) {
                table[i] = ntohl(table[i]);
                total_bytes += table[i];
            }
        }
    }

    // free(NULL) is a no-op, so this is safe for the first stsz atom too
    free(ctx.info->frame_size_table);
    ctx.info->frame_size_table_length = header.n_entries;
    ctx.info->frame_size_table = table;
    ctx.info->length_in_bytes = total_bytes;
    printf(" total bytes %u\n", total_bytes);
}
// sample tables
/*
 * Walks the child atoms of an 'stbl' atom and hands each recognised table
 * (stsd, stsc, stco, stss, stts, stsz) to its parser; other children are
 * skipped.
 *
 * ctx.curr_offset is the file offset of the first child and ctx.max_offset is
 * the end of the stbl atom. The first child header is read from the current
 * file position, so the descriptor must already be positioned at
 * ctx.curr_offset. Calls die() if seeking to the next child fails.
 */
void parse_stbl(struct parse_context ctx)
{
    struct atom_header curr_header;
    uint32_t curr_offset = ctx.curr_offset;
    for(;;) {
        bool go_on = read_atom_header(ctx.fd, &curr_header);
        uint64_t size = atom_size(curr_header);
        // uint64_t is not `unsigned long long` on every platform: cast for %llu
        printf(" got subatom, size=%llu, type=%.4s\n", (unsigned long long)size, curr_header.type);
        if(!go_on) {
            break;
        }
        uint32_t header_len = (uint32_t)atom_header_size(curr_header);
        if(size < header_len) {
            // An atom cannot be smaller than its own header; following such a
            // size could re-read the same bytes forever.
            fprintf(stderr, "stbl: subatom is smaller than its header, stopping\n");
            break;
        }
        uint32_t subatom_start_offset = curr_offset + header_len;
        // Compute the end in 64 bits and clamp it to the parent so that an
        // oversized child cannot wrap the 32-bit offset around.
        uint64_t next_offset = (uint64_t)curr_offset + size;
        if(next_offset > ctx.max_offset)
            next_offset = ctx.max_offset;
        curr_offset = (uint32_t)next_offset;
        struct parse_context child_ctx = child_context(ctx, subatom_start_offset, curr_offset);
        if(!strncmp(curr_header.type, "stsd", 4)) {
            parse_stsd(child_ctx);
        } else if(!strncmp(curr_header.type, "stsc", 4)) {
            parse_stsc(child_ctx);
        } else if(!strncmp(curr_header.type, "stco", 4)) {
            parse_stco(child_ctx);
        } else if(!strncmp(curr_header.type, "stss", 4)) {
            parse_stss(child_ctx);
        } else if(!strncmp(curr_header.type, "stts", 4)) {
            parse_stts(child_ctx);
        } else if(!strncmp(curr_header.type, "stsz", 4)) {
            parse_stsz(child_ctx);
        }
        if(curr_offset >= ctx.max_offset) {
            break;
        }
        // lseek returns off_t; storing it in an int would turn valid offsets
        // of 2 GiB and above into apparent errors
        if(lseek(ctx.fd, (off_t)curr_offset, SEEK_SET) < 0)
            die("lseek");
    }
}
// media information
/*
 * Walks the child atoms of a 'minf' atom and descends into 'stbl'; all other
 * children are skipped.
 *
 * ctx.curr_offset is the file offset of the first child and ctx.max_offset is
 * the end of the minf atom. The first child header is read from the current
 * file position, so the descriptor must already be positioned at
 * ctx.curr_offset. Calls die() if seeking to the next child fails.
 */
void parse_minf(struct parse_context ctx)
{
    struct atom_header curr_header;
    uint32_t curr_offset = ctx.curr_offset;
    for(;;) {
        bool go_on = read_atom_header(ctx.fd, &curr_header);
        uint64_t size = atom_size(curr_header);
        // uint64_t is not `unsigned long long` on every platform: cast for %llu
        printf(" got subatom, size=%llu, type=%.4s\n", (unsigned long long)size, curr_header.type);
        if(!go_on) {
            break;
        }
        uint32_t header_len = (uint32_t)atom_header_size(curr_header);
        if(size < header_len) {
            // An atom cannot be smaller than its own header; following such a
            // size could re-read the same bytes forever.
            fprintf(stderr, "minf: subatom is smaller than its header, stopping\n");
            break;
        }
        uint32_t subatom_start_offset = curr_offset + header_len;
        // Compute the end in 64 bits and clamp it to the parent so that an
        // oversized child cannot wrap the 32-bit offset around.
        uint64_t next_offset = (uint64_t)curr_offset + size;
        if(next_offset > ctx.max_offset)
            next_offset = ctx.max_offset;
        curr_offset = (uint32_t)next_offset;
        struct parse_context child_ctx = child_context(ctx, subatom_start_offset, curr_offset);
        if(!strncmp(curr_header.type, "stbl", 4)) {
            parse_stbl(child_ctx);
        }
        if(curr_offset >= ctx.max_offset) {
            break;
        }
        // lseek returns off_t; storing it in an int would turn valid offsets
        // of 2 GiB and above into apparent errors
        if(lseek(ctx.fd, (off_t)curr_offset, SEEK_SET) < 0)
            die("lseek");
    }
}
// media header
/*
 * Parses the payload of an 'mdhd' atom from the current file position, stores
 * timescale and duration (host byte order) in ctx.info, and prints the
 * duration as minutes and seconds.
 *
 * The printed value is derived from duration / timescale, which gives the same
 * minutes and seconds as dividing by 60 * timescale but cannot overflow. A
 * timescale of 0 is printed as 0m0s instead of dividing by zero. A truncated
 * header is reported on stderr and ends the process with status 1.
 */
void parse_mdhd(struct parse_context ctx)
{
    struct mdhd_data header;
    if(read(ctx.fd, &header, sizeof header) != (ssize_t)sizeof header) {
        fprintf(stderr, "mdhd: truncated media header\n");
        exit(1);
    }
    header.timescale = ntohl(header.timescale);
    header.duration = ntohl(header.duration);
    ctx.info->timescale = header.timescale;
    ctx.info->duration = header.duration;

    uint32_t total_seconds = 0;
    if(header.timescale != 0)
        total_seconds = header.duration / header.timescale;
    uint32_t duration_minutes = total_seconds / 60;
    uint32_t duration_seconds = total_seconds % 60;
    printf(" timescale = %u ticks/s, duration %um%us\n", header.timescale, duration_minutes, duration_seconds);
}
// media
/*
 * Walks the child atoms of an 'mdia' atom: 'minf' is descended into and
 * 'mdhd' is parsed for timescale/duration; all other children are skipped.
 *
 * ctx.curr_offset is the file offset of the first child and ctx.max_offset is
 * the end of the mdia atom. The first child header is read from the current
 * file position, so the descriptor must already be positioned at
 * ctx.curr_offset. Calls die() if seeking to the next child fails.
 */
void parse_mdia(struct parse_context ctx)
{
    struct atom_header curr_header;
    uint32_t curr_offset = ctx.curr_offset;
    for(;;) {
        bool go_on = read_atom_header(ctx.fd, &curr_header);
        uint64_t size = atom_size(curr_header);
        // uint64_t is not `unsigned long long` on every platform: cast for %llu
        printf(" got subatom, size=%llu, type=%.4s\n", (unsigned long long)size, curr_header.type);
        if(!go_on) {
            break;
        }
        uint32_t header_len = (uint32_t)atom_header_size(curr_header);
        if(size < header_len) {
            // An atom cannot be smaller than its own header; following such a
            // size could re-read the same bytes forever.
            fprintf(stderr, "mdia: subatom is smaller than its header, stopping\n");
            break;
        }
        uint32_t subatom_start_offset = curr_offset + header_len;
        // Compute the end in 64 bits and clamp it to the parent so that an
        // oversized child cannot wrap the 32-bit offset around.
        uint64_t next_offset = (uint64_t)curr_offset + size;
        if(next_offset > ctx.max_offset)
            next_offset = ctx.max_offset;
        curr_offset = (uint32_t)next_offset;
        struct parse_context child_ctx = child_context(ctx, subatom_start_offset, curr_offset);
        if(!strncmp(curr_header.type, "minf", 4)) {
            parse_minf(child_ctx);
        } else if(!strncmp(curr_header.type, "mdhd", 4)) {
            parse_mdhd(child_ctx);
        }
        if(curr_offset >= ctx.max_offset) {
            break;
        }
        // lseek returns off_t; storing it in an int would turn valid offsets
        // of 2 GiB and above into apparent errors
        if(lseek(ctx.fd, (off_t)curr_offset, SEEK_SET) < 0)
            die("lseek");
    }
}
// track header
// returns true if this is a video track
/*
 * Reads the start of a 'tkhd' atom's payload from the current file position
 * and classifies the track: it counts as video when both width and height are
 * non-zero. The two fields are only compared against zero, so they are not
 * byte-swapped.
 *
 * If the header cannot be read in full, a message is written to stderr and
 * the track is reported as not being video, rather than deciding on
 * uninitialised memory.
 */
bool parse_tkhd(struct parse_context ctx)
{
    struct tkhd_data header;
    if(read(ctx.fd, &header, sizeof header) != (ssize_t)sizeof header) {
        fprintf(stderr, "tkhd: truncated track header\n");
        return false;
    }
    return (header.width != 0 && header.height != 0);
}
// track
/*
 * Walks the child atoms of a 'trak' atom. 'tkhd' decides whether the track is
 * video; if it is not, the rest of the track is abandoned. 'mdia' is
 * descended into; all other children are skipped.
 *
 * ctx.curr_offset is the file offset of the first child and ctx.max_offset is
 * the end of the trak atom. The first child header is read from the current
 * file position, so the descriptor must already be positioned at
 * ctx.curr_offset. Calls die() if seeking to the next child fails.
 */
void parse_trak(struct parse_context ctx)
{
    struct atom_header curr_header;
    uint32_t curr_offset = ctx.curr_offset;
    for(;;) {
        bool go_on = read_atom_header(ctx.fd, &curr_header);
        uint64_t size = atom_size(curr_header);
        // uint64_t is not `unsigned long long` on every platform: cast for %llu
        printf(" got subatom, size=%llu, type=%.4s\n", (unsigned long long)size, curr_header.type);
        if(!go_on) {
            break;
        }
        uint32_t header_len = (uint32_t)atom_header_size(curr_header);
        if(size < header_len) {
            // An atom cannot be smaller than its own header; following such a
            // size could re-read the same bytes forever.
            fprintf(stderr, "trak: subatom is smaller than its header, stopping\n");
            break;
        }
        uint32_t subatom_start_offset = curr_offset + header_len;
        // Compute the end in 64 bits and clamp it to the parent so that an
        // oversized child cannot wrap the 32-bit offset around.
        uint64_t next_offset = (uint64_t)curr_offset + size;
        if(next_offset > ctx.max_offset)
            next_offset = ctx.max_offset;
        curr_offset = (uint32_t)next_offset;
        struct parse_context child_ctx = child_context(ctx, subatom_start_offset, curr_offset);
        if(!strncmp(curr_header.type, "tkhd", 4)) {
            bool is_video = parse_tkhd(child_ctx);
            if(!is_video) {
                printf(" not a video track\n");
                break;
            }
        } else if(!strncmp(curr_header.type, "mdia", 4)) {
            parse_mdia(child_ctx);
        }
        if(curr_offset >= ctx.max_offset) {
            break;
        }
        // lseek returns off_t; storing it in an int would turn valid offsets
        // of 2 GiB and above into apparent errors
        if(lseek(ctx.fd, (off_t)curr_offset, SEEK_SET) < 0)
            die("lseek");
    }
}
// movie
/*
 * Walks the child atoms of a 'moov' atom and descends into every 'trak';
 * all other children are skipped.
 *
 * ctx.curr_offset is the file offset of the first child and ctx.max_offset is
 * the end of the moov atom. The first child header is read from the current
 * file position, so the descriptor must already be positioned at
 * ctx.curr_offset. Calls die() if seeking to the next child fails.
 */
void parse_moov(struct parse_context ctx)
{
    struct atom_header curr_header;
    uint32_t curr_offset = ctx.curr_offset;
    for(;;) {
        bool go_on = read_atom_header(ctx.fd, &curr_header);
        uint64_t size = atom_size(curr_header);
        // uint64_t is not `unsigned long long` on every platform: cast for %llu
        printf(" got subatom, size=%llu, type=%.4s\n", (unsigned long long)size, curr_header.type);
        if(!go_on) {
            break;
        }
        uint32_t header_len = (uint32_t)atom_header_size(curr_header);
        if(size < header_len) {
            // An atom cannot be smaller than its own header; following such a
            // size could re-read the same bytes forever.
            fprintf(stderr, "moov: subatom is smaller than its header, stopping\n");
            break;
        }
        uint32_t subatom_start_offset = curr_offset + header_len;
        // Compute the end in 64 bits and clamp it to the parent so that an
        // oversized child cannot wrap the 32-bit offset around.
        uint64_t next_offset = (uint64_t)curr_offset + size;
        if(next_offset > ctx.max_offset)
            next_offset = ctx.max_offset;
        curr_offset = (uint32_t)next_offset;
        struct parse_context child_ctx = child_context(ctx, subatom_start_offset, curr_offset);
        if(!strncmp(curr_header.type, "trak", 4)) {
            parse_trak(child_ctx);
        }
        if(curr_offset >= ctx.max_offset) {
            break;
        }
        // lseek returns off_t; storing it in an int would turn valid offsets
        // of 2 GiB and above into apparent errors
        if(lseek(ctx.fd, (off_t)curr_offset, SEEK_SET) < 0)
            die("lseek");
    }
}
struct mp4_video_info parse_file(const char* filename)
{
int fd = open(filename, O_RDONLY);
if(fd < 0)
die("open");
int rc = 0;
struct stat stbuf;
rc = fstat(fd, &stbuf);
if(rc < 0)
die("fstat");
int offset = 0;
int file_len = stbuf.st_size;
struct atom_header curr_header;
struct parse_context ctx = empty_context(fd, file_len);
for(;;) {
bool go_on = read_atom_header(fd, &curr_header);
printf("got atom, size=%llu, type=%.4s\n", atom_size(curr_header), curr_header.type);
if(!go_on) {
printf("done. size 0\n");
break;
}
int subatom_start_offset = offset + atom_header_size(curr_header);
offset += atom_size(curr_header);
struct parse_context child_ctx = child_context(ctx, subatom_start_offset, offset);
if(!strncmp(curr_header.type, "moov", 4)) {
parse_moov(child_ctx);
}
if(offset >= file_len) {
printf("done. offset too large\n");
break;
}
int rc = lseek(fd, offset, SEEK_SET);
if(rc < 0)
die("lseek");
}
return *ctx.info;
}
/*
 * Converts a position given in whole seconds into a 0-based frame number,
 * using the timing (stts) table in `info`.
 *
 * The position is first converted to timescale units (in 64 bits, so large
 * inputs cannot wrap) and must lie before info->duration; this is enforced
 * with assert(). The table is then consumed run by run: a run that ends
 * before the position contributes all of its frames, and the search stops
 * inside the first run that contains the position. Stopping there matters,
 * because otherwise the leftover time would be divided by the frame duration
 * of later runs and frames that have not been reached yet would be counted.
 *
 * Runs with a frame duration of 0 take up no time; they are skipped (counting
 * their frames) unless the position has already been reached.
 *
 * If the position lies beyond every run, the total number of frames is
 * returned.
 */
uint32_t seconds_to_frame(struct mp4_video_info* info, uint32_t secs)
{
    uint64_t timestamp = (uint64_t)secs * info->timescale;
    assert(timestamp < info->duration);

    uint32_t frame_no = 0;
    for(size_t i = 0; i < info->timing_table_length; i++) {
        uint32_t run_length = info->timing_table[i].sample_count;
        uint32_t frame_time = info->timing_table[i].sample_duration;

        if(frame_time == 0) {
            if(timestamp == 0)
                break;
            frame_no += run_length;
            continue;
        }

        uint64_t frames_elapsed = timestamp / frame_time;
        if(frames_elapsed < run_length) {
            // the position falls inside this run
            frame_no += (uint32_t)frames_elapsed;
            break;
        }

        // the whole run lies before the position
        frame_no += run_length;
        timestamp -= (uint64_t)run_length * frame_time;
    }
    return frame_no;
}
// Per-chunk summary built by collect_chunk_info by combining the chunk offset
// table with the sample => chunk table.
struct chunk_info {
uint32_t byte_offset_in_file; // copied from the chunk offset table entry for this chunk
uint32_t number_of_frames; // samples_per_chunk of the mapping record chosen for this chunk
};
// returned pointer should be freed by the caller
/*
 * Builds one chunk_info per entry of the chunk offset table, pairing each
 * chunk's file offset with the number of frames it holds.
 *
 * The frame count comes from the sample => chunk table. Each record of that
 * table applies from its first_chunk up to the chunk before the next record's
 * first_chunk. first_chunk numbers chunks from 1, while the loop index here
 * is 0-based, so a record takes effect once first_chunk <= i + 1.
 *
 * If the mapping table is empty every chunk is reported with 0 frames. The
 * returned array is never NULL, even when there are no chunks; a failed
 * allocation is reported on stderr and ends the process with status 1.
 */
struct chunk_info* collect_chunk_info(struct mp4_video_info* info)
{
    size_t number_of_chunks = info->chunk_offset_table_length;
    size_t number_of_runs = info->chunk_mapping_table_length;

    // Allocate at least one element so the result can always be passed to
    // free(); calloc also rejects counts whose byte size would overflow.
    struct chunk_info* res = calloc(number_of_chunks > 0 ? number_of_chunks : 1, sizeof *res);
    if(res == NULL) {
        perror("calloc");
        exit(1);
    }

    size_t run = 0;
    for(size_t i = 0; i < number_of_chunks; i++) {
        // move on to the last record whose first_chunk is at or before chunk i
        while(run + 1 < number_of_runs &&
              info->chunk_mapping_table[run + 1].first_chunk <= i + 1)
            run++;
        res[i].number_of_frames =
            (number_of_runs > 0) ? info->chunk_mapping_table[run].samples_per_chunk : 0;
        res[i].byte_offset_in_file = info->chunk_offset_table[i];
    }
    return res;
}
/*
 * Returns the byte offset in the file at which frame `frame_no` (0-based)
 * starts.
 *
 * `chunks` must hold one element per entry of the chunk offset table, as
 * produced by collect_chunk_info. The chunk containing the frame is found by
 * accumulating the per-chunk frame counts; the result is that chunk's file
 * offset plus the sizes of the frames that precede `frame_no` inside it.
 *
 * Returns 0 when the frame is not part of any chunk (including the case of an
 * empty chunk table) or when the frame size table is too short to cover the
 * frames preceding it. No array is read out of bounds in those cases.
 */
uint32_t frame_offset(struct mp4_video_info* info, struct chunk_info* chunks, uint32_t frame_no)
{
    size_t number_of_chunks = info->chunk_offset_table_length;

    // every frame index below frame_no may be read from the size table
    if(frame_no > info->frame_size_table_length)
        return 0;

    uint32_t first_frame_of_chunk = 0;
    for(size_t c = 0; c < number_of_chunks; c++) {
        uint32_t first_frame_of_next = first_frame_of_chunk + chunks[c].number_of_frames;
        if(first_frame_of_next > frame_no) {
            uint32_t offset = chunks[c].byte_offset_in_file;
            for(uint32_t f = first_frame_of_chunk; f < frame_no; f++)
                offset += info->frame_size_table[f];
            return offset;
        }
        first_frame_of_chunk = first_frame_of_next;
    }
    return 0;
}
/*
 * Returns the 0-based index of the last keyframe at or before `target_frame`.
 *
 * The keyframe table stores frame numbers counted from 1, so each entry is
 * reduced by one before it is compared. A target that is itself a keyframe is
 * returned unchanged; this matches what already happened for target 0.
 * Entries equal to 0 cannot name a frame and are ignored. If no keyframe
 * qualifies (including an empty table) the result is 0.
 */
uint32_t get_nearest_keyframe(struct mp4_video_info* info, uint32_t target_frame)
{
    uint32_t nearest_keyframe = 0;
    for(size_t i = 0; i < info->keyframes_table_length; i++) {
        uint32_t entry = info->keyframes_table[i];
        if(entry == 0)
            continue; // subtracting 1 would wrap around
        uint32_t keyframe = entry - 1;
        if(keyframe <= target_frame && keyframe > nearest_keyframe)
            nearest_keyframe = keyframe;
    }
    return nearest_keyframe;
}
/*
 * Entry point: parses the MP4 file named by argv[1], asks on stdin for a
 * position in seconds, reports the matching frame and the keyframe to start
 * decoding from, and hex-dumps up to the first 160 bytes of that keyframe.
 *
 * Returns 0 on success and 1 on invalid input or I/O failure.
 */
int main(int argc, char** argv)
{
    if(argc < 2) print_usage(argv[0]);
    struct mp4_video_info info = parse_file(argv[1]);

    printf("Timestamp (in seconds?): ");
    fflush(stdout); // the prompt has no newline, so push it out before blocking on input
    int secs;
    if(scanf("%d", &secs) != 1 || secs < 0) {
        fprintf(stderr, "expected a non-negative whole number of seconds\n");
        return 1;
    }

    uint32_t frame_no = seconds_to_frame(&info, (uint32_t)secs);
    printf("Corresponds to frame %u\n", frame_no);
    uint32_t nearest_keyframe = get_nearest_keyframe(&info, frame_no);
    printf("Nearest keyframe: %u\n", nearest_keyframe);

    // NOTE: per the original author, everything from here on has not been
    // thoroughly tested and did not seem to work correctly.
    if(nearest_keyframe >= info.frame_size_table_length) {
        // also covers files in which no video sample table was found
        fprintf(stderr, "no frame size recorded for frame %u\n", nearest_keyframe);
        return 1;
    }
    struct chunk_info* chunks = collect_chunk_info(&info);
    uint32_t offset = frame_offset(&info, chunks, nearest_keyframe);
    uint32_t length = info.frame_size_table[nearest_keyframe];
    free(chunks);
    printf("Position in file: offset %u, length %u\n", offset, length);

    if(lseek(info.fd, (off_t)offset, SEEK_SET) < 0) {
        perror("lseek");
        return 1;
    }
    // never request 0 bytes: malloc(0) may legitimately return NULL
    uint8_t* framebuf = malloc(length > 0 ? length : 1);
    if(framebuf == NULL) {
        perror("malloc");
        return 1;
    }
    ssize_t rc = read(info.fd, framebuf, length);
    if(rc < 0) {
        perror("read");
        free(framebuf);
        return 1;
    }
    uint32_t bytes_read = (uint32_t)rc;
    printf("bytes_read %u, length %u\n", bytes_read, length);
    if(bytes_read != length) {
        fprintf(stderr, "short read: the frame extends past the end of the file\n");
        free(framebuf);
        return 1;
    }

    printf("First 160 bytes of the frame: \n");
    // dump no more than the frame actually contains
    uint32_t shown = (length < 160) ? length : 160;
    for(uint32_t i = 0; i < shown; i++) {
        printf("%02x ", framebuf[i]);
        if(i%8 == 3)
            printf(" ");
        if(i%8 == 7)
            printf("\n");
    }

    free(framebuf);
    free(info.timing_table);
    free(info.chunk_offset_table);
    free(info.chunk_mapping_table);
    free(info.keyframes_table);
    free(info.frame_size_table);
    close(info.fd);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment