Last active
February 17, 2024 14:18
-
-
Save kuzux/fa502dbcb1ed1d8a950d38c3aa13a2a0 to your computer and use it in GitHub Desktop.
Getting a bunch of properties about the video out of an mp4 file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <string.h> | |
#include <stdlib.h> | |
#include <unistd.h> | |
#include <fcntl.h> | |
#include <errno.h> | |
#include <stdbool.h> | |
#include <arpa/inet.h> | |
#include <sys/stat.h> | |
#include <assert.h> | |
// Writes the command-line synopsis to stderr and terminates the process
// with exit status 1. Never returns.
//   prog: text shown as the program name (main passes argv[0]).
void print_usage(const char* prog)
{
    FILE* out = stderr;

    fprintf(out, "Usage: %s FILENAME\n", prog);
    exit(1);
}
// Fatal-error helper: reports msg together with the description of the
// current errno value via perror(), then terminates with exit status 1.
// Never returns.
void die(const char* msg)
{
    const int failure_status = 1;

    perror(msg);
    exit(failure_status);
}
// Header found at the start of every atom. All fields are in host byte
// order once read_atom_header() has filled them in.
struct atom_header {
    uint32_t size;     // 1 means "real size is in ext_size"; 0 stops parsing
    char type[4];      // four-character code, NOT NUL-terminated
    uint64_t ext_size; // 64-bit size; only meaningful when size == 1
};

// Reads one atom header from the current position of fd into *header.
// The 8 basic bytes are always consumed; 8 more are consumed when the
// 32-bit size field is 1 (extended size).
//
// returns true if we should continue reading atoms.
// Returns false when the size field is 0, and also on read error, EOF or a
// truncated header. In the failure cases *header is left zeroed (size == 0)
// so that callers which print it before checking the result never touch
// indeterminate memory.
bool read_atom_header(int fd, struct atom_header* header)
{
    uint8_t raw[8];

    memset(header, 0, sizeof *header);

    if(read(fd, raw, sizeof raw) != (ssize_t)sizeof raw)
        return false;

    // Big-endian decode by hand: no dependence on struct layout or on
    // non-portable helpers such as ntohll().
    header->size = ((uint32_t)raw[0] << 24) | ((uint32_t)raw[1] << 16) |
                   ((uint32_t)raw[2] << 8) | (uint32_t)raw[3];
    memcpy(header->type, raw + 4, sizeof header->type);

    if(header->size == 1) {
        if(read(fd, raw, sizeof raw) != (ssize_t)sizeof raw) {
            // Extended size is missing: report "stop" instead of a bogus size.
            header->size = 0;
            return false;
        }
        uint64_t wide = 0;
        for(size_t i = 0; i < sizeof raw; i++)
            wide = (wide << 8) | raw[i];
        header->ext_size = wide;
    }

    return (header->size != 0);
}
// Total size of an atom in bytes, header included: the 64-bit extended size
// when the 32-bit size field holds the marker value 1, otherwise the 32-bit
// size itself.
uint64_t atom_size(struct atom_header header)
{
    return (header.size == 1) ? header.ext_size : (uint64_t)header.size;
}
// Number of bytes the header itself occupies: 8 for a basic header, 16 when
// the size field is 1 and a 64-bit extended size follows.
int atom_header_size(struct atom_header header)
{
    const int basic_bytes = 8;
    const int extended_size_bytes = 8;
    int total = basic_bytes;

    if(header.size == 1)
        total += extended_size_bytes;
    return total;
}
// Raw image of the start of a 'tkhd' payload, filled directly by read().
// Only the two trailing fields are inspected by this program.
struct tkhd_data {
    uint8_t dont_care[76]; // bytes ahead of the dimensions, never interpreted
    uint32_t width;        // still in file byte order after the read
    uint32_t height;       // still in file byte order after the read
};
// Raw image of an 'mdhd' payload, filled directly by read().
// The 32-bit fields arrive in file byte order.
struct mdhd_data {
    uint8_t dont_care[12];  // leading bytes this program does not interpret
    uint32_t timescale;     // units per second
    uint32_t duration;      // in units of timescale
    uint32_t dont_care_end; // trailing bytes this program does not interpret
};
// Common prefix of the sample-table atoms read by this program
// (stco, stsc, stsd, stss, stts). Filled directly by read().
struct table_header {
    uint32_t flags;     // actually version + flags
    uint32_t n_entries; // entry count, in file byte order until converted
    // followed by n entries
};
// Prefix of the 'stsz' (sample sizes) atom. Filled directly by read().
struct stsz_header {
    uint32_t flags;        // actually version + flags
    uint32_t uniform_size; // 0 if not all frames have the same size
    uint32_t n_entries;    // entry count, in file byte order until converted
    // followed by n entries
};
// Fixed leading part of one 'stsd' (sample description) entry.
// Filled directly by read().
struct stsd_entry {
    uint32_t size;                 // entry size, file byte order until converted
    char format[4];                // four-character code, not NUL-terminated
    uint8_t reserved[6];           // not interpreted
    uint16_t data_reference_index; // not interpreted by this program
};
// One row of the 'stsc' (sample => chunk) table. Filled directly by read();
// parse_stsc() converts the first two fields to host byte order.
struct stsc_entry {
    uint32_t first_chunk;           // first chunk this row applies to
    uint32_t samples_per_chunk;     // samples in each chunk of the run
    uint32_t sample_description_id; // no need to convert to host
};
// One row of the 'stts' (timing) table: a run of consecutive samples that
// all share one duration. Filled directly by read().
struct stts_entry {
    uint32_t sample_count;    // number of samples in the run
    uint32_t sample_duration; // duration of each sample in the run
};
// Everything the parser collects about the video track.
// Each *_table pointer is heap-allocated by the matching parse_* function
// and paired with a *_length field giving its element count.
struct mp4_video_info {
    int fd;                   // descriptor of the file being parsed
    char format[4];           // four-character code from stsd, not NUL-terminated

    uint32_t timescale;       // from mdhd: units per second
    uint32_t duration;        // from mdhd: in units of timescale
    uint32_t width;
    uint32_t height;
    uint32_t length_in_bytes; // sum of the stsz entries

    // stts: runs of samples sharing one duration
    size_t timing_table_length;
    struct stts_entry* timing_table;

    // stco: chunk offsets
    size_t chunk_offset_table_length;
    uint32_t* chunk_offset_table;

    // stsc: sample => chunk mapping
    size_t chunk_mapping_table_length;
    struct stsc_entry* chunk_mapping_table;

    // stss: keyframes
    size_t keyframes_table_length;
    uint32_t* keyframes_table;

    // stsz: per-sample sizes
    size_t frame_size_table_length;
    uint32_t* frame_size_table;
};
// State handed from one parse_* level to the next. Passed by value, so each
// level works on its own copy; only *info is shared between levels.
struct parse_context {
    int fd;                      // file being parsed
    uint32_t curr_offset;        // file offset where the current atom's payload starts
    uint32_t max_offset;         // max for the current atom being parsed
    struct mp4_video_info* info; // results accumulated so far
};
struct parse_context empty_context(int fd, uint32_t file_length) | |
{ | |
struct parse_context res; | |
res.fd = fd; | |
res.curr_offset = 0; | |
res.max_offset = file_length; | |
res.info = malloc(sizeof(struct mp4_video_info)); | |
memset(res.info, 0, sizeof(struct mp4_video_info)); | |
res.info->fd = fd; | |
return res; | |
} | |
// Derives the context for a nested atom: same fd and info as ctx, but with
// the offset window replaced by [start_offset, end_offset].
// ctx arrives by value, so narrowing it in place leaves the caller's copy
// untouched.
struct parse_context child_context(struct parse_context ctx, uint32_t start_offset, uint32_t end_offset)
{
    ctx.curr_offset = start_offset;
    ctx.max_offset = end_offset;
    return ctx;
}
// chunk offsets table | |
void parse_stco(struct parse_context ctx) | |
{ | |
struct table_header header; | |
read(ctx.fd, &header, sizeof(struct table_header)); | |
header.n_entries = ntohl(header.n_entries); | |
printf(" stco table size %u\n", header.n_entries); | |
ctx.info->chunk_offset_table_length = header.n_entries; | |
size_t byte_count = header.n_entries * sizeof(uint32_t); | |
ctx.info->chunk_offset_table = malloc(byte_count); | |
read(ctx.fd, ctx.info->chunk_offset_table, byte_count); | |
for(size_t i=0; i<header.n_entries; i++) | |
ctx.info->chunk_offset_table[i] = ntohl(ctx.info->chunk_offset_table[i]); | |
} | |
// sample => chunk table | |
void parse_stsc(struct parse_context ctx) | |
{ | |
struct table_header header; | |
read(ctx.fd, &header, sizeof(struct table_header)); | |
header.n_entries = ntohl(header.n_entries); | |
printf(" stsc table size %u\n", header.n_entries); | |
ctx.info->chunk_mapping_table_length = header.n_entries; | |
size_t byte_count = header.n_entries * sizeof(struct stsc_entry); | |
ctx.info->chunk_mapping_table = malloc(byte_count); | |
read(ctx.fd, ctx.info->chunk_mapping_table, byte_count); | |
for(uint32_t i=0; i<header.n_entries; i++) { | |
ctx.info->chunk_mapping_table[i].first_chunk = ntohl(ctx.info->chunk_mapping_table[i].first_chunk); | |
ctx.info->chunk_mapping_table[i].samples_per_chunk = ntohl(ctx.info->chunk_mapping_table[i].samples_per_chunk); | |
} | |
} | |
// sample definition table | |
void parse_stsd(struct parse_context ctx) | |
{ | |
struct table_header header; | |
read(ctx.fd, &header, sizeof(struct table_header)); | |
header.n_entries = ntohl(header.n_entries); | |
printf(" stsd table size %u\n", header.n_entries); | |
assert(header.n_entries == 1); | |
struct stsd_entry entry; | |
read(ctx.fd, &entry, sizeof(struct stsd_entry)); | |
entry.size = ntohl(entry.size); | |
printf(" stsd entry size: %u, format: %.4s\n", entry.size, entry.format); | |
memcpy(ctx.info->format, entry.format, 4); | |
} | |
// keyframes table | |
void parse_stss(struct parse_context ctx) | |
{ | |
struct table_header header; | |
read(ctx.fd, &header, sizeof(struct table_header)); | |
header.n_entries = ntohl(header.n_entries); | |
printf(" stss table (# of keyframes) size %u\n", header.n_entries); | |
size_t byte_count = header.n_entries * sizeof(uint32_t); | |
uint32_t* keyframes_table = malloc(byte_count); | |
read(ctx.fd, keyframes_table, byte_count); | |
for(size_t i=0; i<header.n_entries; i++) { | |
keyframes_table[i] = ntohl(keyframes_table[i]); | |
printf(" frame %u is a keyframe\n", keyframes_table[i]); | |
} | |
ctx.info->keyframes_table_length = header.n_entries; | |
ctx.info->keyframes_table = keyframes_table; | |
} | |
// timing information table | |
void parse_stts(struct parse_context ctx) | |
{ | |
struct table_header header; | |
read(ctx.fd, &header, sizeof(struct table_header)); | |
header.n_entries = ntohl(header.n_entries); | |
printf(" stts table size %u\n", header.n_entries); | |
size_t byte_count = header.n_entries * sizeof(struct stts_entry); | |
struct stts_entry* table = malloc(byte_count); | |
read(ctx.fd, table, byte_count); | |
for(size_t i=0; i<header.n_entries; i++) { | |
table[i].sample_count = ntohl(table[i].sample_count); | |
table[i].sample_duration = ntohl(table[i].sample_duration); | |
printf(" count: %u, duration: %u\n", table[i].sample_count, table[i].sample_duration); | |
} | |
ctx.info->timing_table_length = header.n_entries; | |
ctx.info->timing_table = table; | |
} | |
// sample sizes table | |
void parse_stsz(struct parse_context ctx) | |
{ | |
struct stsz_header header; | |
read(ctx.fd, &header, sizeof(struct stsz_header)); | |
header.n_entries = ntohl(header.n_entries); | |
printf(" stsz table size (# of frames) %u\n", header.n_entries); | |
size_t byte_count = header.n_entries * sizeof(uint32_t); | |
uint32_t* table = malloc(byte_count); | |
read(ctx.fd, table, byte_count); | |
uint32_t total_bytes = 0; | |
for(size_t i=0; i<header.n_entries; i++) { | |
table[i] = ntohl(table[i]); | |
total_bytes += table[i]; | |
} | |
ctx.info->frame_size_table_length = header.n_entries; | |
ctx.info->frame_size_table = table; | |
ctx.info->length_in_bytes = total_bytes; | |
printf(" total bytes %u\n", total_bytes); | |
} | |
// sample tables | |
void parse_stbl(struct parse_context ctx) | |
{ | |
struct atom_header curr_header; | |
uint32_t curr_offset = ctx.curr_offset; | |
for(;;) { | |
bool go_on = read_atom_header(ctx.fd, &curr_header); | |
printf(" got subatom, size=%llu, type=%.4s\n", atom_size(curr_header), curr_header.type); | |
if(!go_on) { | |
break; | |
} | |
int subatom_start_offset = curr_offset + atom_header_size(curr_header); | |
curr_offset += atom_size(curr_header); | |
struct parse_context child_ctx = child_context(ctx, subatom_start_offset, curr_offset); | |
if(!strncmp(curr_header.type, "stsd", 4)) { | |
parse_stsd(child_ctx); | |
} | |
if(!strncmp(curr_header.type, "stsc", 4)) { | |
parse_stsc(child_ctx); | |
} | |
if(!strncmp(curr_header.type, "stco", 4)) { | |
parse_stco(child_ctx); | |
} | |
if(!strncmp(curr_header.type, "stss", 4)) { | |
parse_stss(child_ctx); | |
} | |
if(!strncmp(curr_header.type, "stts", 4)) { | |
parse_stts(child_ctx); | |
} | |
if(!strncmp(curr_header.type, "stsz", 4)) { | |
parse_stsz(child_ctx); | |
} | |
if(curr_offset >= ctx.max_offset) { | |
break; | |
} | |
int rc = lseek(ctx.fd, curr_offset, SEEK_SET); | |
if(rc < 0) | |
die("lseek"); | |
} | |
} | |
// media information | |
void parse_minf(struct parse_context ctx) | |
{ | |
struct atom_header curr_header; | |
uint32_t curr_offset = ctx.curr_offset; | |
for(;;) { | |
bool go_on = read_atom_header(ctx.fd, &curr_header); | |
printf(" got subatom, size=%llu, type=%.4s\n", atom_size(curr_header), curr_header.type); | |
if(!go_on) { | |
break; | |
} | |
int subatom_start_offset = curr_offset + atom_header_size(curr_header); | |
curr_offset += atom_size(curr_header); | |
struct parse_context child_ctx = child_context(ctx, subatom_start_offset, curr_offset); | |
if(!strncmp(curr_header.type, "stbl", 4)) { | |
parse_stbl(child_ctx); | |
} | |
if(curr_offset >= ctx.max_offset) { | |
break; | |
} | |
int rc = lseek(ctx.fd, curr_offset, SEEK_SET); | |
if(rc < 0) | |
die("lseek"); | |
} | |
} | |
// media header | |
void parse_mdhd(struct parse_context ctx) | |
{ | |
struct mdhd_data header; | |
read(ctx.fd, &header, sizeof(struct mdhd_data)); | |
header.timescale = ntohl(header.timescale); | |
header.duration = ntohl(header.duration); | |
ctx.info->timescale = header.timescale; | |
ctx.info->duration = header.duration; | |
uint32_t timescale_per_minute = 60*header.timescale; | |
uint32_t duration_minutes = header.duration / timescale_per_minute; | |
uint32_t duration_seconds = (header.duration % timescale_per_minute) / header.timescale; | |
printf(" timescale = %u ticks/s, duration %um%us\n", header.timescale, duration_minutes, duration_seconds); | |
} | |
// media | |
void parse_mdia(struct parse_context ctx) | |
{ | |
struct atom_header curr_header; | |
uint32_t curr_offset = ctx.curr_offset; | |
for(;;) { | |
bool go_on = read_atom_header(ctx.fd, &curr_header); | |
printf(" got subatom, size=%llu, type=%.4s\n", atom_size(curr_header), curr_header.type); | |
if(!go_on) { | |
break; | |
} | |
int subatom_start_offset = curr_offset + atom_header_size(curr_header); | |
curr_offset += atom_size(curr_header); | |
struct parse_context child_ctx = child_context(ctx, subatom_start_offset, curr_offset); | |
if(!strncmp(curr_header.type, "minf", 4)) { | |
parse_minf(child_ctx); | |
} | |
if(!strncmp(curr_header.type, "mdhd", 4)) { | |
parse_mdhd(child_ctx); | |
} | |
if(curr_offset >= ctx.max_offset) { | |
break; | |
} | |
int rc = lseek(ctx.fd, curr_offset, SEEK_SET); | |
if(rc < 0) | |
die("lseek"); | |
} | |
} | |
// track header | |
// returns true if this is a video track | |
bool parse_tkhd(struct parse_context ctx) | |
{ | |
struct tkhd_data header; | |
read(ctx.fd, &header, sizeof(struct tkhd_data)); | |
return (header.width != 0 && header.height != 0); | |
} | |
// track | |
void parse_trak(struct parse_context ctx) | |
{ | |
struct atom_header curr_header; | |
uint32_t curr_offset = ctx.curr_offset; | |
for(;;) { | |
bool go_on = read_atom_header(ctx.fd, &curr_header); | |
printf(" got subatom, size=%llu, type=%.4s\n", atom_size(curr_header), curr_header.type); | |
if(!go_on) { | |
break; | |
} | |
int subatom_start_offset = curr_offset + atom_header_size(curr_header); | |
curr_offset += atom_size(curr_header); | |
struct parse_context child_ctx = child_context(ctx, subatom_start_offset, curr_offset); | |
if(!strncmp(curr_header.type, "tkhd", 4)) { | |
bool is_video = parse_tkhd(child_ctx); | |
if(!is_video) { | |
printf(" not a video track\n"); | |
break; | |
} | |
} | |
if(!strncmp(curr_header.type, "mdia", 4)) { | |
parse_mdia(child_ctx); | |
} | |
if(curr_offset >= ctx.max_offset) { | |
break; | |
} | |
int rc = lseek(ctx.fd, curr_offset, SEEK_SET); | |
if(rc < 0) | |
die("lseek"); | |
} | |
} | |
// movie | |
void parse_moov(struct parse_context ctx) | |
{ | |
struct atom_header curr_header; | |
uint32_t curr_offset = ctx.curr_offset; | |
for(;;) { | |
bool go_on = read_atom_header(ctx.fd, &curr_header); | |
printf(" got subatom, size=%llu, type=%.4s\n", atom_size(curr_header), curr_header.type); | |
if(!go_on) { | |
break; | |
} | |
int subatom_start_offset = curr_offset + atom_header_size(curr_header); | |
curr_offset += atom_size(curr_header); | |
struct parse_context child_ctx = child_context(ctx, subatom_start_offset, curr_offset); | |
if(!strncmp(curr_header.type, "trak", 4)) { | |
parse_trak(child_ctx); | |
} | |
if(curr_offset >= ctx.max_offset) { | |
break; | |
} | |
int rc = lseek(ctx.fd, curr_offset, SEEK_SET); | |
if(rc < 0) | |
die("lseek"); | |
} | |
} | |
struct mp4_video_info parse_file(const char* filename) | |
{ | |
int fd = open(filename, O_RDONLY); | |
if(fd < 0) | |
die("open"); | |
int rc = 0; | |
struct stat stbuf; | |
rc = fstat(fd, &stbuf); | |
if(rc < 0) | |
die("fstat"); | |
int offset = 0; | |
int file_len = stbuf.st_size; | |
struct atom_header curr_header; | |
struct parse_context ctx = empty_context(fd, file_len); | |
for(;;) { | |
bool go_on = read_atom_header(fd, &curr_header); | |
printf("got atom, size=%llu, type=%.4s\n", atom_size(curr_header), curr_header.type); | |
if(!go_on) { | |
printf("done. size 0\n"); | |
break; | |
} | |
int subatom_start_offset = offset + atom_header_size(curr_header); | |
offset += atom_size(curr_header); | |
struct parse_context child_ctx = child_context(ctx, subatom_start_offset, offset); | |
if(!strncmp(curr_header.type, "moov", 4)) { | |
parse_moov(child_ctx); | |
} | |
if(offset >= file_len) { | |
printf("done. offset too large\n"); | |
break; | |
} | |
int rc = lseek(fd, offset, SEEK_SET); | |
if(rc < 0) | |
die("lseek"); | |
} | |
return *ctx.info; | |
} | |
uint32_t seconds_to_frame(struct mp4_video_info* info, uint32_t secs) | |
{ | |
uint32_t timestamp = secs * info->timescale; | |
assert(timestamp < info->duration); | |
uint32_t frame_no = 0; | |
for(uint32_t i = 0; i<info->timing_table_length; i++) { | |
uint32_t curr_frame_time = info->timing_table[i].sample_duration; | |
uint32_t number_of_frames = timestamp / curr_frame_time; | |
if(info->timing_table[i].sample_count < number_of_frames) | |
number_of_frames = info->timing_table[i].sample_count; | |
frame_no += number_of_frames; | |
timestamp -= number_of_frames * curr_frame_time; | |
assert(timestamp + curr_frame_time > 0); | |
if(timestamp <= 0) | |
break; | |
} | |
return frame_no; | |
} | |
// Per-chunk summary produced by collect_chunk_info().
struct chunk_info {
    uint32_t byte_offset_in_file; // taken from the chunk offset table
    uint32_t number_of_frames;    // samples_per_chunk of the mapping row in effect
};
// returned pointer should be freed by the caller | |
struct chunk_info* collect_chunk_info(struct mp4_video_info* info) | |
{ | |
uint32_t number_of_chunks = info->chunk_offset_table_length; | |
uint32_t byte_count = number_of_chunks*sizeof(struct chunk_info); | |
struct chunk_info* res = malloc(byte_count); | |
uint32_t chunk_length_idx = 0; | |
for(uint32_t i=0; i<number_of_chunks; i++) { | |
if(chunk_length_idx < info->chunk_mapping_table_length-1 && | |
info->chunk_mapping_table[chunk_length_idx+1].first_chunk >= i) | |
chunk_length_idx++; | |
res[i].number_of_frames = info->chunk_mapping_table[chunk_length_idx].samples_per_chunk; | |
res[i].byte_offset_in_file = info->chunk_offset_table[i]; | |
} | |
return res; | |
} | |
uint32_t frame_offset(struct mp4_video_info* info, struct chunk_info* chunks, uint32_t frame_no) | |
{ | |
uint32_t number_of_chunks = info->chunk_offset_table_length; | |
uint32_t start_of_current_chunk = 0; | |
uint32_t found_chunk = 0; | |
for(uint32_t i=0; i<number_of_chunks; i++) { | |
uint32_t start_of_next_chunk = start_of_current_chunk + chunks[i].number_of_frames; | |
if(start_of_next_chunk > frame_no) { | |
found_chunk = i; | |
break; | |
} | |
start_of_current_chunk = start_of_next_chunk; | |
} | |
uint32_t offset = chunks[found_chunk].byte_offset_in_file; | |
for(uint32_t i=start_of_current_chunk; i<frame_no; i++) | |
offset += info->frame_size_table[i]; | |
return offset; | |
} | |
uint32_t get_nearest_keyframe(struct mp4_video_info* info, uint32_t target_frame) | |
{ | |
uint32_t nearest_keyframe = 0; | |
for(uint32_t i=0; i<info->keyframes_table_length; i++) { | |
uint32_t keyframe = info->keyframes_table[i] - 1; | |
if(keyframe < target_frame && keyframe > nearest_keyframe) | |
nearest_keyframe = keyframe; | |
} | |
return nearest_keyframe; | |
} | |
int main(int argc, char** argv) | |
{ | |
if(argc < 2) print_usage(argv[0]); | |
struct mp4_video_info info = parse_file(argv[1]); | |
printf("Timestamp (in seconds?): "); | |
int secs; | |
scanf("%d", &secs); | |
uint32_t frame_no = seconds_to_frame(&info, secs); | |
printf("Corresponds to frame %u\n", frame_no); | |
uint32_t nearest_keyframe = get_nearest_keyframe(&info, frame_no); | |
printf("Nearest keyframe: %u\n", nearest_keyframe); | |
// The parts after that are not thoroughly tested and doesn't seem to work correctly | |
struct chunk_info* chunks = collect_chunk_info(&info); | |
uint32_t offset = frame_offset(&info, chunks, nearest_keyframe); | |
uint32_t length = info.frame_size_table[nearest_keyframe]; | |
printf("Position in file: offset %u, length %u\n", offset, length); | |
lseek(info.fd, offset, SEEK_SET); | |
uint8_t* framebuf = malloc(length); | |
uint32_t bytes_read = read(info.fd, framebuf, length); | |
printf("bytes_read %u, length %u\n", bytes_read, length); | |
printf("First 160 bytes of the frame: \n"); | |
assert(bytes_read == length); | |
for(int i=0; i<160; i++) { | |
printf("%02x ", framebuf[i]); | |
if(i%8 == 3) | |
printf(" "); | |
if(i%8 == 7) | |
printf("\n"); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment