Skip to content

Instantly share code, notes, and snippets.

@Justasic
Last active November 28, 2017 12:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Justasic/ad8f04c465a8bd121da6 to your computer and use it in GitHub Desktop.
Save Justasic/ad8f04c465a8bd121da6 to your computer and use it in GitHub Desktop.
This is a program which decodes EA's .BIG file format used for C&C Generals and C&C Generals Zero Hour. Requires vec (https://github.com/rxi/vec)
/*
Copyright (c) 2017 Justin Crawford <Justin@stacksmash.net>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#define _GNU_SOURCE 1
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <stddef.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <limits.h>
#include <endian.h>
#include <dirent.h>
// Requires vec (see https://github.com/rxi/vec)
#include "vec.h"
// Evaluates to the smaller of a and b.
// Every use of an argument is parenthesized so that operator precedence inside
// the arguments cannot change the meaning of the expansion. Each argument may
// be evaluated twice, so do not pass expressions with side effects.
#define MIN(a, b) (((b) < (a)) ? (b) : (a))
// Header of the entire .BIG archive file.
// It occurs exactly once, at the very start of the file; Extract() and
// ListFiles() read it with a single fread() of sizeof(header_t) bytes.
typedef struct header_s
{
// Magic bytes, "BIGF" or "BIG4". Not NUL-terminated: the readers copy the
// four bytes into a 5-byte buffer before printing them.
char extension[4];
// Total size of the archive file in bytes. Documented by the author as little
// endian; the code in this file writes and prints it without any byte swapping.
uint32_t size;
// Number of files in the archive - big endian on disk (converted with
// be32toh() after reading and htobe32() before writing).
uint32_t files;
// Described by the author as the size of the index table - big endian on disk.
// Create() stores sizeof(header_t) plus the index size here, which is the
// offset at which it starts writing file data.
uint32_t index_table;
} header_t;
// In-memory form of one entry of the index table.
// On disk an entry is the two 32-bit integers below followed by the file name
// as a NUL-terminated string; the pointers are never written to the archive.
typedef struct FileEntry_s
{
// Offset of the file's data from the start of the archive - big endian on disk.
uint32_t pos;
// Length of the file's data in bytes - big endian on disk.
uint32_t size;
// File name as stored in the index; heap allocated and released with free().
char *filename;
// Next entry in the singly linked list built by Extract(); Create() keeps its
// entries in a vector instead and sets this to NULL.
struct FileEntry_s *next;
} FileEntry_t;
// Growable array (vec.h) of heap-allocated index entries; used by Create().
typedef vec_t(FileEntry_t*) FileVector_t;
// Growable array (vec.h) of heap-allocated path strings; used for directory listings.
typedef vec_t(char*) filelist_t;
// The .BIG file format is setup in this order using the above structs:
// .Big file {
// header_t
//
// index table {
// FileEntry_t
// FileEntry_t
// ...
// }
//
// Data
// ...
// }
//
// The file consists of a global header, an index of the embedded files,
// and the actual file data.
//
// This is just a simplified sketch of how the format is laid out inside the file.
// The layout is the same for both "BIG4" and "BIGF" files.
// The code below shows how to decode (and create) such a file.
//
// The data is in whatever endianness it was archived into and starts at the position
// given in the FileEntry_t structure and ends where position + file size is.
// You should use the index table for finding all files instead of calculating offsets.
// Format a byte count as a short human readable string such as "512 B" or
// "3 MiB" (the value is truncated, not rounded, to the largest unit that fits).
//
// The returned string lives in a small ring of static buffers, so it stays
// valid after the function returns and several results can be used within a
// single printf() call. A result is overwritten after SLOT_COUNT further
// calls. The shared buffers make this function unsuitable for concurrent use
// from multiple threads.
static inline const char *GetHighestSize(uint64_t size)
{
    static const char *const sizes[] = { "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" };
    enum
    {
        UNIT_COUNT = sizeof(sizes) / sizeof(sizes[0]),
        SLOT_COUNT = 8,
        SLOT_SIZE = 48
    };
    static char slots[SLOT_COUNT][SLOT_SIZE];
    static unsigned int next_slot = 0;

    // Divide by 1024 until the value fits the current unit.
    unsigned int si = 0;
    for (; size >= 1024; si++, size >>= 10)
        ;

    char *ret = slots[next_slot];
    next_slot = (next_slot + 1) % SLOT_COUNT;

    // Compare against the number of units, not the byte size of the table.
    snprintf(ret, SLOT_SIZE, "%llu %s", (unsigned long long)size,
             si < UNIT_COUNT ? sizes[si] : "(hello future!)");
    return ret;
}
// Create the directory `dir` together with every missing parent directory
// (the equivalent of `mkdir -p`). Directories are created with mode S_IRWXU.
//
// A single trailing '/' is ignored. NULL and empty paths are ignored.
// Components that already exist are not an error; any other failure to create
// the final directory is reported on stderr.
void _mkdir(const char *dir)
{
    if (!dir || !*dir)
        return;

    // Work on a private, exactly sized copy so long paths are never truncated.
    size_t len = strlen(dir);
    char *tmp = malloc(len + 1);
    if (!tmp)
    {
        fprintf(stderr, "Out of memory while creating directory \"%s\"\n", dir);
        return;
    }
    memcpy(tmp, dir, len + 1);

    // Drop one trailing slash, but keep a lone "/" intact.
    if (len > 1 && tmp[len - 1] == '/')
        tmp[--len] = 0;

    // Temporarily cut the path at every separator and create that prefix.
    // Starting at tmp + 1 skips the leading '/' of an absolute path.
    for (char *p = tmp + 1; *p; p++)
    {
        if (*p != '/')
            continue;

        *p = 0;
        mkdir(tmp, S_IRWXU); // best effort: parents usually exist already
        *p = '/';
    }

    if (mkdir(tmp, S_IRWXU) != 0 && errno != EEXIST)
        fprintf(stderr, "Failed to create directory \"%s\": %s\n", tmp, strerror(errno));

    free(tmp);
}
// Normalize Windows paths to POSIX paths by replacing every backslash with a
// forward slash. Windows will accept POSIX paths and fix them automatically.
//
// The string is modified in place and `path` itself is returned.
// A NULL `path` is returned unchanged.
char *NormalizePath(char *path)
{
    if (!path)
        return NULL;

    // Test each character before advancing so the first one is covered too.
    for (char *p = path; *p; ++p)
    {
        if (*p == '\\')
            *p = '/';
    }
    return path;
}
// Give this function a file path and it will create every directory leading
// up to the file, i.e. everything up to and including the last '/'.
// Backslashes are treated as path separators.
//
// A path without any directory part needs no directories, so nothing is
// created for it. A NULL path is ignored.
void MakeDirectoryFromPath(const char *path)
{
    if (!path)
        return;

    char *str = strdup(path);
    if (!str)
    {
        fprintf(stderr, "Out of memory while creating directories for \"%s\"\n", path);
        return;
    }
    NormalizePath(str);

    // Cut the string right after the last separator; what remains is the
    // directory part of the path.
    char *slash = strrchr(str, '/');
    if (slash)
    {
        slash[1] = 0;
        _mkdir(str);
    }

    free(str);
}
// Returns 1 when `dir` names an existing directory and 0 otherwise
// (including when the path cannot be stat()ed at all).
static inline int is_directory(const char *dir)
{
    struct stat info;

    memset(&info, 0, sizeof(info));
    return (stat(dir, &info) == 0 && S_ISDIR(info.st_mode)) ? 1 : 0;
}
// Returns the size in bytes of the file `filename` as reported by stat().
// On failure a message is printed to stderr and -1 is returned.
off_t fsize(const char *filename)
{
    struct stat info;

    if (stat(filename, &info) != 0)
    {
        fprintf(stderr, "Cannot determine size of %s: %s\n", filename, strerror(errno));
        return -1;
    }

    return info.st_size;
}
// List the entries of directory `dir` (not recursive).
//
// Returns a vector of "<dir>/<name>" strings, one per entry, skipping "." and
// "..". Every string is heap allocated and owned by the caller. If the
// directory cannot be opened an error is printed and an empty vector is
// returned.
static inline filelist_t getdir(const char *dir)
{
    filelist_t files;
    vec_init(&files);

    DIR *dp = opendir(dir);
    if (dp == NULL)
    {
        printf("Error opening \"%s\": %s (%d)\n", dir, strerror(errno), errno);
        return files;
    }

    struct dirent *dirp;
    while ((dirp = readdir(dp)) != NULL)
    {
        const char *dirn = dirp->d_name;
        if (!strcmp(dirn, ".") || !strcmp(dirn, ".."))
            continue;

        // asprintf leaves the output pointer undefined on failure, so the
        // result must be checked before the pointer is stored.
        char *filename = NULL;
        if (asprintf(&filename, "%s/%s", dir, dirn) < 0)
        {
            fprintf(stderr, "Out of memory while listing \"%s\"\n", dir);
            continue;
        }
        vec_push(&files, filename);
    }

    closedir(dp);
    return files;
}
// Recursively list everything below directory `dir`.
//
// The result contains the entries of `dir` itself followed by the entries of
// every subdirectory; the subdirectories themselves are part of the list.
// Every string is heap allocated and owned by the caller. If `dir` is not a
// directory the result is empty.
//
// NOTE(review): is_directory() uses stat(), which follows symbolic links, so
// a link pointing back at a parent directory would presumably recurse until
// the path becomes too long.
static inline filelist_t GPKRecursiveDirectoryList(const char *dir)
{
    filelist_t files;
    vec_init(&files);

    if (!is_directory(dir))
        return files;

    files = getdir(dir);

    // Collect the contents of all subdirectories separately so `files` is not
    // modified while it is being iterated.
    filelist_t nested;
    vec_init(&nested);

    int32_t iter = 0;
    char *file = NULL;
    vec_foreach(&files, file, iter)
    {
        if (is_directory(file))
        {
            filelist_t sub = GPKRecursiveDirectoryList(file);
            vec_extend(&nested, &sub);
            // The strings now belong to `nested`; release only the array.
            vec_deinit(&sub);
        }
    }

    // Append the vectors together
    vec_extend(&files, &nested);
    vec_deinit(&nested);
    return files;
}
// Copy `length` bytes from one stream to another.
// source is the source file, srcoffset is the offset inside the source file
// at which reading starts, and dest is the destination file, which is written
// at its current position.
//
// Nothing happens if either stream is NULL. Copying stops early, with a
// message on stderr, if the source ends before `length` bytes were read or if
// a read or write fails. The number of bytes actually copied is printed.
void CopyFiles(FILE *source, size_t srcoffset, size_t length, FILE *dest)
{
    if (!source || !dest)
        return;

#define COPY_SIZE 1024
    uint8_t buf[COPY_SIZE];
    // Number of bytes copied so far
    size_t sz = 0;

    // Go to the place where the data starts in the source
    if (fseek(source, (long)srcoffset, SEEK_SET) != 0)
    {
        fprintf(stderr, "Failed to seek to offset %zu: %s\n", srcoffset, strerror(errno));
        return;
    }

    // Copy loop
    while (sz < length)
    {
        // How much to copy in this round?
        size_t want = length - sz;
        if (want > COPY_SIZE)
            want = COPY_SIZE;

        size_t got = fread(buf, 1, want, source);
        if (got > 0 && fwrite(buf, 1, got, dest) != got)
        {
            fprintf(stderr, "Write failed after %zu bytes: %s\n", sz, strerror(errno));
            break;
        }
        sz += got;

        // A short read means end of file or a read error; never pad the
        // output with bytes that were not in the source.
        if (got < want)
        {
            fprintf(stderr, "Source ended after %zu of %zu bytes\n", sz, length);
            break;
        }
    }

    printf("Copied %zu (%s) bytes\n", sz, GetHighestSize(sz));
}
// qsort()-style comparator for arrays of FileEntry_t pointers:
// orders the entries by file name using strcmp().
int compare(const void *va, const void *vb)
{
    const FileEntry_t *const *lhs = va;
    const FileEntry_t *const *rhs = vb;

    return strcmp((*lhs)->filename, (*rhs)->filename);
}
// Create a big file based on the file list.
// This function will list all directories and subdirs to include their file
// paths into the archive. All paths are stored exactly as they were given or
// discovered, i.e. relative paths stay relative.
//
// archive: path of the .BIG file to write (truncated if it exists).
// argc:    number of entries in argv.
// argv:    files and directories to add.
//
// Files that cannot be sized or opened, or that do not fit the 32-bit size
// field, are skipped with a message. The header (file count, data offset and
// total size) is written last, so it always describes what was actually
// stored. Errors are reported on the console; the function returns nothing.
void Create(const char *archive, int argc, char **argv)
{
    FILE *f = fopen(archive, "wb");
    if (!f)
    {
        printf("Failed to open file \"%s\": %s (%d)\n", archive, strerror(errno), errno);
        return;
    }

    printf("Create called with %d initial file%s:\n", argc, argc == 1 ? "" : "s");

    // Everything the cleanup code touches is declared before the first goto.
    static const uint8_t placeholder[sizeof(header_t)] = { 0 };
    int ok = 0;
    int iter = 0;
    char *file = NULL;
    FileEntry_t *filee = NULL;
    // Size of the index table that follows the header.
    uint32_t preludebytes = 0;
    uint32_t writtensize = 0;
    uint32_t datastart = 0;
    long total = 0;
    filelist_t files;
    FileVector_t filev;
    vec_init(&files);
    vec_init(&filev);

    // Step 1: Generate a list of files.
    for (int i = 0; i < argc; ++i)
    {
        printf("file[%d]: %s\n", i, argv[i]);
        if (is_directory(argv[i]))
        {
            filelist_t temp = GPKRecursiveDirectoryList(argv[i]);
            vec_extend(&files, &temp);
            // The strings now belong to `files`; release only the array.
            vec_deinit(&temp);
        }
        else
        {
            char *copy = strdup(argv[i]);
            if (!copy)
            {
                fprintf(stderr, "Out of memory while adding \"%s\"\n", argv[i]);
                continue;
            }
            vec_push(&files, copy);
        }
    }

    // Step 2: Drop directories and unusable files, and turn the remaining
    // names into index entries. Each entry takes ownership of its name.
    vec_foreach(&files, file, iter)
    {
        // The big archive does not contain directories.
        if (is_directory(file))
        {
            printf("\"%s\" is a directory, skipping...\n", file);
            free(file);
            continue;
        }

        // fsize() prints its own message when it fails.
        off_t size = fsize(file);
        if (size < 0)
        {
            free(file);
            continue;
        }
        if ((uint64_t)size > UINT32_MAX)
        {
            fprintf(stderr, "\"%s\" is too large for the BIG format, skipping...\n", file);
            free(file);
            continue;
        }

        // Make sure the file can be read before space is reserved for it.
        FILE *probe = fopen(file, "rb");
        if (!probe)
        {
            fprintf(stderr, "Failed to open file \"%s\" for reading: %s (%d)\n", file, strerror(errno), errno);
            free(file);
            continue;
        }
        fclose(probe);

        FileEntry_t *entry = malloc(sizeof(*entry));
        if (!entry)
        {
            fprintf(stderr, "Out of memory while adding \"%s\"\n", file);
            free(file);
            continue;
        }
        entry->filename = file;
        entry->pos = 0x0; // filled in when the data is written
        entry->size = (uint32_t)size;
        entry->next = NULL; // not used by Create
        vec_push(&filev, entry);

        // Index entry: file position + file size + file name + null byte.
        preludebytes += strlen(file) + 1 + (sizeof(uint32_t) * 2);
    }
    vec_deinit(&files);

    printf("Found %d files to archive:\n", filev.length);

    // Step 3: Reserve the header. The real values are written in step 7.
    if (fwrite(placeholder, 1, sizeof(placeholder), f) != sizeof(placeholder))
    {
        fprintf(stderr, "Failed to write to \"%s\": %s\n", archive, strerror(errno));
        goto cleanup;
    }

    // Step 4: Skip the space reserved for the index; file data comes after it.
    datastart = (uint32_t)sizeof(header_t) + preludebytes;
    if (fseek(f, (long)datastart, SEEK_SET) != 0)
    {
        fprintf(stderr, "Failed to seek in \"%s\": %s\n", archive, strerror(errno));
        goto cleanup;
    }

    // Sort the vector so data and index are both written in name order.
    vec_sort(&filev, compare);

    // Debug message
    printf("Started writing files at 0x%lX\n", (unsigned long)ftell(f));

    // Step 5: Start copying files into the archive.
    vec_foreach(&filev, filee, iter)
    {
        long start = ftell(f);
        if (start < 0 || (unsigned long)start > UINT32_MAX)
        {
            fprintf(stderr, "Archive \"%s\" grew beyond what the BIG format can address\n", archive);
            goto cleanup;
        }
        // Set our file position.
        filee->pos = (uint32_t)start;

        FILE *src = fopen(filee->filename, "rb");
        if (!src)
        {
            fprintf(stderr, "Failed to open file \"%s\" for reading: %s (%d)\n", filee->filename, strerror(errno), errno);
            // Space for this entry is already reserved in the index, so keep
            // it as an empty file instead of corrupting the layout.
            filee->size = 0;
            continue;
        }

        // Message.
        printf("Writing file \"%s\" (%s) into archive \"%s\" at 0x%X\n",
               filee->filename, GetHighestSize(filee->size), archive, filee->pos);
        printf("Next calculated file position starts at 0x%X\n", filee->pos + filee->size);

        // Copy the file into the archive.
        CopyFiles(src, 0, filee->size, f);
        fclose(src);

        // Record what really ended up in the archive in case the file
        // changed size while it was being copied.
        long end = ftell(f);
        if (end >= start && (uint32_t)(end - start) != filee->size)
        {
            fprintf(stderr, "WARNING: \"%s\" changed while being archived, stored %ld bytes\n",
                    filee->filename, end - start);
            filee->size = (uint32_t)(end - start);
        }
    }

    // Step 6: Write the file table right after the BIG header.
    if (fseek(f, (long)sizeof(header_t), SEEK_SET) != 0)
    {
        fprintf(stderr, "Failed to seek in \"%s\": %s\n", archive, strerror(errno));
        goto cleanup;
    }

    // Debug message?
    printf("Writing file index...\n");

    vec_foreach(&filev, filee, iter)
    {
        printf("Writing index entry %d: \"%s\" (%u - %s) which is located at 0x%X \n", iter, filee->filename, filee->size, GetHighestSize(filee->size), filee->pos);

        // Position and size are stored big endian, followed by the name
        // including its null terminator.
        uint32_t fields[2] = { htobe32(filee->pos), htobe32(filee->size) };
        size_t namelen = strlen(filee->filename) + 1;
        if (fwrite(fields, sizeof(uint32_t), 2, f) != 2 ||
            fwrite(filee->filename, 1, namelen, f) != namelen)
        {
            fprintf(stderr, "Failed to write the index of \"%s\": %s\n", archive, strerror(errno));
            goto cleanup;
        }
        writtensize += namelen + sizeof(fields);
    }

    if (writtensize != preludebytes)
        printf("WARNING: Written index differs from estimated index! (%u - %u or %s - %s)\n",
               writtensize, preludebytes, GetHighestSize(writtensize), GetHighestSize(preludebytes));
    else
        printf("Written index and estimated index size %u (%s)\n", writtensize, GetHighestSize(writtensize));

    // Step 7: Now that every value is known, write the real header.
    if (fseek(f, 0, SEEK_END) != 0 || (total = ftell(f)) < 0)
    {
        fprintf(stderr, "Failed to determine the size of \"%s\": %s\n", archive, strerror(errno));
        goto cleanup;
    }
    if ((unsigned long)total > UINT32_MAX)
    {
        fprintf(stderr, "Archive \"%s\" is too large for the BIG format\n", archive);
        goto cleanup;
    }
    {
        // Total size is little endian, the other two fields are big endian.
        uint32_t fields[3] = {
            htole32((uint32_t)total),
            htobe32((uint32_t)filev.length),
            htobe32(datastart)
        };
        if (fseek(f, 0, SEEK_SET) != 0 ||
            fwrite("BIGF", 1, 4, f) != 4 ||
            fwrite(fields, sizeof(uint32_t), 3, f) != 3)
        {
            fprintf(stderr, "Failed to write the header of \"%s\": %s\n", archive, strerror(errno));
            goto cleanup;
        }
    }
    ok = 1;

cleanup:
    // Release the entries and their names.
    vec_foreach(&filev, filee, iter)
    {
        free(filee->filename);
        free(filee);
    }
    vec_deinit(&filev);

    // fclose flushes buffered data, so a failure here means a broken archive.
    if (fclose(f) != 0)
    {
        fprintf(stderr, "Failed to finish writing \"%s\": %s\n", archive, strerror(errno));
        ok = 0;
    }

    if (ok)
        printf("Writing finished successfully!\n");
}
// Returns 1 if any '/'-separated component of `rel` is "..", i.e. if the
// path could climb out of the directory it is appended to.
static int ExtractPathEscapes(const char *rel)
{
    while (*rel)
    {
        const char *end = strchr(rel, '/');
        size_t n = end ? (size_t)(end - rel) : strlen(rel);
        if (n == 2 && rel[0] == '.' && rel[1] == '.')
            return 1;
        if (!end)
            break;
        rel = end + 1;
    }
    return 0;
}

// Extract a big file
//
// filepath:     the .BIG archive to read.
// dest:         existing directory that receives the extracted files.
// SpecificFile: when not NULL, only entries whose name starts with this
//               string are extracted (prefix match); NULL extracts everything.
//
// The header and the index are printed while they are read. The process
// exits if the archive cannot be opened; every other problem (truncated or
// malformed index, bad destination, entries pointing outside the archive or
// outside `dest`) is reported and extraction is abandoned or the offending
// entry is skipped.
void Extract(const char *filepath, const char *dest, const char *SpecificFile)
{
    FILE *f = fopen(filepath, "rb");
    if (!f)
    {
        fprintf(stderr, "Failed to open file: %s\n", strerror(errno));
        exit(EXIT_FAILURE);
    }

    // Everything the cleanup code touches is declared before the first goto.
    FileEntry_t *entries = NULL;
    // Points at the link that receives the next entry.
    FileEntry_t **tail = &entries;
    int found = 0;
    char pbuf[PATH_MAX];
    header_t h;

    // Determine the real file size
    fseek(f, 0, SEEK_END);
    long endpos = ftell(f);
    size_t filesize = endpos < 0 ? 0 : (size_t)endpos;
    rewind(f);

    if (fread(&h, sizeof(header_t), 1, f) != 1)
    {
        fprintf(stderr, "\"%s\" is too small to contain a BIG header\n", filepath);
        goto cleanup;
    }

    // The total size is little endian, the other fields are big endian.
    h.size = le32toh(h.size);
    h.files = be32toh(h.files);
    h.index_table = be32toh(h.index_table);

    // Null termination so we can print stuff.
    char str[5];
    memcpy(str, h.extension, 4);
    str[4] = 0;

    // Print some useful info on the header.
    printf("EXT: %s\n", str);
    printf("Size: %u (%s)\nFiles: %u\nindex_table: %u (0x%X)\n", h.size, GetHighestSize(h.size), h.files, h.index_table, h.index_table);
    printf("File size: %zu (%s)\n", filesize, GetHighestSize(filesize));

    // Now analyze the index table.
    printf("Dumping index table:\n\n");

    for (uint32_t i = 0; i < h.files; ++i)
    {
        uint32_t fields[2];
        char filename[2048]; // 2 Kilobytes for a file string. This should be enough.

        // File position and file size, both big endian.
        if (fread(fields, sizeof(uint32_t), 2, f) != 2)
        {
            fprintf(stderr, "Index entry %u is truncated\n", i);
            goto cleanup;
        }
        uint32_t pos = be32toh(fields[0]);
        uint32_t size = be32toh(fields[1]);

        // The name has no length prefix, so it is read one char at a time up
        // to its null byte, never writing past the end of the buffer.
        size_t n = 0;
        int c;
        while ((c = fgetc(f)) != EOF && c != 0)
        {
            if (n + 1 >= sizeof(filename))
                break;
            filename[n++] = (char)c;
        }
        if (c != 0)
        {
            fprintf(stderr, "Index entry %u has an unterminated or overlong file name\n", i);
            goto cleanup;
        }
        filename[n] = 0;

        // Check and see if this is the file (if applicable)
        if (SpecificFile)
        {
            if (strncmp(SpecificFile, filename, strlen(SpecificFile)))
                continue; // Filenames don't match.
            found = 1;
        }

        // Append the entry to the list.
        FileEntry_t *entry = calloc(1, sizeof(*entry));
        char *name = strdup(filename);
        if (!entry || !name)
        {
            fprintf(stderr, "Out of memory while reading the index\n");
            free(entry);
            free(name);
            goto cleanup;
        }
        entry->pos = pos;
        entry->size = size;
        entry->filename = name;
        *tail = entry;
        tail = &entry->next;

        // Print file info.
        printf(" Index Num: %u\n File: %s\n Size: %u (%s)\n Position: 0x%X\n\n", i, filename, size, GetHighestSize(size), pos);
    }

    if (SpecificFile && !found)
    {
        printf("Could not find file \"%s\" in archive \"%s\"\n", SpecificFile, filepath);
        goto cleanup;
    }

    // Resolve the destination once; realpath leaves the buffer undefined on failure.
    if (!realpath(dest, pbuf))
    {
        fprintf(stderr, "Cannot use destination \"%s\": %s\n", dest, strerror(errno));
        goto cleanup;
    }

    // Now extract the entries.
    for (FileEntry_t *file = entries; file; file = file->next)
    {
        // Never read beyond the end of the archive.
        if ((uint64_t)file->pos + file->size > filesize)
        {
            fprintf(stderr, "Skipping \"%s\": its data lies outside the archive\n", file->filename);
            continue;
        }

        // figure out the path
        char *path = NULL;
        if (asprintf(&path, "%s/%s", pbuf, file->filename) < 0)
        {
            fprintf(stderr, "Out of memory while extracting \"%s\"\n", file->filename);
            break;
        }
        NormalizePath(path);

        // The archive is untrusted input: refuse names that would leave the
        // destination directory. The check skips the destination prefix.
        if (ExtractPathEscapes(path + strlen(pbuf) + 1))
        {
            fprintf(stderr, "Skipping \"%s\": path leaves the destination directory\n", file->filename);
            free(path);
            continue;
        }

        printf("Extracting \"%s\" to \"%s\" ...\n", file->filename, path);

        // Make sure the folder(s) exist
        MakeDirectoryFromPath(path);

        // Open the file for binary writing
        FILE *out = fopen(path, "wb");
        if (!out)
        {
            fprintf(stderr, "Failed to create \"%s\": %s\n", path, strerror(errno));
            free(path);
            continue;
        }

        // Copy the data; CopyFiles seeks to the entry's position itself.
        CopyFiles(f, file->pos, file->size, out);

        if (fclose(out) != 0)
            fprintf(stderr, "Failed to finish writing \"%s\": %s\n", path, strerror(errno));
        free(path);
    }

cleanup:
    // Deallocate our stuff.
    while (entries)
    {
        FileEntry_t *next = entries->next;
        free(entries->filename);
        free(entries);
        entries = next;
    }
    fclose(f);
}
// Print the header and the complete index table of the .BIG archive `file`.
//
// The process exits if the archive cannot be opened. A header or index that
// is truncated or malformed is reported on stderr and ends the listing.
void ListFiles(const char *file)
{
    FILE *f = fopen(file, "rb");
    if (!f)
    {
        fprintf(stderr, "Failed to open file: %s\n", strerror(errno));
        exit(EXIT_FAILURE);
    }

    // Determine the real file size
    fseek(f, 0, SEEK_END);
    long endpos = ftell(f);
    size_t filesize = endpos < 0 ? 0 : (size_t)endpos;
    rewind(f);

    header_t h;
    if (fread(&h, sizeof(header_t), 1, f) != 1)
    {
        fprintf(stderr, "\"%s\" is too small to contain a BIG header\n", file);
        fclose(f);
        return;
    }

    // Fix endianness: the total size is little endian, the rest big endian.
    h.size = le32toh(h.size);
    h.files = be32toh(h.files);
    h.index_table = be32toh(h.index_table);

    // Null termination so we can print stuff.
    char str[5];
    memcpy(str, h.extension, 4);
    str[4] = 0;

    // Print some useful info on the header.
    printf("EXT: %s\n", str);
    printf("Size: %u (%s)\nFiles: %u\nindex_table: %u (0x%X)\n", h.size, GetHighestSize(h.size), h.files, h.index_table, h.index_table);
    printf("File size: %zu (%s)\n", filesize, GetHighestSize(filesize));

    // Now analyze the index table.
    printf("Dumping index table:\n\n");
    for (uint32_t i = 0; i < h.files; ++i)
    {
        uint32_t fields[2];
        char filename[2048]; // 2 Kilobytes for a file string. This should be enough.

        // File position and file size, both big endian.
        if (fread(fields, sizeof(uint32_t), 2, f) != 2)
        {
            fprintf(stderr, "Index entry %u is truncated\n", i);
            break;
        }
        uint32_t pos = be32toh(fields[0]);
        uint32_t size = be32toh(fields[1]);

        // The name has no length prefix, so it is read one char at a time up
        // to its null byte, never writing past the end of the buffer.
        size_t n = 0;
        int c;
        while ((c = fgetc(f)) != EOF && c != 0)
        {
            if (n + 1 >= sizeof(filename))
                break;
            filename[n++] = (char)c;
        }
        if (c != 0)
        {
            fprintf(stderr, "Index entry %u has an unterminated or overlong file name\n", i);
            break;
        }
        filename[n] = 0;

        // Print file info.
        printf(" Index Num: %u\n File: %s\n Size: %u (%s)\n Position: 0x%.7X\n\n", i, filename, size, GetHighestSize(size), pos);
    }

    fclose(f);
}
// Print the usage summary to stderr.
// `filename` is the program name shown in the syntax line.
void DumpHelp(const char *filename)
{
    static const char *const option_lines[] = {
        " -c Create an archive\n",
        " -x Extract a file\n",
        " -l List files in archive\n",
        " -v Verbose\n",
        " -h Print this message\n",
    };

    fprintf(stderr, "Syntax: %s [option] [file(s)]...\n\n", filename);
    for (size_t line = 0; line < sizeof(option_lines) / sizeof(option_lines[0]); ++line)
        fputs(option_lines[line], stderr);
}
// Entry point :D
//
// Usage:
//   -c archive.big [files|dirs]...   create an archive
//   -x archive.big dest [file]       extract everything, or only `file`
//   -l archive.big...                list the index of each archive
//   -v                               accepted and ignored
//   -h                               print the help text
//
// Arguments are scanned for the first command option. -c, -x and -l consume
// all remaining arguments, so the program finishes once one of them has run;
// this keeps file names that start with '-' from being mistaken for options.
// Returns EXIT_FAILURE for usage errors and EXIT_SUCCESS otherwise.
int main(int argc, char **argv)
{
    if (argc == 1)
    {
        DumpHelp(argv[0]);
        return EXIT_FAILURE;
    }

    printf("Args:\n");
    for (int i = 0; i < argc; ++i)
        printf("arg[%d]: %s\n", i, argv[i]);

    for (int i = 1; i < argc; ++i)
    {
        const char *msg = argv[i];
        if (msg[0] != '-')
            continue;

        switch (msg[1])
        {
            case 'c': // Create an archive
                // The logic for this is that you do -c archive.big [files|dirs]...
                // allowing files to be added similarly to tar.
                if (i + 2 > argc)
                {
                    fprintf(stderr, "You must specify a file name to create\n");
                    return EXIT_FAILURE;
                }
                // Create an archive with file name and remaining arguments.
                Create(argv[i + 1], argc - i - 2, (argv + i + 2));
                return EXIT_SUCCESS;

            case 'x': // Extract an archive
                if (i + 2 > argc)
                {
                    fprintf(stderr, "You must specify a file name to extract\n");
                    return EXIT_FAILURE;
                }
                if (i + 3 > argc)
                {
                    fprintf(stderr, "You must specify a destination\n");
                    return EXIT_FAILURE;
                }
                // The optional fourth argument selects a specific file.
                Extract(argv[i + 1], argv[i + 2], (i + 4 <= argc) ? argv[i + 3] : NULL);
                return EXIT_SUCCESS;

            case 'l': // List files in the archive
                // This is easy. Just dump the name table to the terminal.
                if (i + 2 > argc)
                {
                    fprintf(stderr, "You must specify a file name or list of files to list\n");
                    return EXIT_FAILURE;
                }
                for (int o = i + 1; o < argc; ++o)
                {
                    printf("\n%s:\n", argv[o]);
                    ListFiles(argv[o]);
                }
                return EXIT_SUCCESS;

            case 'v': // verbose mode
                // Currently ignored until I feel like adding a debug mode.
                // basically verbose always enabled.
                break;

            case 'h': // Dump help screen.
            default:
                DumpHelp(argv[0]);
                return EXIT_FAILURE;
        }
    }

    return EXIT_SUCCESS;
}
@GrahamRCT
Copy link

Hi Justastic,

I found a bug in this code and fixed it. Thought I'd let you know

fwrite(filee->filename, 1, strlen(filee->filename), f);
should become
fwrite(filee->filename, 1, strlen(filee->filename) + 1, f);
Otherwise it ignores the null character behind each filename which causes an offset problem.

Second thing: What license is this? Can I use the code in my own projects? If so, under what terms?

@Justasic
Copy link
Author

Hey thanks for the fix, I didn't notice :D
I'll put it under the MIT license, I wrote this code long ago and haven't cared much about it since so it may have some code rot.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment