Skip to content

Instantly share code, notes, and snippets.

@FireyFly
Last active January 2, 2024 20:38
Show Gist options
  • Save FireyFly/4438a08b5245e00fe090 to your computer and use it in GitHub Desktop.
Save FireyFly/4438a08b5245e00fe090 to your computer and use it in GitHub Desktop.
FUSE1.00 format

FUSE archive format

CSV

For some files, there is a CSV file accompanying the FUSE archive. The CSV contains fields for filename, offset in FUSE file, decompressed file size and (I think) checksum.

Header

struct fuse_header {
  char magic[8]; // FUSE1.00
  u32 count; // #files
  u32 zero1;
  u32 filesize;
};

The header is followed by compressed data for file1, file2, ... (count files).

Compression

Each compressed blob starts with a u32 holding compressed_size | 0x40000000. Following this is the compressed data, which seems to work akin to LZ compression. Read a (to_copy, then_copy, offset) triplet as follows:

  Bytes                                 to_copy  then_copy            offset
-----------------------------------------------------------------------------
  0yyyxxzz zzzzzzzz                      x        y + 3                z + 1
  10yyyyyy xxzzzzzz zzzzzzzz             x        y + 4                z + 1
  110xxyyy zzzzzzzz zzzzzzzz yyyyyyyy    x        (y1 << 7) + y2 + 5   z + 1
  111xxxxx                               (x+1)*4  0                    0
  (0xFD)                                 3        0                    0
  (0xFE)                                 2        0                    0
  (0xFF)                                 1        0                    0

Then read and write to_copy bytes from the compressed data verbatim, followed by then_copy bytes copied from history offset bytes backwards.

Issues

  • Some files are stored verbatim, with neither compression nor a length | 0x40000000 prefix. I have no idea how to tell these apart from compressed data. I thought the 0x40000000 bit might signify compression, but since non-compressed data doesn't include the prefix at all, checking whether that bit is present in the first u32 doesn't seem very reliable.

  • The above decompression was figured out by looking at compressed text files. When I decompress any ASCII text file, everything works out just fine. However, many binary formats, such as PNG, TGA, XLS and others judging by the filenames in the CSV, seem to decompress into nonsense. What's even weirder (to me at least) is that the structure of the decompressed data for these very different formats seems to be very similar.

#include <ctype.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
// Short aliases for the fixed-width unsigned integer types from <stdint.h>.
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
//-- Decompression --------------------------------------------------
// Append one byte to the output stream and to the history ring buffer.
// `ring_size` must be a power of two. Returns 0 on success, -1 if the
// write to `out` failed.
static int emit_byte(uint8_t *history, size_t ring_size, size_t *pos,
                     FILE *out, uint8_t b) {
  history[*pos] = b;
  if (fputc(b, out) == EOF) return -1;
  *pos = (*pos + 1) & (ring_size - 1);
  return 0;
}

/*
 * Decompress one blob from `in`, starting at the current file position, and
 * write the decompressed bytes to `out`.
 *
 * The blob starts with a u32 (read in host byte order) whose top nibble must
 * be 0x4; the low 30 bits are the compressed size.  The payload is a sequence
 * of tokens, each describing `to_copy` literal bytes to copy from the input
 * followed by `then_copy` bytes copied from `offset` bytes back in the output
 * history.  Decoding stops once `filesize` bytes have been produced or the
 * compressed size has been consumed (both checked at token boundaries only,
 * so the final token is always emitted in full).
 *
 * Returns:
 *    0  success
 *   -1  the length prefix is missing or lacks the 0x4 marker nibble
 *   -2  out of memory
 *   -3  the input ended in the middle of the compressed data
 *   -4  writing to `out` failed
 *
 * NOTE(review): the format notes list 0xFD/0xFE/0xFF as 3/2/1 literal bytes,
 * while this code (unchanged from the original) yields 1/2/3 -- confirm which
 * one is correct against real data.
 */
int decompress(FILE *in, FILE *out, size_t filesize) {
  // Power of two so ring indices can be wrapped with a mask.  The largest
  // encodable offset is 0x10000, which wraps to the oldest byte in the ring.
  enum { HISTORY_SIZE = 0x10000 };

  uint32_t compressed_size;
  if (fread(&compressed_size, sizeof compressed_size, 1, in) != 1) return -1;
  if (compressed_size >> 28 != 0x4) return -1;
  compressed_size &= 0x3FFFFFFF;

  // Zero-filled so that back-references to not-yet-written history are
  // deterministic instead of reading indeterminate memory.
  uint8_t *history = calloc(HISTORY_SIZE, 1);
  if (history == NULL) return -2;

  int rc = 0;
  size_t pos = 0;      // next write position in the history ring
  size_t written = 0;  // total bytes emitted so far
  const long start = ftell(in);

  for (;;) {
    // The token byte is read before the limits are tested, so one byte past
    // the end of the blob may be consumed; callers seek before every call.
    int ch = fgetc(in);
    if (ftell(in) - start >= (long)compressed_size || written >= filesize) break;
    if (ch == EOF) { rc = -3; break; }

    // The top three bits select the token layout and thereby its length.
    const int type = ch >> 5;
    const int extra_bytes = type < 4 ? 1
                          : type < 6 ? 2
                          : type < 7 ? 3
                          :            0;
    uint32_t v = (uint32_t)ch;
    for (int i = 0; i < extra_bytes; i++) {
      int b = fgetc(in);
      if (b == EOF) { rc = -3; goto done; }
      v = (v << 8) | (uint32_t)b;
    }

    size_t to_copy = 0, then_copy = 0, offset = 0;
    switch (type) {
    case 0: case 1: case 2: case 3:  // 0yyyxxzz zzzzzzzz
      to_copy   = (v >> 10) & 0x3;
      then_copy = (v >> 12) + 3;
      offset    = (v & 0x3FF) + 1;
      break;
    case 4: case 5:                  // 10yyyyyy xxzzzzzz zzzzzzzz
      to_copy   = (v >> 14) & 0x3;
      then_copy = ((v >> 16) & 0x3F) + 4;
      offset    = (v & 0x3FFF) + 1;
      break;
    case 6:                          // 110xxyyy zzzzzzzz zzzzzzzz yyyyyyyy
      to_copy   = (v >> 27) & 0x3;
      then_copy = (((v >> 24) & 0x7) << 7) + (v & 0xFF) + 5;
      offset    = ((v >> 8) & 0xFFFF) + 1;
      break;
    default:                         // 111xxxxx: literal run only
      to_copy = v < 0xFD ? 4 * ((v & 0x1F) + 1) : v - 0xFD + 1;
      break;
    }

    // Literal bytes straight from the input.
    for (size_t i = 0; i < to_copy; i++) {
      int b = fgetc(in);
      if (b == EOF) { rc = -3; goto done; }
      if (emit_byte(history, HISTORY_SIZE, &pos, out, (uint8_t)b) != 0) {
        rc = -4;
        goto done;
      }
      written++;
    }

    // Bytes from history; copied one at a time on purpose so that a run may
    // overlap the bytes it is currently producing.
    for (size_t i = 0; i < then_copy; i++) {
      uint8_t b = history[(pos - offset) & (HISTORY_SIZE - 1)];
      if (emit_byte(history, HISTORY_SIZE, &pos, out, b) != 0) {
        rc = -4;
        goto done;
      }
      written++;
    }
  }

done:
  free(history);
  return rc;
}
//-- File format ----------------------------------------------------
// Fixed-size header at the start of a FUSE archive.  main() reads it with a
// single fread, hence the packed attribute (no padding between fields).
struct fuse_header {
char magic[8]; // expected to be "FUSE1.00", not NUL-terminated; checked in main()
u32 count; // number of files, per the format notes; not used by this program
u32 zero1; // purpose unknown; not used by this program
u32 filesize; // NOTE(review): presumably a total size -- not used or verified here
} __attribute__((packed));
// One parsed line of the CSV index that accompanies a FUSE archive.
struct csv_entry {
char *path; // points into for_each_file's line buffer; only valid during the callback
u32 offset; // position in the FUSE file that callbacks seek to before decompressing
u32 filesize; // size passed to decompress() as the output limit
u32 checksum; // parsed from the first CSV column; only printed, never verified here
};
// Callback invoked by for_each_file for every CSV entry; `any` is the opaque
// pointer that was handed to for_each_file.
typedef void (*file_entry_callback_t)(struct csv_entry *entry, void *any);
/*
 * Read the CSV index from `f` line by line and invoke `callback(entry, any)`
 * for every entry.
 *
 * Each line must consist of exactly four comma-separated fields:
 *   checksum (8 hex digits), path, file size (decimal), offset (hex).
 *
 * `entry->path` points into a line buffer local to this function, so it is
 * only valid for the duration of the callback.
 *
 * Returns 0 on success, or 3 (after printing a message to stderr) as soon as
 * a malformed line is encountered.
 */
int for_each_file(FILE *f, file_entry_callback_t callback, void *any) {
  enum { EXPECTED_FIELDS = 4 };
  char buf[BUFSIZ];
  struct csv_entry entry;

  while (fgets(buf, sizeof buf, f) != NULL) {
    // Split in place on commas.  Separators keep being counted past the
    // expected number so over-long lines are rejected below, but field
    // pointers are only stored while they fit in the array.
    char *fields[EXPECTED_FIELDS];
    int nfields = 1;
    fields[0] = buf;
    for (char *p = strchr(buf, ','); p != NULL; p = strchr(p + 1, ',')) {
      if (nfields < EXPECTED_FIELDS) fields[nfields] = p + 1;
      nfields++;
      *p = '\0';
    }

    // The checksum column must be exactly 8 characters plus its separator.
    if (nfields != EXPECTED_FIELDS || fields[1] - fields[0] != 9) {
      fprintf(stderr, "Error reading CSV file.\n");
      return 3;
    }

    // Parse into plain unsigned ints so the conversion specifiers match their
    // argument types exactly, then narrow into the u32 fields.
    unsigned int checksum, filesize, offset;
    if (sscanf(fields[0], "%8X", &checksum) != 1 ||
        sscanf(fields[2], "%u", &filesize) != 1 ||
        sscanf(fields[3], "%8x", &offset) != 1) {
      fprintf(stderr, "Error reading CSV file.\n");
      return 3;
    }

    entry.checksum = checksum;
    entry.filesize = filesize;
    entry.offset = offset;
    entry.path = fields[1];
    callback(&entry, any);
  }
  return 0;
}
//-- Operations -----------------------------------------------------
// for_each_file callback for the "list" operation: prints one line per entry
// with checksum, offset (both hex), file size (decimal) and path.
// `any` is unused.
void print_entry_callback(struct csv_entry *entry, void *any) {
  (void)any;
  // All three numbers are unsigned 32-bit values; cast so the arguments match
  // the conversion specifiers exactly, and print the size as unsigned.
  printf("%08X %08x %8u %s\n",
         (unsigned)entry->checksum, (unsigned)entry->offset,
         (unsigned)entry->filesize, entry->path);
}
// Context handed to read_entry_callback through for_each_file's `any` pointer.
struct entry_data {
FILE *f; // FUSE archive stream to seek in and decompress from
char *path; // entry path to look for; compared with strcmp against each CSV path
};
// for_each_file callback for the "read" operation: when the entry's path
// equals the requested path, decompress that entry to stdout.
// `any` must point to a struct entry_data.  Exits the process with status 2
// if seeking or decompression fails.
void read_entry_callback(struct csv_entry *entry, void *any) {
  struct entry_data *data = any;

  // No requested path (e.g. the argument was missing) matches nothing.
  if (data->path == NULL || strcmp(data->path, entry->path) != 0) return;

  if (fseek(data->f, (long)entry->offset, SEEK_SET) != 0) {
    fprintf(stderr, "Error while seeking to entry!\n");
    exit(2);
  }
  if (decompress(data->f, stdout, entry->filesize) != 0) {
    fprintf(stderr, "Error while decompressing!\n");
    exit(2);
  }
}
/*
 * for_each_file callback for the "extract" operation: recreates the entry's
 * path below the current directory (creating missing directories with mode
 * 0755) and decompresses the entry into the final path component.
 *
 * `any` must be the FILE* of the FUSE archive.  The entry's path is split in
 * place with strtok, so it is modified.  Failures are reported on stderr and
 * the entry is skipped; the function never aborts the whole run.
 */
void extract_file_callback(struct csv_entry *entry, void *any) {
  FILE *fuse_f = any;
  static char buf[0x1000];
  struct stat statbuf;

  strcpy(buf, ".");
  size_t len = 1;

  char *p = strtok(entry->path, "/");
  while (p != NULL) {
    char *p2 = strtok(NULL, "/");

    // The path comes from the CSV file; refuse to climb out of the
    // extraction directory.
    if (strcmp(p, "..") == 0) {
      fprintf(stderr, "Refusing path containing '..' below '%s'\n", buf);
      return;
    }

    // Append "/<component>" only if it fits, including the terminator.
    size_t plen = strlen(p);
    if (len + 1 + plen >= sizeof buf) {
      fprintf(stderr, "Path too long: '%s/%s'\n", buf, p);
      return;
    }
    buf[len++] = '/';
    memcpy(buf + len, p, plen + 1);
    len += plen;

    if (p2 == NULL) {
      // File--extract
      FILE *f = fopen(buf, "wb");
      if (f == NULL) {
        fprintf(stderr, "Couldn't open '%s' for writing.\n", buf);
        return;
      }
      if (fseek(fuse_f, (long)entry->offset, SEEK_SET) != 0) {
        fprintf(stderr, "Error while seeking for '%s'\n", buf);
        fclose(f);
        return;
      }
      int err = decompress(fuse_f, f, entry->filesize);
      // Always close the output, also on failure, so the handle is not leaked.
      int close_err = fclose(f);
      if (err != 0) {
        fprintf(stderr, "Error while decompressing: %d %s\n", err, buf);
        return;
      }
      if (close_err != 0) {
        fprintf(stderr, "Error while writing '%s'\n", buf);
        return;
      }
    } else {
      // Directory--mkdir if not present
      if (stat(buf, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) {
        if (mkdir(buf, 0755) != 0) {
          fprintf(stderr, "Error creating directory '%s'\n", buf);
          return;
        }
      }
    }
    p = p2;
  }
}
//-- Entry point ----------------------------------------------------
// Operations selectable through the first command-line argument.
enum operation {
  OP_LIST,     // "l" / "list"
  OP_READ,     // "r" / "read"
  OP_EXTRACT,  // "x" / "extract"
};

// Map an operation name, given either as its one-letter or its full spelling,
// to the matching enum operation value.  Returns -1 for any other string.
int read_operation(const char *s) {
  static const struct {
    const char *abbrev;
    const char *word;
    int op;
  } known[] = {
    { "l", "list",    OP_LIST    },
    { "r", "read",    OP_READ    },
    { "x", "extract", OP_EXTRACT },
  };

  for (size_t k = 0; k < sizeof known / sizeof known[0]; k++) {
    if (!strcmp(s, known[k].abbrev) || !strcmp(s, known[k].word)) {
      return known[k].op;
    }
  }
  return -1;
}
/*
 * Usage: <prog> <op> <csv-file> <fib-file> [...]
 *
 *   l | list     print every entry of the CSV index
 *   r | read     decompress the entry whose path equals the fifth argument
 *                to stdout
 *   x | extract  extract every entry below the current directory
 *
 * Returns 0 on success, 1 for usage, open, header or unknown-operation
 * errors, and otherwise the non-zero status reported by for_each_file.
 */
int main(int argc, char *argv[]) {
  if (argc < 4) {
    fprintf(stderr, "Usage: %s <op> <csv-file> <fib-file> [...]\n", argv[0]);
    return 1;
  }

  int operation = read_operation(argv[1]);
  // "read" needs the path of the entry as an additional argument; without
  // this check argv[4] would be NULL.
  if (operation == OP_READ && argc < 5) {
    fprintf(stderr, "Usage: %s %s <csv-file> <fib-file> <path>\n",
            argv[0], argv[1]);
    return 1;
  }

  FILE *f = fopen(argv[2], "r");
  if (f == NULL) {
    fprintf(stderr, "Couldn't open '%s' for reading.\n", argv[2]);
    return 1;
  }
  // The archive is binary data; "rb" keeps offsets exact on platforms that
  // translate line endings in text mode.
  FILE *fuse_f = fopen(argv[3], "rb");
  if (fuse_f == NULL) {
    fprintf(stderr, "Couldn't open '%s' for reading.\n", argv[3]);
    fclose(f);
    return 1;
  }

  int rc = 1;

  // Process input files
  struct fuse_header header;
  if (fread(&header, sizeof header, 1, fuse_f) != 1 ||
      strncmp(header.magic, "FUSE1.00", 8) != 0) {
    fprintf(stderr, "Bad magic in FUSE file.\n");
    goto out;
  }

  switch (operation) {
  case OP_LIST:
    rc = for_each_file(f, print_entry_callback, NULL);
    break;
  case OP_READ: {
    struct entry_data data = { fuse_f, argv[4] };
    rc = for_each_file(f, read_entry_callback, &data);
  } break;
  case OP_EXTRACT:
    rc = for_each_file(f, extract_file_callback, fuse_f);
    break;
  default:
    fprintf(stderr, "No such operation: '%s'\n", argv[1]);
    rc = 1;
    break;
  }

out:
  fclose(fuse_f);
  fclose(f);
  return rc;
}
@jakiki6
Copy link

jakiki6 commented Apr 4, 2021

Thank you :)

@karvelasstav
Copy link

i have a fuse file but it has no accompanying csv? googling about fuse1.00 brings up only this specific post and nothing else. do you know where i can look to figure out more about this format?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment