Skip to content

Instantly share code, notes, and snippets.

@drydenp

drydenp/sparsepack.c

Last active May 29, 2017
Embed
What would you like to do?
Temporary addition to a fleeting experience
#include "sparsepack.h"
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
// Running state of the packer while it scans the input block by block.
struct context {
char *cur_buf; // position in the data buffer where the next block from stdin is stored
struct chunk_h header; // scratch chunk header, filled in right before a chunk is written
int buf_read; // number of data blocks buffered but not yet written out
int zero_read; // number of consecutive all-zero blocks seen but not yet written out
int block_size; // size of one block in bytes
};
// Counters kept by the packer; the total_* fields are derived by fixup().
struct statistics {
off_t zero_chunks_written; // number of zero (header-only) chunks written
off_t data_chunks_written; // number of data chunks written
off_t total_chunks_written; // zero_chunks_written + data_chunks_written
off_t zero_blocks_written; // number of all-zero blocks represented by zero chunks
off_t data_blocks_written; // number of data blocks written inside data chunks
off_t total_blocks_written; // zero_blocks_written + data_blocks_written
};
/*
 * Recompute the aggregate counters of `s` from its zero/data counters.
 * Must be called before the total_* fields are read.
 */
void fixup(struct statistics *s) {
    const off_t chunk_sum = s->zero_chunks_written + s->data_chunks_written;
    const off_t block_sum = s->zero_blocks_written + s->data_blocks_written;

    s->total_chunks_written = chunk_sum;
    s->total_blocks_written = block_sum;
}
// Size in bytes of the blocks the packer reads from stdin; starts out as DEF_BLOCK_SIZE.
int block_size = DEF_BLOCK_SIZE;
/**
 * Report whether a block contains any non-zero byte.
 *
 * @param chunk start of the block to inspect
 * @param size  number of bytes in the block; a size <= 0 yields false
 * @return true if at least one byte is non-zero, false otherwise
 */
bool has_data(char* chunk, int size) {
    // Every byte is inspected, including the final ones: a loop that
    // decrements the remaining size before looking at the data skips the
    // last word and never terminates cleanly for sizes that are zero or not
    // a multiple of the word size.
    for (int i = 0; i < size; i++) {
        if (chunk[i] != 0) {
            return true;
        }
    }
    return false;
}
/*
 * Emit a zero chunk (header only, no payload) describing the pending run of
 * all-zero blocks, account for it in `stats`, and reset the run counter.
 */
void write_zero_header(struct context *ctx, struct statistics *stats) {
    const int run_length = ctx->zero_read;

    ctx->header.type = unused;
    ctx->header.size = run_length;

    fprintf(stderr, "Writing zero chunk of %d blocks\n", run_length);
    fwrite(&ctx->header, sizeof(ctx->header), 1, stdout);

    stats->zero_blocks_written += run_length;
    stats->zero_chunks_written++;

    ctx->zero_read = 0;
}
/*
 * Emit a data chunk: a header followed by the buffered data blocks that start
 * at `full_buf`. Afterwards the buffered-block counter is reset and the write
 * position is rewound to the start of the buffer.
 */
void write_data_header(struct context *ctx, char *full_buf, struct statistics *stats) {
    const int pending = ctx->buf_read;

    ctx->header.type = used;
    ctx->header.size = pending;

    fprintf(stderr, "Writing data chunk of %d blocks\n", pending);
    fwrite(&ctx->header, sizeof(ctx->header), 1, stdout);
    fwrite(full_buf, ctx->block_size, pending, stdout);

    stats->data_blocks_written += pending;
    stats->data_chunks_written++;

    ctx->cur_buf = full_buf;
    ctx->buf_read = 0;
}
// Global packer counters; every field starts at zero.
struct statistics stats = {0};
/**
 * Pack stdin into the SPAF sparse format on stdout.
 *
 * The input is read one block at a time. Runs of blocks that contain data are
 * buffered and emitted as data chunks (header followed by the raw blocks);
 * runs of all-zero blocks are emitted as zero chunks (header only). Progress
 * messages and a final summary are written to stderr.
 *
 * A trailing partial block cannot be represented and is dropped with a
 * warning.
 *
 * @return 0 on success, 1 on allocation or I/O failure
 */
int main(void) {
    // Largest run length that fits in the 16-bit size field of a chunk header.
    // For 4k blocks this is 64k * 4k = 256MB per zero chunk.
    const int max_zero_run = (1 << (sizeof(unsigned short) * 8)) - 1;
    struct context ctx = {0};
    size_t got;
    char *buf = malloc((size_t)block_size * MAX_BLOCK_COUNT);

    if (buf == NULL) {
        perror("malloc");
        return 1;
    }
    ctx.cur_buf = buf;
    ctx.block_size = block_size;

    if (fwrite(&our_header, sizeof(struct spaf_h), 1, stdout) != 1) {
        perror("fwrite");
        free(buf);
        return 1;
    }

    // At any time at most one of ctx.buf_read / ctx.zero_read is non-zero:
    // either a run of data blocks or a run of zero blocks is pending.
    while ((got = fread(ctx.cur_buf, 1, (size_t)block_size, stdin)) == (size_t)block_size) {
        if (has_data(ctx.cur_buf, block_size)) {
            if (ctx.zero_read > 0) {
                // A data block ends the pending run of zero blocks.
                write_zero_header(&ctx, &stats);
            }
            // The block just read already sits at cur_buf; keep it.
            ctx.buf_read++;
            if (ctx.buf_read == MAX_BLOCK_COUNT) {
                // Buffer full: flush it (this also rewinds cur_buf).
                write_data_header(&ctx, buf, &stats);
            } else {
                ctx.cur_buf += block_size;
            }
        } else {
            if (ctx.buf_read > 0) {
                // A zero block ends the pending run of data blocks. The zero
                // block itself was read past the buffered data and is not
                // part of what gets written.
                write_data_header(&ctx, buf, &stats);
            }
            ctx.zero_read++;
            if (ctx.zero_read == max_zero_run) {
                write_zero_header(&ctx, &stats);
            }
        }
    }

    if (ferror(stdin)) {
        perror("fread");
        free(buf);
        return 1;
    }
    if (got > 0) {
        fprintf(stderr, "Warning: ignoring trailing partial block of %zu bytes.\n", got);
    }

    // Flush whatever run is still pending. Nothing is written when no run is
    // pending, so that the stream never ends in an empty data chunk.
    if (ctx.zero_read > 0) {
        write_zero_header(&ctx, &stats);
    } else if (ctx.buf_read > 0) {
        write_data_header(&ctx, buf, &stats);
    }
    free(buf);

    if (fflush(stdout) != 0 || ferror(stdout)) {
        fprintf(stderr, "Error while writing the packed stream.\n");
        return 1;
    }

    fixup(&stats);
    // off_t has no printf conversion of its own; convert explicitly so the
    // arguments match the PRIu64 conversions.
    fprintf(stderr,
            "Bytes read %" PRIu64 ", data chunks written %" PRIu64 " totalling %" PRIu64
            " blocks and %" PRIu64 " bytes. Zero chunks written %" PRIu64 " totalling %" PRIu64
            " blocks and %" PRIu64 " bytes. Total blocks written %" PRIu64
            " and total chunks %" PRIu64 ".\n",
            (uint64_t)(stats.total_blocks_written * block_size),
            (uint64_t)stats.data_chunks_written,
            (uint64_t)stats.data_blocks_written,
            (uint64_t)(stats.data_blocks_written * block_size),
            (uint64_t)stats.zero_chunks_written,
            (uint64_t)stats.zero_blocks_written,
            (uint64_t)(stats.zero_blocks_written * block_size),
            (uint64_t)stats.total_blocks_written,
            (uint64_t)stats.total_chunks_written);
    return 0;
}
// Magic bytes and format version stored at the start of a SPAF stream.
#define MAGIC "SPAF"
#define VERSION "10"
// Boolean constants used together with the bool typedef of this header.
#define false 0
#define true 1
// Number of data blocks the packer buffers before it writes a data chunk.
#define MAX_BLOCK_COUNT 4096
// Default block size in bytes.
#define DEF_BLOCK_SIZE 4096
/**
* This macro changes off_t to 64 bits, and ftruncate to 64 bits.
* NOTE(review): presumably this only takes effect when it is defined before
* the first system header is included - confirm the include order in every
* file that uses this header.
*/
#define _FILE_OFFSET_BITS 64
#include <stdint.h>
/*
* The explicit 64-bit variants of these functions could arguably be used directly instead.
* fprintf requires the PRIu64 or PRIi64 macros to select the correct length modifier for 64-bit values.
*/
// Checksum algorithm identifiers stored in the checksum_algo field of the stream header.
enum checksum_algos {
algo_crc32 = 0,
algo_md5sum
};
// One-byte boolean and raw byte types used in the on-disk structures.
// NOTE(review): this bool typedef would clash with <stdbool.h> if that header were ever included alongside this one.
typedef unsigned char bool;
typedef unsigned char byte;
// Stream header written once at the start of a SPAF stream.
// The number in a field's trailing comment is the running byte offset after that field.
struct spaf_h { // 16 bytes
char magic[4]; // "SPAF"; 4
char version[2]; // "10"; 6
bool hammington_used:8; // 7; unused. A Hamming code could give a kind of ECC correction without using ECC
// memory, but would probably be rather slow.
byte hammington_block_size:8; // 8; would have to be 247 with 8 parity bits and one unused extra parity,
// but this is the data part. The full code is (255, 247) and matrices could
// be downloaded at [1].
// The output of the matrix is the 8 parity bit position values that can
// indicate an error. To be error free, the computation of this vector
// needs to be the zero vector.
bool checksum_used:8; // 9
unsigned short checksum_bits:16; // 11
enum checksum_algos checksum_algo:8; // 12
uint32_t block_size:32; // 16; size of one block in bytes
} __attribute__((packed));
// [1] http://www.uni-kl.de/en/channel-codes/channel-codes-database/bch-and-hamming/
// Chunk kinds: a "used" chunk carries data blocks after its header,
// an "unused" chunk stands for a run of all-zero blocks and has no payload.
enum block_type {
used = 0,
unused
};
// Header that precedes every chunk in the stream; 4 bytes when packed.
struct chunk_h {
enum block_type type:8; // used (data follows) or unused (run of zero blocks)
unsigned short size:16; // number of blocks the chunk stands for
byte padding:8; // pads the header to 4 bytes
} __attribute__((packed));
// Chunk header variant that carries a 32-bit checksum instead of the padding byte.
// NOTE(review): not referenced by the packer or unpacker code seen here - presumably reserved for a later format revision.
struct chunk_h_checksum {
enum block_type type:8; // used or unused, as in struct chunk_h
unsigned short size:16; // number of blocks the chunk stands for
unsigned long checksum:32; // checksum value, 32 bits wide
} __attribute__((packed));
// Stream header the packer writes at the start of its output.
// The initializers are positional and follow the field order of struct spaf_h.
struct spaf_h our_header = {
MAGIC, // magic
VERSION, // version
false, // hammington_used
247, // hammington_block_size
false, // checksum_used
32, // checksum_bits
algo_crc32, // checksum_algo
4096 // block_size; NOTE(review): hard-coded, must be kept equal to DEF_BLOCK_SIZE by hand
};
#include "sparsepack.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/unistd.h>
// Counters kept by the unpacker; the total_* fields are derived by fixup().
struct statistics {
off_t zero_chunks_read; // number of zero chunks decoded
off_t data_chunks_read; // number of data chunks decoded
off_t total_chunks_read; // zero_chunks_read + data_chunks_read
off_t zero_blocks_read; // number of blocks represented by zero chunks
off_t data_blocks_read; // number of blocks carried by data chunks
off_t total_blocks_read; // zero_blocks_read + data_blocks_read
};
/*
 * Recompute the aggregate counters of `s` from its zero/data counters.
 * Must be called before the total_* fields are read.
 */
void fixup(struct statistics *s) {
    const off_t chunk_sum = s->zero_chunks_read + s->data_chunks_read;
    const off_t block_sum = s->zero_blocks_read + s->data_blocks_read;

    s->total_chunks_read = chunk_sum;
    s->total_blocks_read = block_sum;
}
// Kind of object stdout is connected to, as reported by obtain_file_type().
enum file_type {
ft_invalid = 0, // output that the unpacker refuses to write to
ft_file, // regular file: zero runs are skipped by seeking, file is truncated to size at the end
ft_block, // block device: zero runs are skipped by seeking
ft_pipe // pipe: zero runs are written out as explicit zero blocks
};
/**
 * Classify what stdout is connected to.
 *
 * @return ft_file for a regular file, ft_block for a block device, ft_pipe
 *         for outputs that can only be written sequentially (FIFO, character
 *         device, socket), and ft_invalid when stdout cannot be inspected or
 *         is anything else (for example a directory).
 */
enum file_type obtain_file_type() {
    struct stat st;

    if (fstat(fileno(stdout), &st) != 0) {
        return ft_invalid;
    }
    // The S_IF* constants are codes inside the S_IFMT field, not independent
    // bit flags, so the type has to be tested with the S_IS*() macros instead
    // of a bitwise AND (which would, for instance, report a block device as a
    // regular file).
    if (S_ISREG(st.st_mode)) {
        return ft_file;
    }
    if (S_ISBLK(st.st_mode)) {
        return ft_block;
    }
    if (S_ISFIFO(st.st_mode) || S_ISCHR(st.st_mode)) {
        return ft_pipe;
    }
#ifdef S_ISSOCK
    if (S_ISSOCK(st.st_mode)) {
        return ft_pipe;
    }
#endif
    return ft_invalid;
}
int main(char args[]) {
fprintf(stderr, "Sizeof %d\n", sizeof(off_t));
exit(1);
char *buf = malloc(DEF_BLOCK_SIZE * MAX_BLOCK_COUNT);
struct chunk_h chunk_header;
struct spaf_h my_header;
struct statistics stats = {0};
char *zero;
int read, i, res;
enum block_type last;
enum file_type stdout_type = obtain_file_type();
fprintf(stderr, "File type is %s.\n", stdout_type == ft_file ? "file" : (stdout_type == ft_block) ? "block device" : "pipe");
if (stdout_type == ft_invalid) {
fprintf(stderr, "Cannot write to this output.\n");
return 1;
}
fread(&my_header, sizeof(struct spaf_h), 1, stdin);
if (memcmp(&my_header.magic, MAGIC, 4) == 0 && memcmp(&my_header.version, VERSION, 2) == 0) {
fprintf(stderr, "Valid SPAF header found in input stream.\n");
} else {
fprintf(stderr, "No valid SPAF header found in input stream.\n");
return 1;
}
fprintf(stderr, "Block size %d.\n", my_header.block_size);
zero = malloc(my_header.block_size);
memset(zero, 0, my_header.block_size);
while (fread(&chunk_header, sizeof(struct chunk_h), 1, stdin)) {
fprintf(stderr, "Decoding %s chunk of %d blocks\n", chunk_header.type == used ? "data" : "zero", chunk_header.size);
if (chunk_header.type == used) {
read = fread(buf, my_header.block_size, chunk_header.size, stdin);
fwrite(buf, my_header.block_size, chunk_header.size, stdout);
stats.data_chunks_read += 1;
stats.data_blocks_read += chunk_header.size;
} else {
// if it is a regular file, then seek and truncate at the end, creating a sparse file.
if (stdout_type == ft_file || stdout_type == ft_block) {
fseek(stdout, chunk_header.size * my_header.block_size, SEEK_CUR);
} else {
for (i = 0; i < chunk_header.size; i++) {
fwrite(zero, my_header.block_size, 1, stdout);
}
}
stats.zero_chunks_read += 1;
stats.zero_blocks_read += chunk_header.size;
}
}
fixup(&stats);
if (chunk_header.type == unused && stdout_type == ft_file) {
fprintf(stderr, "Truncating file at %" PRIu64 " bytes.\n", stats.total_blocks_read * my_header.block_size);
res = ftruncate(fileno(stdout), stats.total_blocks_read * my_header.block_size);
/*if (res) {
fprintf(stderr, "Truncate error.\n");
}*/
}
fprintf(stderr, "Total blocks processed: %" PRIu64 ".", stats.total_blocks_read);
fprintf(stderr, " Total chunks: %" PRIu64 ".", stats.total_chunks_read);
fprintf(stderr, " Zero: %" PRIu64 "/%" PRIu64 ", data: %" PRIu64 "/%" PRIu64 ". Data %: %04.2f\n",
stats.zero_blocks_read,
stats.zero_chunks_read,
stats.data_blocks_read,
stats.data_chunks_read,
stats.data_blocks_read / (double)stats.total_blocks_read);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment