Skip to content

Instantly share code, notes, and snippets.

@luisbebop
Created October 1, 2013 19:40
Show Gist options
  • Save luisbebop/6783933 to your computer and use it in GitHub Desktop.
Save luisbebop/6783933 to your computer and use it in GitHub Desktop.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <ctype.h>
#include "bencode.h"
#define MAX_ALLOC (((size_t) -1) / sizeof(struct bencode *) / 2)
struct decode {
const char *data;
const size_t len;
size_t off;
int error;
int level;
};
/*
* Buffer size for fitting all unsigned long long and long long integers,
* assuming it is at most 64 bits. If long long is larger than 64 bits,
* an error is produced when too large an integer is converted.
*/
#define LONGLONGSIZE 21
struct bencode_keyvalue {
struct bencode *key;
struct bencode *value;
};
static struct bencode *decode(struct decode *ctx);
static size_t find(const char *data, size_t len, size_t off, char c)
{
for (; off < len; off++) {
if (data[off] == c)
return off;
}
return -1;
}
static size_t type_size(int type)
{
switch (type) {
case BENCODE_BOOL:
return sizeof(struct bencode_bool);
case BENCODE_DICT:
return sizeof(struct bencode_dict);
case BENCODE_INT:
return sizeof(struct bencode_int);
case BENCODE_LIST:
return sizeof(struct bencode_list);
case BENCODE_STR:
return sizeof(struct bencode_str);
default:
fprintf(stderr, "Unknown bencode type: %d\n", type);
abort();
}
}
static void *alloc(int type)
{
struct bencode *b = calloc(1, type_size(type));
if (b == NULL)
return NULL;
b->type = type;
return b;
}
static int insufficient(struct decode *ctx)
{
ctx->error = BEN_INSUFFICIENT;
return -1;
}
static int invalid(struct decode *ctx)
{
ctx->error = BEN_INVALID;
return -1;
}
static void *insufficient_ptr(struct decode *ctx)
{
ctx->error = BEN_INSUFFICIENT;
return NULL;
}
static void *invalid_ptr(struct decode *ctx)
{
ctx->error = BEN_INVALID;
return NULL;
}
static void *oom_ptr(struct decode *ctx)
{
ctx->error = BEN_NO_MEMORY;
return NULL;
}
static struct bencode *decode_bool(struct decode *ctx)
{
struct bencode_bool *b;
char value;
char c;
if ((ctx->off + 2) > ctx->len)
return insufficient_ptr(ctx);
c = ctx->data[ctx->off + 1];
if (c != '0' && c != '1')
return invalid_ptr(ctx);
value = (c == '1');
b = alloc(BENCODE_BOOL);
if (b == NULL)
return oom_ptr(ctx);
b->b = value;
ctx->off += 2;
return (struct bencode *) b;
}
static int resize_dict(struct bencode_dict *d)
{
struct bencode **newkeys;
struct bencode **newvalues;
size_t newsize;
if (d->alloc >= MAX_ALLOC)
return -1;
if (d->alloc == 0)
d->alloc = 4;
else
d->alloc *= 2;
newsize = sizeof(d->values[0]) * d->alloc;
newkeys = realloc(d->keys, newsize);
newvalues = realloc(d->values, newsize);
if (newkeys == NULL || newvalues == NULL) {
free(newkeys);
free(newvalues);
return -1;
}
d->keys = newkeys;
d->values = newvalues;
return 0;
}
int ben_cmp(const struct bencode *a, const struct bencode *b)
{
size_t cmplen;
int ret;
const struct bencode_str *sa;
const struct bencode_str *sb;
if (a->type != b->type)
return (a->type == BENCODE_INT) ? -1 : 1;
if (a->type == BENCODE_INT) {
const struct bencode_int *ia = ben_int_const_cast(a);
const struct bencode_int *ib = ben_int_const_cast(b);
if (ia->ll < ib->ll)
return -1;
if (ib->ll < ia->ll)
return 1;
return 0;
}
sa = ben_str_const_cast(a);
sb = ben_str_const_cast(b);
cmplen = (sa->len <= sb->len) ? sa->len : sb->len;
ret = memcmp(sa->s, sb->s, cmplen);
if (sa->len == sb->len)
return ret;
if (ret)
return ret;
return (sa->len < sb->len) ? -1 : 1;
}
int ben_cmp_qsort(const void *a, const void *b)
{
const struct bencode *akey = ((const struct bencode_keyvalue *) a)->key;
const struct bencode *bkey = ((const struct bencode_keyvalue *) b)->key;
return ben_cmp(akey, bkey);
}
static struct bencode *decode_dict(struct decode *ctx)
{
struct bencode *key;
struct bencode *value;
struct bencode_dict *d;
d = alloc(BENCODE_DICT);
if (d == NULL) {
fprintf(stderr, "bencode: Not enough memory for dict\n");
return oom_ptr(ctx);
}
ctx->off += 1;
while (ctx->off < ctx->len && ctx->data[ctx->off] != 'e') {
if (d->n == d->alloc && resize_dict(d)) {
fprintf(stderr, "bencode: Can not resize dict\n");
ctx->error = BEN_NO_MEMORY;
goto error;
}
key = decode(ctx);
if (key == NULL)
goto error;
if (key->type != BENCODE_INT && key->type != BENCODE_STR) {
ben_free(key);
key = NULL;
ctx->error = BEN_INVALID;
fprintf(stderr, "bencode: Invalid dict key type\n");
goto error;
}
if (d->n > 0 && ben_cmp(d->keys[d->n - 1], key) >= 0) {
ben_free(key);
key = NULL;
ctx->error = BEN_INVALID;
goto error;
}
value = decode(ctx);
if (value == NULL) {
ben_free(key);
key = NULL;
goto error;
}
d->keys[d->n] = key;
d->values[d->n] = value;
d->n++;
}
if (ctx->off >= ctx->len) {
ctx->error = BEN_INSUFFICIENT;
goto error;
}
ctx->off += 1;
return (struct bencode *) d;
error:
ben_free((struct bencode *) d);
return NULL;
}
/* off is the position of first number in */
static int read_long_long(long long *ll, struct decode *ctx, int c)
{
char buf[LONGLONGSIZE]; /* fits all 64 bit integers */
size_t pos;
char *endptr;
size_t slen;
pos = find(ctx->data, ctx->len, ctx->off, c);
if (pos == -1)
return insufficient(ctx);
slen = pos - ctx->off;
if (slen == 0 || slen >= sizeof buf)
return invalid(ctx);
assert(slen < sizeof buf);
memcpy(buf, ctx->data + ctx->off, slen);
buf[slen] = 0;
if (buf[0] != '-' && !isdigit(buf[0]))
return invalid(ctx);
errno = 0;
*ll = strtoll(buf, &endptr, 10);
if (errno == ERANGE || *endptr != 0)
return invalid(ctx);
/*
* Demand a unique encoding for all integers.
* Zero may not begin with a (minus) sign.
* Non-zero integers may not have leading zeros in the encoding.
*/
if (buf[0] == '-' && buf[1] == '0')
return invalid(ctx);
if (buf[0] == '0' && pos != (ctx->off + 1))
return invalid(ctx);
ctx->off = pos + 1;
return 0;
}
static struct bencode *decode_int(struct decode *ctx)
{
struct bencode_int *b;
long long ll;
ctx->off += 1;
if (read_long_long(&ll, ctx, 'e'))
return NULL;
b = alloc(BENCODE_INT);
if (b == NULL)
return oom_ptr(ctx);
b->ll = ll;
return (struct bencode *) b;
}
static int resize_list(struct bencode_list *list)
{
struct bencode **newvalues;
size_t newsize;
if (list->alloc >= MAX_ALLOC)
return -1;
if (list->alloc == 0)
list->alloc = 4;
else
list->alloc *= 2;
newsize = sizeof(list->values[0]) * list->alloc;
newvalues = realloc(list->values, newsize);
if (newvalues == NULL)
return -1;
list->values = newvalues;
return 0;
}
static struct bencode *decode_list(struct decode *ctx)
{
struct bencode_list *l = alloc(BENCODE_LIST);
if (l == NULL)
return oom_ptr(ctx);
ctx->off += 1;
while (ctx->off < ctx->len && ctx->data[ctx->off] != 'e') {
struct bencode *b = decode(ctx);
if (b == NULL)
goto error;
if (ben_list_append((struct bencode *) l, b)) {
ben_free(b);
ctx->error = BEN_NO_MEMORY;
goto error;
}
}
if (ctx->off >= ctx->len) {
ctx->error = BEN_INSUFFICIENT;
goto error;
}
ctx->off += 1;
return (struct bencode *) l;
error:
ben_free((struct bencode *) l);
return NULL;
}
static size_t read_size_t(struct decode *ctx, int c)
{
long long ll;
size_t s;
if (read_long_long(&ll, ctx, c))
return -1;
if (ll < 0)
return invalid(ctx);
/*
* Test that information is not lost when converting from long long
* to size_t
*/
s = (size_t) ll;
if (ll != (long long) s)
return invalid(ctx);
return s;
}
static struct bencode *decode_str(struct decode *ctx)
{
struct bencode *b;
size_t datalen = read_size_t(ctx, ':'); /* Read the string length */
if (datalen == -1)
return NULL;
if ((ctx->off + datalen) > ctx->len)
return insufficient_ptr(ctx);
/* Allocate string structure and copy data into it */
b = ben_blob(ctx->data + ctx->off, datalen);
ctx->off += datalen;
return b;
}
static struct bencode *decode(struct decode *ctx)
{
ctx->level++;
if (ctx->level > 256)
return invalid_ptr(ctx);
if (ctx->off == ctx->len)
return insufficient_ptr(ctx);
assert (ctx->off < ctx->len);
switch (ctx->data[ctx->off]) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
return decode_str(ctx);
case 'b':
return decode_bool(ctx);
case 'd':
return decode_dict(ctx);
case 'i':
return decode_int(ctx);
case 'l':
return decode_list(ctx);
default:
return invalid_ptr(ctx);
}
}
struct bencode *ben_decode(const void *data, size_t len)
{
struct decode ctx = {.data = data, .len = len};
struct bencode *b = decode(&ctx);
if (b != NULL && ctx.off != len) {
ben_free(b);
return NULL;
}
return b;
}
struct bencode *ben_decode2(const void *data, size_t len, size_t *off, int *error)
{
struct decode ctx = {.data = data, .len = len, .off = *off};
struct bencode *b = decode(&ctx);
*off = ctx.off;
if (error != NULL) {
assert((b != NULL) ^ (ctx.error != 0));
*error = ctx.error;
}
return b;
}
static void free_dict(struct bencode_dict *d)
{
size_t pos;
for (pos = 0; pos < d->n; pos++) {
ben_free(d->keys[pos]);
d->keys[pos] = NULL;
ben_free(d->values[pos]);
d->values[pos] = NULL;
}
}
static void free_list(struct bencode_list *list)
{
size_t pos;
for (pos = 0; pos < list->n; pos++) {
ben_free(list->values[pos]);
list->values[pos] = NULL;
}
}
static int putonechar(char *data, size_t size, size_t *pos, char c)
{
if (*pos >= size)
return -1;
data[*pos] = c;
*pos += 1;
return 0;
}
static int puthexchar(char *data, size_t size, size_t *pos, unsigned char hex)
{
char buf[5];
int len = snprintf(buf, sizeof buf, "\\x%.2x", hex);
assert(len == 4);
if ((*pos + len) > size)
return -1;
memcpy(data + *pos, buf, len);
*pos += len;
return 0;
}
static int putlonglong(char *data, size_t size, size_t *pos, long long ll)
{
char buf[LONGLONGSIZE];
int len = snprintf(buf, sizeof buf, "%lld", ll);
assert(len > 0);
if ((*pos + len) > size)
return -1;
memcpy(data + *pos, buf, len);
*pos += len;
return 0;
}
static int putunsignedlonglong(char *data, size_t size, size_t *pos,
unsigned long long llu)
{
char buf[LONGLONGSIZE];
int len = snprintf(buf, sizeof buf, "%llu", llu);
assert(len > 0);
if ((*pos + len) > size)
return -1;
memcpy(data + *pos, buf, len);
*pos += len;
return 0;
}
static int putstr(char *data, size_t size, size_t *pos, char *s)
{
size_t len = strlen(s);
if (*pos + len > size)
return -1;
memcpy(data + *pos, s, len);
*pos += len;
return 0;
}
static int print(char *data, size_t size, size_t *pos, const struct bencode *b)
{
const struct bencode_bool *boolean;
const struct bencode_dict *dict;
const struct bencode_int *integer;
const struct bencode_list *list;
const struct bencode_str *s;
size_t i;
int len;
struct bencode_keyvalue *pairs;
switch (b->type) {
case BENCODE_BOOL:
boolean = ben_bool_const_cast(b);
len = boolean->b ? 4 : 5;
if (*pos + len > size)
return -1;
memcpy(data + *pos, (len == 4) ? "True" : "False", len);
*pos += len;
return 0;
case BENCODE_DICT:
if (putonechar(data, size, pos, '{'))
return -1;
dict = ben_dict_const_cast(b);
pairs = malloc(dict->n * sizeof(pairs[0]));
if (pairs == NULL) {
fprintf(stderr, "bencode: No memory for dict serialization\n");
return -1;
}
for (i = 0; i < dict->n; i++) {
pairs[i].key = dict->keys[i];
pairs[i].value = dict->values[i];
}
qsort(pairs, dict->n, sizeof(pairs[0]), ben_cmp_qsort);
for (i = 0; i < dict->n; i++) {
if (print(data, size, pos, pairs[i].key))
break;
if (putstr(data, size, pos, ": "))
break;
if (print(data, size, pos, pairs[i].value))
break;
if (i < (dict->n - 1)) {
if (putstr(data, size, pos, ", "))
break;
}
}
free(pairs);
pairs = NULL;
if (i < dict->n)
return -1;
return putonechar(data, size, pos, '}');
case BENCODE_INT:
integer = ben_int_const_cast(b);
if (putlonglong(data, size, pos, integer->ll))
return -1;
return 0;
case BENCODE_LIST:
if (putonechar(data, size, pos, '['))
return -1;
list = ben_list_const_cast(b);
for (i = 0; i < list->n; i++) {
if (print(data, size, pos, list->values[i]))
return -1;
if (i < (list->n - 1) && putstr(data, size, pos, ", "))
return -1;
}
return putonechar(data, size, pos, ']');
case BENCODE_STR:
s = ben_str_const_cast(b);
if (putonechar(data, size, pos, '\''))
return -1;
for (i = 0; i < s->len; i++) {
if (!isprint(s->s[i])) {
if (puthexchar(data, size, pos, s->s[i]))
return -1;
continue;
}
switch (s->s[i]) {
case '\'':
case '\\':
/* Need escape character */
if (putonechar(data, size, pos, '\\'))
return -1;
default:
if (putonechar(data, size, pos, s->s[i]))
return -1;
break;
}
}
return putonechar(data, size, pos, '\'');
default:
fprintf(stderr, "bencode: serialization type %d not implemented\n", b->type);
abort();
}
}
static size_t get_printed_size(const struct bencode *b)
{
size_t pos;
const struct bencode_bool *boolean;
const struct bencode_dict *d;
const struct bencode_int *i;
const struct bencode_list *l;
const struct bencode_str *s;
size_t size = 0;
char buf[1];
switch (b->type) {
case BENCODE_BOOL:
boolean = ben_bool_const_cast(b);
return boolean->b ? 4 : 5; /* "True" and "False" */
case BENCODE_DICT:
size += 1; /* "{" */
d = ben_dict_const_cast(b);
for (pos = 0; pos < d->n; pos++) {
size += get_printed_size(d->keys[pos]);
size += 2; /* ": " */
size += get_printed_size(d->values[pos]);
if (pos < (d->n - 1))
size += 2; /* ", " */
}
size += 1; /* "}" */
return size;
case BENCODE_INT:
i = ben_int_const_cast(b);
return snprintf(buf, 0, "%lld", i->ll);
case BENCODE_LIST:
size += 1; /* "[" */
l = ben_list_const_cast(b);
for (pos = 0; pos < l->n; pos++) {
size += get_printed_size(l->values[pos]);
if (pos < (l->n - 1))
size += 2; /* ", " */
}
size += 1; /* "]" */
return size;
case BENCODE_STR:
s = ben_str_const_cast(b);
size += 1; /* ' */
for (pos = 0; pos < s->len; pos++) {
if (!isprint(s->s[pos])) {
size += 4; /* "\xDD" */
continue;
}
switch (s->s[pos]) {
case '\'':
case '\\':
size += 2; /* escaped characters */
break;
default:
size += 1;
break;
}
}
size += 1; /* ' */
return size;
default:
fprintf(stderr, "bencode: invalid bencode type: %c\n", b->type);
abort();
}
}
static int serialize(char *data, size_t size, size_t *pos,
const struct bencode *b)
{
const struct bencode_dict *dict;
const struct bencode_int *integer;
const struct bencode_list *list;
const struct bencode_str *s;
size_t i;
struct bencode_keyvalue *pairs;
switch (b->type) {
case BENCODE_BOOL:
if ((*pos + 2) > size)
return -1;
data[*pos] = 'b';
data[*pos + 1] = ben_bool_const_cast(b)->b ? '1' : '0';
*pos += 2;
return 0;
case BENCODE_DICT:
if (putonechar(data, size, pos, 'd'))
return -1;
dict = ben_dict_const_cast(b);
pairs = malloc(dict->n * sizeof(pairs[0]));
if (pairs == NULL) {
fprintf(stderr, "bencode: No memory for dict serialization\n");
return -1;
}
for (i = 0; i < dict->n; i++) {
pairs[i].key = dict->keys[i];
pairs[i].value = dict->values[i];
}
qsort(pairs, dict->n, sizeof(pairs[0]), ben_cmp_qsort);
for (i = 0; i < dict->n; i++) {
if (serialize(data, size, pos, pairs[i].key))
break;
if (serialize(data, size, pos, pairs[i].value))
break;
}
free(pairs);
pairs = NULL;
if (i < dict->n)
return -1;
return putonechar(data, size, pos, 'e');
case BENCODE_INT:
if (putonechar(data, size, pos, 'i'))
return -1;
integer = ben_int_const_cast(b);
if (putlonglong(data, size, pos, integer->ll))
return -1;
return putonechar(data, size, pos, 'e');
case BENCODE_LIST:
if (putonechar(data, size, pos, 'l'))
return -1;
list = ben_list_const_cast(b);
for (i = 0; i < list->n; i++) {
if (serialize(data, size, pos, list->values[i]))
return -1;
}
return putonechar(data, size, pos, 'e');
case BENCODE_STR:
s = ben_str_const_cast(b);
if (putunsignedlonglong(data, size, pos, ((long long) s->len)))
return -1;
if (putonechar(data, size, pos, ':'))
return -1;
if ((*pos + s->len) > size)
return -1;
memcpy(data + *pos, s->s, s->len);
*pos += s->len;
return 0;
default:
fprintf(stderr, "bencode: serialization type %d not implemented\n", b->type);
abort();
}
}
static size_t get_size(const struct bencode *b)
{
size_t pos;
const struct bencode_dict *d;
const struct bencode_int *i;
const struct bencode_list *l;
const struct bencode_str *s;
size_t size = 0;
char buf[1];
switch (b->type) {
case BENCODE_BOOL:
return 2;
case BENCODE_DICT:
d = ben_dict_const_cast(b);
for (pos = 0; pos < d->n; pos++) {
size += get_size(d->keys[pos]);
size += get_size(d->values[pos]);
}
return size + 2;
case BENCODE_INT:
i = ben_int_const_cast(b);
return 2 + snprintf(buf, 0, "%lld", i->ll);
case BENCODE_LIST:
l = ben_list_const_cast(b);
for (pos = 0; pos < l->n; pos++)
size += get_size(l->values[pos]);
return size + 2;
case BENCODE_STR:
s = ben_str_const_cast(b);
return snprintf(buf, 0, "%zu", s->len) + 1 + s->len;
default:
fprintf(stderr, "bencode: invalid bencode type: %c\n", b->type);
abort();
}
}
size_t ben_encoded_size(const struct bencode *b)
{
return get_size(b);
}
void *ben_encode(size_t *len, const struct bencode *b)
{
size_t size = get_size(b);
void *data = malloc(size);
if (data == NULL) {
fprintf(stderr, "bencode: No memory to encode\n");
return NULL;
}
*len = 0;
if (serialize(data, size, len, b)) {
free(data);
return NULL;
}
assert(*len == size);
return data;
}
size_t ben_encode2(char *data, size_t maxlen, const struct bencode *b)
{
size_t pos = 0;
if (serialize(data, maxlen, &pos, b))
return -1;
return pos;
}
void ben_free(struct bencode *b)
{
if (b == NULL)
return;
switch (b->type) {
case BENCODE_BOOL:
break;
case BENCODE_DICT:
free_dict((struct bencode_dict *) b);
break;
case BENCODE_INT:
break;
case BENCODE_LIST:
free_list((struct bencode_list *) b);
break;
case BENCODE_STR:
free(((struct bencode_str *) b)->s);
break;
default:
fprintf(stderr, "bencode: invalid type: %d\n", b->type);
abort();
}
memset(b, -1, type_size(b->type)); /* data poison */
free(b);
}
struct bencode *ben_blob(const void *data, size_t len)
{
struct bencode_str *b = alloc(BENCODE_STR);
if (b == NULL)
return NULL;
/* Allocate one extra byte for zero termination for convenient use */
b->s = malloc(len + 1);
if (b->s == NULL) {
free(b);
return NULL;
}
memcpy(b->s, data, len);
b->len = len;
b->s[len] = 0;
return (struct bencode *) b;
}
struct bencode *ben_bool(int boolean)
{
struct bencode_bool *b = alloc(BENCODE_BOOL);
if (b == NULL)
return NULL;
b->b = boolean ? 1 : 0;
return (struct bencode *) b;
}
struct bencode *ben_dict(void)
{
return alloc(BENCODE_DICT);
}
struct bencode *ben_dict_get(const struct bencode *dict, const struct bencode *key)
{
const struct bencode_dict *d = ben_dict_const_cast(dict);
size_t pos;
for (pos = 0; pos < d->n; pos++) {
if (ben_cmp(d->keys[pos], key) == 0)
return d->values[pos];
}
return NULL;
}
struct bencode *ben_dict_get_by_str(const struct bencode *dict, const char *key)
{
const struct bencode_dict *d = ben_dict_const_cast(dict);
size_t keylen = strlen(key);
struct bencode_str *dkey;
size_t pos;
for (pos = 0; pos < d->n; pos++) {
dkey = ben_str_cast(d->keys[pos]);
if (dkey == NULL)
continue;
if (dkey->len != keylen)
continue;
if (strcmp(dkey->s, key) == 0)
return d->values[pos];
}
return NULL;
}
static void replacewithlast(struct bencode **arr, size_t i, size_t n)
{
arr[i] = arr[n - 1];
arr[n - 1] = NULL;
}
struct bencode *ben_dict_pop(struct bencode *dict, const struct bencode *key)
{
struct bencode_dict *d = ben_dict_cast(dict);
size_t pos;
for (pos = 0; pos < d->n; pos++) {
if (ben_cmp(d->keys[pos], key) == 0) {
struct bencode *value = d->values[pos];
ben_free(d->keys[pos]);
replacewithlast(d->keys, pos, d->n);
replacewithlast(d->values, pos, d->n);
d->n -= 1;
return value;
}
}
return NULL;
}
int ben_dict_set(struct bencode *dict, struct bencode *key, struct bencode *value)
{
struct bencode_dict *d = ben_dict_cast(dict);
assert(d->n <= d->alloc);
if (d->n == d->alloc && resize_dict(d))
return -1;
ben_free(ben_dict_pop(dict, key));
d->keys[d->n] = key;
d->values[d->n] = value;
d->n++;
return 0;
}
int ben_dict_set_by_str(struct bencode *dict, const char *key, struct bencode *value)
{
struct bencode *bkey = ben_str(key);
if (bkey == NULL)
return -1;
if (ben_dict_set(dict, bkey, value)) {
ben_free(bkey);
return -1;
}
return 0;
}
int ben_dict_set_str_by_str(struct bencode *dict, const char *key, const char *value)
{
struct bencode *bkey = ben_str(key);
struct bencode *bvalue = ben_str(value);
if (bkey == NULL || bvalue == NULL) {
ben_free(bkey);
ben_free(bvalue);
return -1;
}
if (ben_dict_set(dict, bkey, bvalue)) {
ben_free(bkey);
ben_free(bvalue);
return -1;
}
return 0;
}
struct bencode *ben_int(long long ll)
{
struct bencode_int *b = alloc(BENCODE_INT);
if (b == NULL)
return NULL;
b->ll = ll;
return (struct bencode *) b;
}
struct bencode *ben_list(void)
{
return alloc(BENCODE_LIST);
}
int ben_list_append(struct bencode *list, struct bencode *b)
{
struct bencode_list *l = ben_list_cast(list);
/* NULL pointer de-reference if the cast fails */
assert(l->n <= l->alloc);
if (l->n == l->alloc && resize_list(l))
return -1;
l->values[l->n] = b;
l->n += 1;
return 0;
}
void ben_list_set(struct bencode *list, size_t i, struct bencode *b)
{
struct bencode_list *l = ben_list_cast(list);
if (i >= l->n) {
fprintf(stderr, "bencode: ben_list_set() out of bounds: %zu\n", i);
abort();
}
ben_free(l->values[i]);
l->values[i] = b;
}
char *ben_print(const struct bencode *b)
{
size_t size = get_printed_size(b);
char *data = malloc(size + 1);
size_t len = 0;
if (data == NULL) {
fprintf(stderr, "bencode: No memory to print\n");
return NULL;
}
if (print(data, size, &len, b)) {
free(data);
return NULL;
}
assert(len == size);
data[size] = 0;
return data;
}
struct bencode *ben_str(const char *s)
{
return ben_blob(s, strlen(s));
}
const char *ben_strerror(int error)
{
switch (error) {
case BEN_OK:
return "OK (no error)";
case BEN_INVALID:
return "Invalid data";
case BEN_INSUFFICIENT:
return "Insufficient amount of data (need more data)";
case BEN_NO_MEMORY:
return "Out of memory";
default:
fprintf(stderr, "Unknown error code: %d\n", error);
return NULL;
}
}
#ifndef _BENCODE_H
#define _BENCODE_H
#include <stdio.h>
enum {
BENCODE_BOOL = 1,
BENCODE_DICT,
BENCODE_INT,
BENCODE_LIST,
BENCODE_STR,
};
enum {
BEN_OK = 0, /* No errors. Set to zero. Non-zero implies an error. */
BEN_INVALID, /* Invalid data was given to decoder */
BEN_INSUFFICIENT, /* Insufficient amount of data for decoding */
BEN_NO_MEMORY, /* Memory allocation failed */
};
struct bencode {
char type;
};
struct bencode_bool {
char type;
char b;
};
struct bencode_dict {
char type;
size_t n;
size_t alloc;
/* keys and values can be put into a same array, later */
struct bencode **keys;
struct bencode **values;
};
struct bencode_int {
char type;
long long ll;
};
struct bencode_list {
char type;
size_t n;
size_t alloc;
struct bencode **values;
};
struct bencode_str {
char type;
size_t len;
char *s;
};
/*
* Decode 'data' with 'len' bytes of data. Returns NULL on error.
* The encoded data must be exactly 'len' bytes (not less), otherwise NULL
* is returned. ben_decode2() function supports partial decoding ('len' is
* larger than actual decoded message) and gives more accurate error reports.
*/
struct bencode *ben_decode(const void *data, size_t len);
/*
* Same as ben_decode(), but allows one to set start offset for decoding with
* 'off' and reports errors more accurately.
*
* '*off' must point to decoding start offset inside 'data'.
* If decoding is successful, '*off' is updated to point to the next byte
* after the decoded message.
*
* If 'error != NULL', it is updated according to the success and error of
* the decoding. BEN_OK is success, BEN_INVALID means invalid data.
* BEN_INSUFFICIENT means data is invalid but could be valid if more data
* was given for decoding. BEN_NO_MEMORY means decoding ran out of memory.
*/
struct bencode *ben_decode2(const void *data, size_t len, size_t *off, int *error);
/*
* ben_cmp() is similar to strcmp(), but compares both integers and strings.
* An integer is always less than a string.
*
* ben_cmp(a, b) returns -1 if a < b, 0 if a == b, and 1 if a > b.
*/
int ben_cmp(const struct bencode *a, const struct bencode *b);
/*
* Comparison function suitable for qsort(). Uses ben_cmp(), so this can be
* used to order both integer and string arrays.
*/
int ben_cmp_qsort(const void *a, const void *b);
/* Get the serialization size of bencode structure 'b' */
size_t ben_encoded_size(const struct bencode *b);
/* encode 'b'. Return encoded data with a pointer, and length in '*len' */
void *ben_encode(size_t *len, const struct bencode *b);
/*
* encode 'b' into 'data' buffer with at most 'maxlen' bytes.
* Returns the size of encoded data.
*/
size_t ben_encode2(char *data, size_t maxlen, const struct bencode *b);
/* You must use ben_free() for all allocated bencode structures after use */
void ben_free(struct bencode *b);
/* Create a string from binary data with len bytes */
struct bencode *ben_blob(const void *data, size_t len);
/* Create a boolean from integer */
struct bencode *ben_bool(int b);
/* Create an empty dictionary */
struct bencode *ben_dict(void);
/*
* Try to locate 'key' in dictionary. Returns the associated value, if found.
* Returns NULL if the key does not exist.
*/
struct bencode *ben_dict_get(const struct bencode *d, const struct bencode *key);
struct bencode *ben_dict_get_by_str(const struct bencode *d, const char *key);
/*
* Try to locate 'key' in dictionary. Returns the associated value, if found.
* The value must be later freed with ben_free(). Returns NULL if the key
* does not exist.
*/
struct bencode *ben_dict_pop(struct bencode *d, const struct bencode *key);
/*
* Set 'key' in dictionary to be 'value'. An old value exists for the key
* is freed if it exists. 'key' and 'value' are owned by the dictionary
* after a successful call (one may not call ben_free() for 'key' or
* 'value'). One may free 'key' and 'value' if the call is unsuccessful.
*
* Returns 0 on success, -1 on failure (no memory).
*/
int ben_dict_set(struct bencode *d, struct bencode *key, struct bencode *value);
/* Same as ben_dict_set(), but the key is a C string */
int ben_dict_set_by_str(struct bencode *d, const char *key, struct bencode *value);
/* Same as ben_dict_set(), but the key and value are C strings */
int ben_dict_set_str_by_str(struct bencode *d, const char *key, const char *value);
struct bencode *ben_int(long long ll);
/* Create an empty list */
struct bencode *ben_list(void);
/*
* Append 'b' to 'list'. Returns 0 on success, -1 on failure (no memory).
* One may not call ben_free(b) after a successful call, because the list owns
* the object 'b'.
*/
int ben_list_append(struct bencode *list, struct bencode *b);
/*
* Returns a Python formatted C string representation of 'b' on success,
* NULL on failure. The returned string should be freed with free().
*
* Note: The string is terminated with '\0'. All instances of '\0' bytes in
* the bencoded data are escaped so that there is only one '\0' byte
* in the generated string at the end.
*/
char *ben_print(const struct bencode *b);
/* Create a string from C string (note bencode string may contain '\0'. */
struct bencode *ben_str(const char *s);
/* Return a human readable explanation of error returned with ben_decode2() */
const char *ben_strerror(int error);
/* ben_is_bool() returns 1 iff b is a boolean, 0 otherwise */
static inline int ben_is_bool(struct bencode *b)
{
return b->type == BENCODE_BOOL;
}
static inline int ben_is_dict(struct bencode *b)
{
return b->type == BENCODE_DICT;
}
static inline int ben_is_int(struct bencode *b)
{
return b->type == BENCODE_INT;
}
static inline int ben_is_list(struct bencode *b)
{
return b->type == BENCODE_LIST;
}
static inline int ben_is_str(struct bencode *b)
{
return b->type == BENCODE_STR;
}
/*
* ben_bool_const_cast(b) returns "(const struct bencode_bool *) b" if the
* underlying object is a boolean, NULL otherwise.
*/
static inline const struct bencode_bool *ben_bool_const_cast(const struct bencode *b)
{
return b->type == BENCODE_BOOL ? ((const struct bencode_bool *) b) : NULL;
}
/*
* ben_bool_cast(b) returns "(struct bencode_bool *) b" if the
* underlying object is a boolean, NULL otherwise.
*/
static inline struct bencode_bool *ben_bool_cast(struct bencode *b)
{
return b->type == BENCODE_BOOL ? ((struct bencode_bool *) b) : NULL;
}
static inline const struct bencode_dict *ben_dict_const_cast(const struct bencode *b)
{
return b->type == BENCODE_DICT ? ((const struct bencode_dict *) b) : NULL;
}
static inline struct bencode_dict *ben_dict_cast(struct bencode *b)
{
return b->type == BENCODE_DICT ? ((struct bencode_dict *) b) : NULL;
}
static inline const struct bencode_int *ben_int_const_cast(const struct bencode *i)
{
return i->type == BENCODE_INT ? ((const struct bencode_int *) i) : NULL;
}
static inline struct bencode_int *ben_int_cast(struct bencode *i)
{
return i->type == BENCODE_INT ? ((struct bencode_int *) i) : NULL;
}
static inline const struct bencode_list *ben_list_const_cast(const struct bencode *list)
{
return list->type == BENCODE_LIST ? ((const struct bencode_list *) list) : NULL;
}
static inline struct bencode_list *ben_list_cast(struct bencode *list)
{
return list->type == BENCODE_LIST ? ((struct bencode_list *) list) : NULL;
}
static inline const struct bencode_str *ben_str_const_cast(const struct bencode *str)
{
return str->type == BENCODE_STR ? ((const struct bencode_str *) str) : NULL;
}
static inline struct bencode_str *ben_str_cast(struct bencode *str)
{
return str->type == BENCODE_STR ? ((struct bencode_str *) str) : NULL;
}
/* Return the number of keys in a dictionary 'b' */
static inline size_t ben_dict_len(const struct bencode *b)
{
return ben_dict_const_cast(b)->n;
}
/* Return the number of items in a list 'b' */
static inline size_t ben_list_len(const struct bencode *b)
{
return ben_list_const_cast(b)->n;
}
/*
* ben_list_get(list, i) returns object at position i in list,
* or NULL if position i is out of bounds.
*/
static inline struct bencode *ben_list_get(const struct bencode *list, size_t i)
{
const struct bencode_list *l = ben_list_const_cast(list);
return i < l->n ? l->values[i] : NULL;
}
/*
* ben_list_set(list, i, b) sets object b to list at position i.
* The old value at position i is freed.
* The program aborts if position i is out of bounds.
*/
void ben_list_set(struct bencode *list, size_t i, struct bencode *b);
/* Return the number of bytes in a string 'b' */
static inline size_t ben_str_len(const struct bencode *b)
{
return ben_str_const_cast(b)->len;
}
/* Return boolean value (0 or 1) of 'b' */
static inline int ben_bool_val(const struct bencode *b)
{
return ben_bool_const_cast(b)->b ? 1 : 0;
}
/* Return integer value of 'b' */
static inline long long ben_int_val(const struct bencode *b)
{
return ben_int_const_cast(b)->ll;
}
/*
* Note: the string is always zero terminated. Also, the string may
* contain more than one zero.
* bencode strings are not compatible with C strings.
*/
static inline const char *ben_str_val(const struct bencode *b)
{
return ben_str_const_cast(b)->s;
}
/*
* ben_list_for_each() is an iterator macro for bencoded lists.
*
* pos is a size_t.
*/
#define ben_list_for_each(b, pos, l) \
for ((pos) = 0; (b) = ((const struct bencode_list *) (l))->values[(pos)], (pos) < ((const struct bencode_list *) (l))->n; (pos)++)
/*
* ben_dict_for_each() is an iterator macro for bencoded dictionaries.
*
* pos is a size_t.
*/
#define ben_dict_for_each(key, value, pos, d) \
for ((pos) = 0; (key) = ((const struct bencode_dict *) (d))->keys[(pos)], (value) = ((const struct bencode_dict *) (d))->values[(pos)], (pos) < ((const struct bencode_dict *) (d))->n; (pos)++)
#endif /* _BENCODE_H */
#include <string.h>
#include <stdlib.h>
#include "bencode.h"
// reads a binary file with the flag 'rb' on fopen
// return -1 if fail or the number of bytes read
int readbinaryfile(unsigned char * buffer, char * filename) {
FILE * h;
int i = 0;
char tmp;
h = fopen(filename, "rb");
if (!h) {
return -1;
}
while (!feof(h)) {
fread(&tmp,1,1,h);
buffer[i] = tmp;
i++;
}
fclose(h);
return i-1;
}
// read a torrent file. Support torrent with size less than 12k
// 0 = read or parser error
// 1 = file read ok
int parse_torrent(char *torrent_filename) {
struct bencode *torrent;
struct bencode *info;
struct bencode_list *files;
struct bencode_str *filename;
int piece_length = 0;
unsigned char buf[12 * 1024];
unsigned char *sha1_pieces;
int sha1_pieces_len;
int len = 0;
int file_length = 0;
// read a torrent file
memset(buf, 0, sizeof(buf));
len = readbinaryfile(buf, torrent_filename);
// read fail
if (len <= 0) {
return 0;
}
// decode the bencode buffer
torrent = (struct bencode*)ben_decode(buf,len);
// decode fail
if(!torrent) {
return 0;
}
// pull out the .info part, which has stuff about the file we're downloading
info = (struct bencode*)ben_dict_get_by_str((struct bencode*)torrent,"info");
// decode fail
if(!info) {
// clean memory and return
ben_free(torrent);
return 0;
}
// get the piece length
piece_length = ((struct bencode_int*)ben_dict_get_by_str(info,"piece length"))->ll;
Pprintf(false,"parse_torrent piece_length=%d\n", piece_length);
// get the concatened SHA1 from all pieces
sha1_pieces = ((struct bencode_str*)ben_dict_get_by_str(info,"pieces"))->s;
sha1_pieces_len = ((struct bencode_str*)ben_dict_get_by_str(info,"pieces"))->len;
//hexdump(sha1_pieces, sha1_pieces_len, "sha1_pieces");
// get the files dict from multiple file torrent. otherwise is a single file torrent
files = (struct bencode_list*)ben_dict_get_by_str(info,"files");
if (files) {
Pprintf(false, "parse_torrent multiple files\n");
}
else {
filename = (struct bencode_str*)ben_dict_get_by_str(info,"name");
file_length = ((struct bencode_int*)ben_dict_get_by_str(info,"length"))->ll;
Pprintf(false, "parse_torrent filename=%s\n", filename->s);
Pprintf(false, "parse_torrent file_length=%d\n", file_length);
Pprintf(false, "parse_torrent check_next_piece=%d\n", check_next_piece(filename->s, file_length, piece_length));
}
// clean memory and return
ben_free(torrent);
return 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment