Skip to content

Instantly share code, notes, and snippets.

@jborg
Created May 10, 2014 12:45
Show Gist options
  • Save jborg/2f4d6702201228be1de4 to your computer and use it in GitHub Desktop.
Save jborg/2f4d6702201228be1de4 to your computer and use it in GitHub Desktop.
diff --git a/attic/_chunker.c b/attic/_chunker.c
index 8dbbce5..7cdfa51 100644
--- a/attic/_chunker.c
+++ b/attic/_chunker.c
@@ -1,4 +1,15 @@
#include <Python.h>
+#include <openssl/aes.h>
+
+#if defined(BYTE_ORDER)&&(BYTE_ORDER == BIG_ENDIAN) /* big-endian host: 32-bit values are byte-swapped to/from little-endian */
+#define _le32toh(x) __builtin_bswap32(x) /* little-endian -> host order */
+#define _htole32(x) __builtin_bswap32(x) /* host order -> little-endian */
+#elif defined(BYTE_ORDER)&&(BYTE_ORDER == LITTLE_ENDIAN) /* little-endian host: both conversions are no-ops */
+#define _le32toh(x) (x)
+#define _htole32(x) (x)
+#else /* BYTE_ORDER is undefined or matches neither constant: refuse to compile */
+#error Unknown byte order
+#endif /* NOTE(review): no header visibly included here defines BYTE_ORDER; presumably it arrives via Python.h -- confirm */
/* Cyclic polynomial / buzhash: https://en.wikipedia.org/wiki/Rolling_hash */
@@ -80,8 +91,11 @@ typedef struct {
PyObject *fd;
int done, eof;
size_t remaining, bytes_read, bytes_yielded, position, last;
+ AES_KEY key;
} Chunker;
+static uint8_t test_key[32] = {0}; /* all-zero 32-byte key passed to AES_set_encrypt_key() as a 256-bit key; NOTE(review): presumably a placeholder for testing -- confirm it is replaced by a real secret before use */
+
static Chunker *
chunker_init(PyObject *fd, int window_size, int chunk_mask, int min_size, uint32_t seed)
{
@@ -101,6 +115,7 @@ chunker_init(PyObject *fd, int window_size, int chunk_mask, int min_size, uint32
c->position = 0;
c->last = 0;
c->eof = 0;
+ AES_set_encrypt_key(test_key, 256, &c->key);
return c;
}
@@ -161,6 +176,9 @@ chunker_process(Chunker *c)
uint32_t sum, chunk_mask = c->chunk_mask, min_size = c->min_size, window_size = c->window_size;
int n = 0;
int old_last;
+ uint8_t input_block[AES_BLOCK_SIZE] = {0}, output_block[AES_BLOCK_SIZE];
+ uint32_t *insecure_sum = (uint32_t *)input_block;
+ uint32_t *secure_sum = (uint32_t *)output_block;
if(c->done) {
if(c->bytes_read == c->bytes_yielded)
@@ -189,10 +207,14 @@ chunker_process(Chunker *c)
}
}
sum = buzhash(c->data + c->position, window_size, c->table);
- while(c->remaining > c->window_size && ((sum & chunk_mask) || n < min_size)) {
+ *insecure_sum = _htole32(sum);
+ AES_encrypt(input_block, output_block, &c->key);
+ while(c->remaining > c->window_size && ((_le32toh(*secure_sum) & chunk_mask) || n < min_size)) {
sum = buzhash_update(sum, c->data[c->position],
c->data[c->position + window_size],
window_size, c->table);
+ *insecure_sum = _htole32(sum);
+ AES_encrypt(input_block, output_block, &c->key);
c->position++;
c->remaining--;
n++;
diff --git a/perftest.py b/perftest.py
new file mode 100644
index 0000000..c71c0a5
--- /dev/null
+++ b/perftest.py
@@ -0,0 +1,23 @@
+from attic.chunker import chunkify
+import time
+from io import BytesIO
+
+
+with open('/dev/urandom', 'rb') as fd:  # runs at import time, not inside test()
+ data = fd.read(1024 * 1024 * 100)  # request 100 MiB of random bytes as the benchmark input
+
+def test():  # Time one full pass of chunkify over the in-memory data and print throughput and totals.
+ t0 = time.time()  # wall-clock start; the urandom read above is not included in the timing
+ num_chunks = 0
+ num_bytes = 0
+ for chunk in chunkify(BytesIO(data), 4096, 0xffff, 1024, 0):  # NOTE(review): positional args presumably window_size, chunk_mask, min_size, seed as in chunker_init -- confirm
+ num_bytes += len(chunk)
+ num_chunks += 1
+ duration = time.time() - t0
+ print('%.2f seconds. %.2f MB/s' % (duration, num_bytes / duration / 1024 / 1024))  # "MB" here is bytes / 1024 / 1024
+ print('Num bytes:', num_bytes)
+ print('Num chunks:', num_chunks)
+
+if __name__ == '__main__':  # run the benchmark only when executed as a script
+ test()
+
diff --git a/setup.py b/setup.py
index bdc96cd..f45ae0e 100644
--- a/setup.py
+++ b/setup.py
@@ -86,7 +86,7 @@ cmdclass.update({'build_ext': build_ext, 'sdist': Sdist})
ext_modules = [
Extension('attic.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs),
- Extension('attic.chunker', [chunker_source]),
+ Extension('attic.chunker', [chunker_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs),  # same link/include settings as attic.crypto; the chunker patch now includes <openssl/aes.h>
Extension('attic.hashindex', [hashindex_source])
]
if platform == 'Linux':
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment