Skip to content

Instantly share code, notes, and snippets.

@esc
Created September 11, 2012 16:27
Show Gist options
  • Save esc/3699663 to your computer and use it in GitHub Desktop.
Save esc/3699663 to your computer and use it in GitHub Desktop.
Benchmark for python-blosc no-copy-on-compression patch
From 2aa62ca0f9b03df90202ef8c7a739939ba6f6395 Mon Sep 17 00:00:00 2001
From: Valentin Haenel <valentin.haenel@gmx.de>
Date: Tue, 11 Sep 2012 13:40:46 +0200
Subject: [PATCH] don't copy large buffers, only small ones
---
Review note: this revision also releases the output bytes object when
blosc_compress() fails; without that Py_DECREF the object leaks on the
error path, because the old free(output) is removed by this patch.
blosc/blosc_extension.c | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/blosc/blosc_extension.c b/blosc/blosc_extension.c
index 7f3bd1dbe0..44ba66d169 100644
--- a/blosc/blosc_extension.c
+++ b/blosc/blosc_extension.c
@@ -61,8 +61,8 @@ PyDoc_STRVAR(compress__doc__,
static PyObject *
PyBlosc_compress(PyObject *self, PyObject *args)
{
- PyObject *result_str = NULL;
- void *input, *output;
+ PyObject *output = NULL;
+ void *input;
int clevel, shuffle, cbytes;
int nbytes, typesize;
@@ -72,7 +72,7 @@ PyBlosc_compress(PyObject *self, PyObject *args)
return NULL;
/* Alloc memory for compression */
- output = malloc(nbytes+BLOSC_MAX_OVERHEAD);
+ output = PyBytes_FromStringAndSize(NULL, nbytes+BLOSC_MAX_OVERHEAD);
if (output == NULL) {
PyErr_SetString(PyExc_MemoryError,
"Can't allocate memory to compress data");
@@ -82,22 +82,20 @@ PyBlosc_compress(PyObject *self, PyObject *args)
/* Compress */
Py_BEGIN_ALLOW_THREADS;
cbytes = blosc_compress(clevel, shuffle, (size_t)typesize, (size_t)nbytes,
- input, output, nbytes+BLOSC_MAX_OVERHEAD);
+ input, PyBytes_AS_STRING(output), nbytes+BLOSC_MAX_OVERHEAD);
Py_END_ALLOW_THREADS;
if (cbytes < 0) {
blosc_error(cbytes, "while compressing data");
- free(output);
+ Py_DECREF(output);
return NULL;
}
-
- /* This forces a copy of the output, but anyway */
- result_str = PyBytes_FromStringAndSize((char *)output, cbytes);
-
- /* Free the initial buffer */
- free(output);
-
- return result_str;
+ /* Shrink the bytes object to the compressed size; whether that moves (copies) the data is up to the allocator. */
+ if (_PyBytes_Resize(&output, cbytes) < 0){
+ /* On failure _PyBytes_Resize has released the object, set output to NULL and raised MemoryError. */
+ return NULL;
+ }
+ return output;
}
PyDoc_STRVAR(decompress__doc__,
--
1.7.9.5
From 0ba7255b793023b1af108b619c2c384aa0dfc22d Mon Sep 17 00:00:00 2001
From: Valentin Haenel <valentin.haenel@gmx.de>
Date: Mon, 10 Sep 2012 20:38:52 +0200
Subject: [PATCH] use no-copy strategy from python-snappy
---
Review note: this revision also (1) releases the output bytes object when
blosc_compress() fails, (2) passes the real allocation size
(nbytes+BLOSC_MAX_OVERHEAD) to maybe_resize() instead of nbytes, and
(3) writes the terminating NUL after the in-place trim.
blosc/blosc_extension.c | 42 +++++++++++++++++++++++++++++-------------
1 file changed, 29 insertions(+), 13 deletions(-)
diff --git a/blosc/blosc_extension.c b/blosc/blosc_extension.c
index 7f3bd1dbe0..2b16ce8c6b 100644
--- a/blosc/blosc_extension.c
+++ b/blosc/blosc_extension.c
@@ -13,6 +13,29 @@
#include "Python.h"
#include "blosc.h"
+#define RESIZE_TOLERATION 0.75
+
+/* Shamelessly copied from python-snappy.
+ * Trim the bytes object str, allocated with expected_size bytes, down to
+ * actual_size bytes. Returns str, or NULL with MemoryError set when
+ * _PyBytes_Resize fails (the object has then already been released). */
+static inline PyObject *
+maybe_resize(PyObject *str, size_t expected_size, size_t actual_size)
+{
+ // Tolerate up to 25% slop, to reduce the likelihood of
+ // reallocation and copying.
+ if (actual_size != expected_size) {
+ if (actual_size < expected_size * RESIZE_TOLERATION) {
+ _PyBytes_Resize(&str, actual_size);
+ return str;
+ }
+ Py_SIZE(str) = actual_size;
+ /* bytes objects must stay NUL-terminated after the in-place trim */
+ PyBytes_AS_STRING(str)[actual_size] = '\0';
+ }
+ return str;
+}
+
static PyObject *BloscError;
@@ -61,8 +84,8 @@ PyDoc_STRVAR(compress__doc__,
static PyObject *
PyBlosc_compress(PyObject *self, PyObject *args)
{
- PyObject *result_str = NULL;
- void *input, *output;
+ PyObject *output = NULL;
+ void *input;
int clevel, shuffle, cbytes;
int nbytes, typesize;
@@ -72,7 +95,7 @@ PyBlosc_compress(PyObject *self, PyObject *args)
return NULL;
/* Alloc memory for compression */
- output = malloc(nbytes+BLOSC_MAX_OVERHEAD);
+ output = PyBytes_FromStringAndSize(NULL, nbytes+BLOSC_MAX_OVERHEAD);
if (output == NULL) {
PyErr_SetString(PyExc_MemoryError,
"Can't allocate memory to compress data");
@@ -82,22 +105,15 @@ PyBlosc_compress(PyObject *self, PyObject *args)
/* Compress */
Py_BEGIN_ALLOW_THREADS;
cbytes = blosc_compress(clevel, shuffle, (size_t)typesize, (size_t)nbytes,
- input, output, nbytes+BLOSC_MAX_OVERHEAD);
+ input, PyBytes_AS_STRING(output), nbytes+BLOSC_MAX_OVERHEAD);
Py_END_ALLOW_THREADS;
if (cbytes < 0) {
blosc_error(cbytes, "while compressing data");
- free(output);
+ Py_DECREF(output);
return NULL;
}
-
- /* This forces a copy of the output, but anyway */
- result_str = PyBytes_FromStringAndSize((char *)output, cbytes);
-
- /* Free the initial buffer */
- free(output);
-
- return result_str;
+ return maybe_resize(output, nbytes+BLOSC_MAX_OVERHEAD, cbytes);
}
PyDoc_STRVAR(decompress__doc__,
--
1.7.9.5
#!/usr/bin/env python
import time
import numpy
import blosc
def do(str_):
    """Compress ``str_`` with blosc and report sizes and elapsed time.

    Prints three lines: the length of the input, the length of the
    compressed result, and the wall-clock seconds spent inside
    ``blosc.compress`` (called with ``typesize=8``).
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(len(str_))
    tic = time.time()
    compressed = blosc.compress(str_, typesize=8)
    toc = time.time()
    print(len(compressed))
    print("Time: %.2f seconds" % (toc - tic))
# First data set: 2e8 evenly spaced values between 1 and 100.
# The sample count has to be an integer; current numpy rejects a float here.
array_ = numpy.linspace(1, 100, int(2e8))
items = len(array_)
# tobytes() replaces the deprecated tostring() and returns the same raw bytes.
str1 = array_.tobytes()
do(str1)
# Drop the references to the first data set before building the second one.
del array_
del str1
# Second data set: the same number of uniformly random values.
array_ = numpy.random.rand(items)
str2 = array_.tobytes()
do(str2)
#!/bin/sh
url='https://github.com/esc/python-blosc'
# Reuse an existing checkout; otherwise clone, and stop if that fails,
# because every later step works inside python-blosc.
[ -d python-blosc ] || git clone "$url" || exit 1
base='29f168afe4'
# Build python-blosc at a given commit with a given patch and benchmark it.
#   $1 - commit to check out in the python-blosc clone
#   $2 - patch file handed to `git am` (path relative to the clone)
# Returns 1 without touching anything if the clone directory is missing.
do_bench(){
    # `git clean -dfx` must never run in the caller's directory, so bail
    # out when the clone cannot be entered.
    cd python-blosc || return 1
    git clean -dfx
    git checkout "$1"
    # Best effort: the base-line run passes a non-existent patch file on
    # purpose, so a failing `git am` does not abort the benchmark.
    git am "$2"
    python setup.py build_ext -i
    cd .. || return 1
    echo "Benchmark: $1 with patch: $2"
    PYTHONPATH=python-blosc ./bench.py
}
# Benchmark the same base commit three times, in this order:
#   1. with a fake patch file, i.e. the base commit itself
#   2. with the python-snappy style patch
#   3. with my style patch
for patch in \
    foo \
    ../0001-use-no-copy-strategy-from-python-snappy.patch \
    ../0001-don-t-copy-large-buffers-only-small-ones.patch
do
    do_bench "$base" "$patch"
done
# Remove the clone once all runs are done.
rm -rf python-blosc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment