Skip to content

Instantly share code, notes, and snippets.

@alexland
Last active November 28, 2023 07:12
Show Gist options
  • Star 38 You must be signed in to star a gist
  • Fork 11 You must be signed in to fork a gist
  • Save alexland/ce02d6ae5c8b63413843 to your computer and use it in GitHub Desktop.
Save alexland/ce02d6ae5c8b63413843 to your computer and use it in GitHub Desktop.
serialize, persist, retrieve, and de-serialize a NumPy array as a binary string (any dimension, any dtype); exemplary use case: a web app calculates some result (e.g., from a Machine Learning algorithm) using NumPy, and the result is a NumPy array; it is more efficient to return that result directly than to persist the array and then retrieve it via a query
import time
import numpy as NP
from redis import StrictRedis as redis
# a 2D array to serialize
A = 10 * NP.random.randn(10000).reshape(1000, 10)

# raw bytes carry neither dtype nor shape, so record both before serializing
array_dtype = str(A.dtype)
l, w = A.shape

# serialize the array as raw C-order bytes; tobytes() replaces the deprecated
# tostring() and flattens on its own, so no explicit ravel() is needed
A_bytes = A.tobytes()

# create a key as a UNIX timestamp followed by '|', then the dtype and the
# two shape values delimited by '#'
db = redis(db=0)
key = '{0}|{1}#{2}#{3}'.format(int(time.time()), array_dtype, l, w)

# store the binary string in redis
db.set(key, A_bytes)

# retrieve the proto-array from redis
A1 = db.get(key)

# deserialize it: the dtype and shape come back out of the key as strings
array_dtype, l, w = key.split('|')[1].split('#')
# frombuffer() replaces the deprecated binary mode of fromstring(); it returns
# a read-only view over the bytes, so copy() keeps the result writable as
# before (drop the copy() if a read-only, zero-copy array is acceptable)
A = NP.frombuffer(A1, dtype=array_dtype).reshape(int(l), int(w)).copy()
@YehudaItkin
Copy link

Thanks man!

@wecacuee
Copy link

wecacuee commented Mar 6, 2018

from io import BytesIO
import numpy as np


# a 2D array to serialize
A = 10 * np.random.randn(10000).reshape(1000, 10)

# np.save writes the array in .npy format into the in-memory buffer, so the
# resulting bytes can be handed back to np.load without tracking dtype or
# shape separately
with BytesIO() as b:
    np.save(b, A)
    serialized_A = b.getvalue()

# deserialize by wrapping the bytes in a fresh in-memory file object
A = np.load(BytesIO(serialized_A))

@moddabiri
Copy link

import numpy as np
from time import time


def method1():
    """Time an in-memory ``np.save`` / ``np.load`` round trip.

    Builds a random 1000x10 array, serializes it into a ``BytesIO`` buffer
    with ``np.save``, loads it back with ``np.load``, and prints the elapsed
    wall-clock time of each phase. Returns ``None``.
    """
    # Imported locally (and before the timer starts) because the snippet's
    # import block does not bring BytesIO into scope.
    from io import BytesIO

    A = 10 * np.random.randn(10000).reshape(1000, 10)

    start_time = time()

    with BytesIO() as b:
        np.save(b, A)
        # getvalue() must be called while the buffer is still open
        serialized_A = b.getvalue()
    ser_end = time()

    A = np.load(BytesIO(serialized_A))
    deser_end = time()

    print("Method1: Ser:%f Des:%f" % ((ser_end - start_time),
            (deser_end - ser_end)))


def method2():
    """Time a raw-bytes round trip of a NumPy array.

    Builds a random 1000x10 array, serializes it to raw bytes, rebuilds an
    array of the same shape from those bytes, and prints the elapsed
    wall-clock time of each phase. Returns ``None``.
    """
    A = 10 * np.random.randn(10000).reshape(1000, 10)

    start_time = time()

    l, w = A.shape
    # tobytes() replaces the deprecated tostring(); it emits C-order bytes
    # directly, so no explicit ravel() is needed.
    A2 = A.tobytes()
    ser_end = time()

    # frombuffer() replaces the deprecated binary mode of fromstring(). The
    # dtype is passed explicitly because raw bytes do not record it.
    np.frombuffer(A2, dtype=A.dtype).reshape(int(l), int(w))
    deser_end = time()

    print("Method2: Ser:%f Des:%f" % ((ser_end - start_time),
            (deser_end - ser_end)))


if __name__ == "__main__":
    # Run both benchmarks back to back, twice over. Presumably the second
    # pass is there to show timings without one-time start-up cost.
    for _ in range(2):
        method1()
        method2()

--------------------------------
Output:
Method1: Ser:0.002921 Des:0.000512
Method2: Ser:0.000008 Des:0.000032
Method1: Ser:0.000217 Des:0.000285
Method2: Ser:0.000007 Des:0.000013

@gwerbin
Copy link

gwerbin commented Oct 15, 2018

Both the BytesIO and "binary string" methods require making copies of the array, right? Would np.frombuffer instead of np.fromstring be able to load data from Redis without copying?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment