Skip to content

Instantly share code, notes, and snippets.

@mmngreco
Forked from masdeseiscaracteres/README.md
Created April 5, 2021 06:27
Show Gist options
  • Save mmngreco/ae10ef6ac4f3f069e80e5e88004c26b6 to your computer and use it in GitHub Desktop.
Save mmngreco/ae10ef6ac4f3f069e80e5e88004c26b6 to your computer and use it in GitHub Desktop.
Rolling apply on NumPy arrays using pandas internals
####################################################################
# Benchmark script: times pandas' internal rolling kernels called directly on
# a NumPy array against the public ``Series.rolling`` API and two third-party
# alternatives.
#
# NOTE: the ``%timeit`` lines are IPython magics, so this section only runs
# inside IPython/Jupyter.
# NOTE(review): ``np`` and ``pd`` are not imported in this section -- it
# presumably assumes ``import numpy as np`` / ``import pandas as pd`` were
# already executed in the session; confirm before running.
####################################################################
# Data generation
x = np.arange(10, dtype='float64')  # raw float64 input: 0.0 .. 9.0
s = pd.Series(x)                    # same data wrapped for the public API
window=3
min_periods = 0
closed = 'right'
# Alternative explicit integer index (disabled; ``None`` is passed instead):
# indices = np.arange(len(x), dtype='int64')
indices = None
####################################################################
# Rolling sum: private kernel vs. public ``Series.rolling(...).sum()``.
# NOTE(review): ``pd._libs.window`` is a private pandas module; its location
# and the positional signature used below may differ between pandas
# versions -- verify against the installed version.
roll_sum = pd._libs.window.roll_sum
%timeit roll_sum(x, window, min_periods, indices, closed)
R = s.rolling(window, min_periods)  # rolling object reused by the timings below
%timeit R.sum()
####################################################################
# Generic rolling apply with an arbitrary Python callable (here ``np.sum``):
# private kernel vs. public ``Rolling.apply``.
# NOTE(review): the trailing ``0, func, (), ()`` arguments are presumably
# (offset, function, args, kwargs) -- confirm against the pandas source.
roll_generic = pd._libs.window.roll_generic
func = np.sum
%timeit roll_generic(x, window, min_periods, indices, closed, 0, func, (), ())
%timeit R.apply(func)
####################################################################
# roll_sum performance is comparable to its competitors
# Competitor 1: SciPy ``lfilter`` with an all-ones FIR kernel of length
# ``window`` and denominator 1.0.
from scipy.signal import lfilter
h = np.array([1]*window, dtype=float)
%timeit lfilter(h, 1.0, x)
# Competitor 2: Bottleneck's ``move_sum`` with ``min_count=1``.
from bottleneck import move_sum
%timeit move_sum(x, window, min_count=1)
import numpy as np
from numba import guvectorize
def rolling_window(a, window):
    """Return a read-only view of every length-``window`` slice of the last axis.

    The result is built with stride tricks, so no data is copied: window ``j``
    along the last axis of ``a`` is ``result[..., j, :]`` and equals
    ``a[..., j:j + window]``.

    Parameters
    ----------
    a : array_like
        Input with at least one dimension. Windows are taken along its last
        axis.
    window : int
        Number of consecutive elements per window; must be at least 1.

    Returns
    -------
    numpy.ndarray
        View of shape ``a.shape[:-1] + (a.shape[-1] - window + 1, window)``.
        It is marked read-only because neighbouring windows share memory, so
        an in-place write would silently modify several windows at once.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1, or so large that the number of
        windows (``a.shape[-1] - window + 1``) would be negative.
    """
    a = np.asarray(a)
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")

    n_windows = a.shape[-1] - window + 1
    if n_windows < 0:
        raise ValueError(
            f"window ({window}) is too large for an axis of length {a.shape[-1]}"
        )

    shape = a.shape[:-1] + (n_windows, window)
    # Stepping to the next window and stepping inside a window both advance
    # by one element of the last axis, hence the repeated last stride.
    strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(
        a, shape=shape, strides=strides, writeable=False
    )
def rolling_apply_unary(g):
    """Build a Numba generalized ufunc applying ``g`` over a trailing window.

    The returned gufunc has layout ``(n),()->(n)`` and is compiled for
    ``float64`` data with an ``int64`` window. For each position ``i`` of the
    core (last) axis it stores ``g(arg0[i - window + 1:i + 1])``; the first
    ``window - 1`` positions, where no full window exists yet, are filled
    with NaN.

    Parameters
    ----------
    g : callable
        Reduction taking one 1-D float64 array and returning a scalar. It is
        called from inside the ``guvectorize``-decorated kernel, so it must be
        something Numba can compile (this file uses ``np.sum``).

    Returns
    -------
    gufunc
        Callable as ``f(arg0, window)``.
    """
    # Note: a new f() is compiled each time rolling_apply_unary() is called!
    @guvectorize(['void(float64[:],int64, float64[:])'], '(n),()-> (n)')
    def f(arg0, window, out):
        n = len(arg0)
        # Clamp the NaN warm-up region to the valid index range [0, n].
        # Compiled Numba code does not bounds-check array writes by default,
        # so an unclamped ``window - 1 > n`` would write past the end of
        # ``out``.
        transient_len = max(0, min(window - 1, n))
        for i in range(transient_len):
            out[i] = np.nan
        for i in range(transient_len, n):
            out[i] = g(arg0[i - window + 1:i + 1])
    return f
def rolling_apply_binary(g):
    """Build a Numba generalized ufunc applying ``g`` to two aligned windows.

    The returned gufunc has layout ``(n),(n),()->(n)`` and is compiled for
    ``float64`` data with an ``int64`` window. For each position ``i`` of the
    core (last) axis it stores
    ``g(arg0[i - window + 1:i + 1], arg1[i - window + 1:i + 1])``; the first
    ``window - 1`` positions, where no full window exists yet, are filled
    with NaN.

    Parameters
    ----------
    g : callable
        Function taking two 1-D float64 arrays of equal length and returning
        a scalar. It is called from inside the ``guvectorize``-decorated
        kernel, so it must be something Numba can compile (this file uses
        ``np.dot``).

    Returns
    -------
    gufunc
        Callable as ``f(arg0, arg1, window)``.
    """
    # Note: a new f() is compiled each time rolling_apply_binary() is called!
    @guvectorize(['void(float64[:], float64[:], int64, float64[:])'], '(n),(n),()-> (n)')
    def f(arg0, arg1, window, out):
        n = len(arg0)
        # Clamp the NaN warm-up region to the valid index range [0, n].
        # Compiled Numba code does not bounds-check array writes by default,
        # so an unclamped ``window - 1 > n`` would write past the end of
        # ``out``.
        transient_len = max(0, min(window - 1, n))
        for i in range(transient_len):
            out[i] = np.nan
        for i in range(transient_len, n):
            start = i - window + 1
            stop = i + 1
            out[i] = g(arg0[start:stop], arg1[start:stop])
    return f
# Demo: rolling sum and rolling dot product over each row of a 2 x 10 array.
window = 3
x = np.multiply.outer([1, -1], np.arange(10))

# Reductions to roll, and the gufuncs built around them.
unary_foo = np.sum
binary_foo = np.dot
f1 = rolling_apply_unary(unary_foo)
f2 = rolling_apply_binary(binary_foo)

# Rolling sum of each row, then rolling dot product of each row with itself.
f1(x, window)
f2(x, x, window)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment