Last active
August 24, 2021 15:11
-
-
Save mattjj/5213172 to your computer and use it in GitHub Desktop.
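Data chunking with overlapping windows. Uses stride tricks so that the windows are a view onto the data rather than copies; a zero-padded copy is made only when the data does not divide evenly into windows.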
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import numpy as np | |
from numpy.lib.stride_tricks import as_strided as ast | |
def chunk_data(data, window_size, overlap_size=0, flatten_inside_window=True):
    """Split ``data`` into fixed-length, optionally overlapping windows.

    Windows are taken along the first axis. Consecutive windows start
    ``window_size - overlap_size`` rows apart, so they share
    ``overlap_size`` rows. If the data does not divide evenly into windows
    (including when it is shorter than a single window), it is zero-padded
    at the end so that the last window is complete.

    Parameters
    ----------
    data : array_like
        1-D array of samples, or 2-D array with one sample vector per row.
        A 1-D input is treated as a single-column 2-D array.
    window_size : int
        Number of rows in each window. Must be positive.
    overlap_size : int, optional
        Number of rows shared by consecutive windows. Must be smaller than
        ``window_size``.
    flatten_inside_window : bool, optional
        If True (default), each window is flattened and the result has shape
        ``(num_windows, window_size * num_columns)``. Otherwise the result
        has shape ``(num_windows, window_size, num_columns)``.

    Returns
    -------
    numpy.ndarray
        Array of windows with the same dtype as ``data``. When ``data`` is
        C-contiguous and needs no padding, the result is a strided view onto
        ``data`` (no copy); overlapping windows then alias the same memory,
        so writing through the result is not recommended.

    Raises
    ------
    AssertionError
        If ``data`` is not 1-D or 2-D.
    ValueError
        If ``window_size`` is not positive or ``overlap_size`` is not
        smaller than ``window_size``.
    """
    data = np.asarray(data)
    assert data.ndim == 1 or data.ndim == 2
    if window_size <= 0:
        raise ValueError("window_size must be positive")
    if overlap_size >= window_size:
        raise ValueError("overlap_size must be smaller than window_size")

    if data.ndim == 1:
        data = data.reshape((-1, 1))

    num_rows, num_cols = data.shape
    step = window_size - overlap_size

    if num_rows == 0:
        # Nothing to window; return an empty result of the documented shape
        # instead of building a strided view over an empty buffer.
        if flatten_inside_window:
            return np.zeros((0, window_size * num_cols), dtype=data.dtype)
        return np.zeros((0, window_size, num_cols), dtype=data.dtype)

    # One window always covers the first `window_size` rows; every further
    # window covers `step` additional rows (ceiling division on the rest).
    rows_after_first = max(num_rows - window_size, 0)
    num_windows = 1 + (rows_after_first + step - 1) // step
    padded_rows = window_size + (num_windows - 1) * step

    if padded_rows != num_rows:
        # Zero-pad the tail so the last window is complete. The pad buffer
        # keeps the input dtype so integer/complex data is not coerced.
        padded = np.zeros((padded_rows, num_cols), dtype=data.dtype)
        padded[:num_rows] = data
        data = padded
    else:
        # The strides below assume a C-contiguous buffer; this is a no-op
        # for inputs that already are, and copies sliced/transposed views.
        data = np.ascontiguousarray(data)

    itemsize = data.dtype.itemsize
    row_bytes = num_cols * itemsize
    if flatten_inside_window:
        shape = (num_windows, window_size * num_cols)
        strides = (step * row_bytes, itemsize)
    else:
        shape = (num_windows, window_size, num_cols)
        strides = (step * row_bytes, row_bytes, itemsize)

    return ast(data, shape=shape, strides=strides)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
In the last one, the first row contains the first three vectors (which are pairs), etc.