# Legacy single-letter pandas frequency units and their current spellings.
# pandas >= 2.2 deprecates the upper-case forms; the lower-case spellings are
# accepted by old and new releases alike.
_LEGACY_FREQ_UNITS = {'T': 'min', 'S': 's', 'L': 'ms', 'U': 'us', 'N': 'ns', 'H': 'h'}


def _normalize_freq(freq):
    """Rewrite a legacy frequency alias such as '15T' or 'S' to its modern spelling."""
    if not isinstance(freq, str):
        return freq
    count, unit = freq[:-1], freq[-1:]
    # Only touch "<digits><unit>" so multi-letter aliases like 'MS' stay intact.
    if unit in _LEGACY_FREQ_UNITS and (not count or count.isdigit()):
        return count + _LEGACY_FREQ_UNITS[unit]
    return freq


def _freq_to_timedelta(freq):
    """Return the length of one step of ``freq`` as a ``pd.Timedelta``."""
    # Adding the offset to a fixed timestamp avoids the ``.delta`` attribute,
    # which is deprecated on pandas offsets.
    epoch = pd.Timestamp('1970-01-01')
    return (epoch + to_offset(freq)) - epoch


def resample(tseries, rate='15T', short_rate='S', max_gap=None):
    """ Resample (unevenly spaced) timeseries data linearly by first upsampling to a
    high frequency (short_rate) then downsampling to the desired rate.

    :param tseries: a pandas timeseries object (must have a ``DatetimeIndex``)
    :param rate: rate that tseries should be resampled to
    :param short_rate: intermediate upsampling rate; if None, the smallest nonzero
        interval of tseries (truncated to whole seconds, at least one second) is
        used, capped at ``rate``
    :param max_gap: null intervals larger than `max_gap` are being treated as missing
        data and not interpolated. if None, always interpolate. must be provided as
        pandas frequency string format, e.g. '6h'. The masked range includes both
        end points of the gap.
    :return: the resampled series; an empty input is returned unchanged
    :raises AssertionError: if the index is not datetime-like, or if an explicit
        ``short_rate`` is longer than ``rate``

    Copyright (c) 2017 WATTx GmbH
    License: Apache License
    """
    # return series if empty
    if tseries.empty:
        return tseries

    # check for datetime index (raised explicitly so the check survives ``-O``;
    # AssertionError is kept because that is what callers have always seen)
    if not isinstance(tseries.index, pd.DatetimeIndex):
        raise AssertionError('Object must have a datetime-like index.')

    # sort tseries by time so that all intervals below are non-negative
    tseries = tseries.sort_index()
    index = tseries.index

    rate = _normalize_freq(rate)
    rate_delta = _freq_to_timedelta(rate)

    # time intervals between consecutive samples, in seconds
    diff = np.asarray((index[1:] - index[:-1]).total_seconds(), dtype=float)

    big_gaps = []
    if max_gap is not None:
        # remember (start, stop) timestamps of intervals larger than max_gap
        gap_seconds = _freq_to_timedelta(_normalize_freq(max_gap)).total_seconds()
        positions = np.flatnonzero(diff > gap_seconds)
        big_gaps = list(zip(index[positions], index[positions + 1]))

    if short_rate is None:
        positive = diff[diff > 0]
        if positive.size:
            # smallest nonzero interval, in whole seconds; clamp to 1 s so that
            # sub-second spacing does not produce a zero-length rule
            short_rule = pd.Timedelta(seconds=max(int(positive.min()), 1))
        else:
            # single sample or only duplicate timestamps: nothing to infer
            short_rule = rate
        # if smallest interval is still larger than rate, use rate instead
        if _freq_to_timedelta(short_rule) > rate_delta:
            short_rule = rate
    else:
        short_rule = _normalize_freq(short_rate)
        # make sure entered short_rate is smaller than rate
        if _freq_to_timedelta(short_rule) > rate_delta:
            raise AssertionError('short_rate must be <= rate')

    # upsample to short_rate and fill the new grid by linear interpolation
    upsampled = tseries.resample(short_rule).mean().interpolate()
    # downsample to desired rate, taking the value at (or last before) each step
    result = upsampled.resample(rate).ffill()

    # replace values in large gap intervals with NaN
    for start, stop in big_gaps:
        result.loc[start:stop] = np.nan
    return result

@AlexEngelhardt AlexEngelhardt commented Nov 20, 2018

Howdy! This would be very helpful for me right now, but where is the to_offset function defined?

Edit: Ah, never mind — I should have googled it: from pandas.tseries.frequencies import to_offset


@david-waterworth david-waterworth commented Jun 12, 2019

An alternative that might be more efficient is to use reindex. As far as I can see, this doesn't exhibit the shift introduced by resample().mean().interpolate(). The gap filter is quite useful!

