Skip to content

Instantly share code, notes, and snippets.

@dvgodoy
Last active November 24, 2022 16:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dvgodoy/e2a9dac28976a2c93ea7e337228a5b26 to your computer and use it in GitHub Desktop.
Save dvgodoy/e2a9dac28976a2c93ea7e337228a5b26 to your computer and use it in GitHub Desktop.
from scipy import interpolate
def fill_values(data,
                max_contiguous_na=5,
                periods_for_extrapolation=5,
                method='slinear'):
    """Fill missing (NaN) values of a one-dimensional signal.

    The gaps inside the window ``[ini, end)`` returned by
    ``bounds(data, max_contiguous_na)`` are interpolated over time and the
    result is capped at the minimum and maximum values observed in the whole
    signal. Positions before the window are set to NaN. Missing values after
    the window are replaced by the mean of the last
    ``periods_for_extrapolation`` values of the interpolated window.

    Parameters
    ----------
    data : array-like supporting ``.copy()``, slicing and ``np.isnan``
        The signal to fill; it is never modified in place.
    max_contiguous_na : int, default 5
        Forwarded to ``bounds``. NOTE(review): ``bounds`` is defined outside
        this block; per the original comments it selects the longest
        sequence containing no more than this many contiguous missing
        points -- confirm against its implementation.
    periods_for_extrapolation : int, default 5
        Number of trailing interpolated values averaged to fill the gaps
        located after the window.
    method : str, default 'slinear'
        Passed as ``kind`` to ``scipy.interpolate.interp1d``.

    Returns
    -------
    array-like
        A copy of ``data`` when it is entirely NaN, has no NaN at all, or
        when the interpolator cannot be built; otherwise a NumPy array of
        ``len(data)`` elements with the gaps filled as described above.
    """
    signal = data.copy()
    missing = np.isnan(signal)

    # If there are no valid data points, there's nothing to fill
    if np.all(missing):
        return signal
    # A signal without gaps needs no filling either
    if not np.any(missing):
        return signal

    time = np.arange(len(data))
    # Observed extremes of the whole signal, used to cap interpolated values
    minv, maxv = np.nanmin(signal), np.nanmax(signal)

    # Restricts the interpolation to the window selected by `bounds`
    ini, end = bounds(data, max_contiguous_na)
    window = signal[ini:end]
    window_time = time[ini:end]
    valid = ~np.isnan(window)

    # Creates a filler function to interpolate the missing values over time
    try:
        filler_func = interpolate.interp1d(window_time[valid],
                                           window[valid],
                                           kind=method)
    except ValueError:
        # The interpolator could not be built from the valid points of the
        # window. Hand back the full-length, unfilled copy (not just the
        # window) so the output always lines up with the input.
        return signal

    # Passes the time as argument to the filler function and caps the
    # interpolated values at the actually observed min and max values
    filled = np.clip(filler_func(window_time), minv, maxv)

    pieces = []
    if ini > 0:
        # Rebuilds the full sized array, if the window doesn't start at zero
        pieces.append(np.full(ini, np.nan))
    pieces.append(filled)
    if end < len(data):
        # If the window ends before the last time period, extrapolates the
        # remaining missing values using the average of the last periods.
        # The average is taken from the interpolated window only, so the NaN
        # padding added in front of it can never turn the average into NaN.
        avg = filled[-periods_for_extrapolation:].mean()
        pieces.append(np.nan_to_num(data[end:].copy(), nan=avg))
    return np.concatenate(pieces)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment