Skip to content

Instantly share code, notes, and snippets.

@dutc
Created October 29, 2022 00:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dutc/914d74397f3b8e657b77e7f27d4bb78b to your computer and use it in GitHub Desktop.
Save dutc/914d74397f3b8e657b77e7f27d4bb78b to your computer and use it in GitHub Desktop.
Longest consecutive region meeting a specified predicate (in `pandas`)
#!/usr/bin/env python3
from numpy.random import default_rng
from pandas import Series, date_range, MultiIndex, Index, to_timedelta
from string import ascii_lowercase
# Seeded generator so every random draw below is reproducible from run to run.
rng = default_rng(0)

# Synthetic signal: 100 random four-letter entity names crossed with 180 days
# of minute-frequency timestamps, carrying integers drawn from [-10, 10).
s = Series(
    index=(idx := MultiIndex.from_product([
        # choice() yields a (100, 4) array of single letters; viewing it as
        # '<U4' fuses each row of four letters into one four-character string.
        rng.choice([*ascii_lowercase], size=(100, 4)).view('<U4').ravel(),
        # 'min' is the minute alias; the older 'T' spelling is deprecated in
        # recent pandas releases.
        date_range('2020-01', freq='min', periods=180*24*60),
    ], names=['entity', 'date'])),
    data=rng.integers(-10, +10, size=len(idx)),
    name='signal',
).pipe(
    lambda s: s.set_axis(
        MultiIndex.from_arrays([
            s.index.get_level_values('entity'),
            # Push every timestamp forward by a random 0-3599 seconds so the
            # observations are irregularly spaced.
            s.index.get_level_values('date') + to_timedelta(rng.integers(3600, size=len(s)), unit='s'),
        ])
    )
    # Keep a random half of the rows. Drawing from `rng` (instead of NumPy's
    # global state) keeps this step deterministic like the rest of the setup.
    .sample(frac=.50, random_state=rng)
    .sort_index()
)
def consecutive_regions(obj, pred):
    """Label each run of consecutive equal values in *pred* with a region id.

    Parameters
    ----------
    obj
        Object whose ``.index`` is used to label the result.
    pred : Series
        Values to scan for runs (typically a boolean mask computed from
        *obj*).

    Returns
    -------
    Series
        Named ``'region'`` and indexed by ``obj.index``. The id is 1 for the
        first element and goes up by one each time an element of *pred*
        differs from the element before it.
    """
    # An element opens a new region when it differs from its predecessor; the
    # first element is compared against the missing value that shift() inserts,
    # so it always opens region 1.
    starts = pred != pred.shift()
    return Series(
        data=starts.cumsum(),
        index=obj.index,
        name='region',
    )
def _longest_positive_run(g):
    """Return the longest run of consecutive strictly positive values in *g*.

    *g* is one ``entity`` group of the signal. The result is that run with
    the ``entity`` index level dropped. When several runs share the maximum
    length, the earliest one is returned; when *g* has no positive value, the
    result is empty.
    """
    pred = g > 0
    reg = consecutive_regions(g, pred)
    # Only regions where the predicate holds are candidates.
    positive = reg.loc[pred]
    if positive.empty:
        # idxmax() raises on an empty input, so return an empty selection.
        return g.iloc[:0].droplevel('entity')
    # Region ids are counted in ascending order and idxmax() returns the first
    # maximum, so ties go to the earliest run.
    longest = positive.groupby(positive).count().idxmax()
    # Select by position rather than by label: the jittered timestamps are not
    # guaranteed to be unique, and a label lookup on duplicated labels would
    # return every row sharing a timestamp with the run.
    return g.loc[(reg == longest).to_numpy()].droplevel('entity')


print(
    s.groupby('entity').apply(_longest_positive_run),
    sep='\n',
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment