Skip to content

Instantly share code, notes, and snippets.

@dalejung
Created January 25, 2016 01:25
Show Gist options
  • Save dalejung/7f2e198982b529f85e80 to your computer and use it in GitHub Desktop.
Save dalejung/7f2e198982b529f85e80 to your computer and use it in GitHub Desktop.
old thing from 2013
import pandas as pd
import numpy as np
def _run_starts_ends(bools):
    """
    Mark the endpoints of each contiguous run of True values.

    Parameters
    ----------
    bools : pd.Series
        Boolean Series. ``.diff()`` and ``.iloc`` are used, so a plain
        ndarray is not accepted.

    Returns
    -------
    starts : pd.Series
        Boolean Series, True at the first element of each run.
    ends : pd.Series
        Boolean Series, True at the first False element *after* each run
        (i.e. an exclusive end). A run that reaches the last element gets
        no end marker; the caller has to account for that case.
    """
    # +1 where False -> True, -1 where True -> False, NaN at position 0.
    diffs = bools.astype(int).diff()
    starts = diffs == 1
    ends = diffs == -1
    # diff() has no predecessor for the first element, so a run beginning
    # at position 0 must be marked by hand. The length guard keeps empty
    # input from raising IndexError.
    if len(bools) > 0 and bool(bools.iloc[0]):
        starts.iloc[0] = True
    return starts, ends
def run_lengths(bools):
    """
    Return a Series marking the length of each run at its first period.

    Parameters
    ----------
    bools : pd.Series
        Boolean Series.

    Returns
    -------
    runs : pd.Series
        Same index as ``bools``. Holds the run length at the position
        where each run of True values starts and NaN everywhere else.
        Empty input yields an empty Series.
    """
    # Build the result in a plain float array and wrap it at the end;
    # Series.put, which the old implementation relied on, no longer
    # exists in pandas >= 1.0.
    lengths = np.full(len(bools), np.nan)
    if len(bools) == 0:
        return pd.Series(lengths, index=bools.index)

    starts, ends = _run_starts_ends(bools)
    # Integer positions of the run boundaries; flatnonzero always returns
    # an integer array, even when there are no runs at all.
    start_pos = np.flatnonzero(np.asarray(starts))
    end_pos = np.flatnonzero(np.asarray(ends))
    # A run that is still open at the last element has no end marker, so
    # close it one past the final position (ends are exclusive).
    if bool(bools.iloc[-1]):
        end_pos = np.append(end_pos, len(bools))

    lengths[start_pos] = end_pos - start_pos
    return pd.Series(lengths, index=bools.index)
def break_lengths(bools):
    """
    Return the length of each break (run of False values) in ``bools``.

    Inverts ``bools`` with ``~`` and passes the result to ``run_lengths``,
    so the return value is whatever ``run_lengths`` produces for the
    inverted input.
    """
    return run_lengths(~bools)
def grab_runs(bools):
    """
    Tabulate every run of True values together with its adjacent breaks.

    Parameters
    ----------
    bools : pd.Series
        Boolean Series.

    Returns
    -------
    runs : pd.DataFrame
        One row per run, indexed by the label at which the run starts,
        with columns ``run_length``, ``breaks_after`` and
        ``breaks_before``. A run with no break on one side (it touches the
        start or end of ``bools``) gets NaN in that column. When ``bools``
        contains no run, an empty frame with the same columns is returned.
    """
    breaks = break_lengths(bools)
    runs = pd.DataFrame({'run_length': run_lengths(bools).dropna()})
    n_runs = len(runs)

    # Plain arrays keep the assignments below strictly positional.
    breaks_before = np.full(n_runs, np.nan)
    breaks_after = np.full(n_runs, np.nan)

    if n_runs:
        first = runs.index[0]
        last = runs.index[-1]
        # Label-based slices (.loc replaces the removed .ix indexer).
        bb = breaks.loc[:last].dropna().to_numpy()   # breaks before the last run
        ba = breaks.loc[first:].dropna().to_numpy()  # breaks after the first run
        # Runs and breaks alternate, so each list is at most one shorter
        # than the number of runs: right-align the "before" breaks and
        # left-align the "after" breaks.
        breaks_before[n_runs - len(bb):] = bb
        breaks_after[:len(ba)] = ba

    runs['breaks_after'] = breaks_after
    runs['breaks_before'] = breaks_before
    return runs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment