Skip to content

Instantly share code, notes, and snippets.

@benkrikler
Last active November 9, 2018 13:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benkrikler/cfa0f3703358d5bd7b5abd5fae36db29 to your computer and use it in GitHub Desktop.
Save benkrikler/cfa0f3703358d5bd7b5abd5fae36db29 to your computer and use it in GitHub Desktop.
import pandas as pd
# One interval bound: an optionally signed "inf", or an optionally signed
# decimal number with at most one decimal point and an optional exponent.
# Restricting the shape here guarantees that every captured bound can be
# parsed by ``pd.to_numeric`` (e.g. "1.2.3" is rejected up front).
_bound_regex = (
    r"[-+]?(?:inf|(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)"
)

# Whole-string pattern for a single interval such as "[0, 1.5)" or
# "(-inf, -2]".  The "[" inside the first character class is escaped because
# an unescaped "[[" makes Python's ``re`` emit a "Possible nested set"
# FutureWarning.
_interval_regex = (
    r"^(?P<open>[\[(])\s*"
    + r"(?P<low>" + _bound_regex + r")"
    + r"\s*,\s*"
    + r"(?P<high>" + _bound_regex + r")"
    + r"\s*(?P<close>[)\]])$"
)

# Maps (opening bracket is "[", closing bracket is "]") to the ``closed``
# argument understood by ``pd.IntervalIndex``.
_closed_lookup = {
    (True, True): "both",
    (True, False): "left",
    (False, True): "right",
    (False, False): "neither",
}


def interval_from_string(series):
    """Convert interval strings such as ``"[0, 1.5)"`` to an IntervalIndex.

    The conversion is best-effort: whenever the input cannot be interpreted
    as a homogeneous set of intervals, it is returned unchanged.

    Args:
        series: A pandas object exposing the ``.str`` accessor, whose values
            are expected to look like ``"<open><low>, <high><close>"`` with
            ``<open>`` one of ``[`` / ``(`` and ``<close>`` one of
            ``]`` / ``)``.  Bounds may be signed decimals, use scientific
            notation, or be ``inf`` / ``+inf`` / ``-inf``.

    Returns:
        A ``pd.IntervalIndex`` with one interval per input value (in input
        order) when every value matches the pattern and all values share the
        same bracket style.  Otherwise ``series`` itself is returned.
    """
    if not pd.api.types.is_string_dtype(series):
        return series

    try:
        extracted = series.str.extract(_interval_regex)
    except AttributeError:
        # ``is_string_dtype`` can be true for object-dtype data that holds
        # non-string values (older pandas), in which case ``.str`` refuses
        # to work; such data is clearly not a set of interval strings.
        return series

    # Rows that failed to match come back as all-NaN; any such row means the
    # series as a whole is not interval-like.
    extracted = extracted.dropna()
    if len(extracted) != len(series):
        return series

    # Bracket indexing rather than attribute access, so the column names can
    # never be shadowed by DataFrame attributes.
    opening = extracted["open"].unique()
    closing = extracted["close"].unique()
    # An IntervalIndex has a single ``closed`` setting, so mixed bracket
    # styles cannot be represented.
    if len(opening) != 1 or len(closing) != 1:
        return series

    closed = _closed_lookup[(bool(opening[0] == "["), bool(closing[0] == "]"))]

    try:
        return pd.IntervalIndex.from_arrays(
            left=pd.to_numeric(extracted["low"]),
            right=pd.to_numeric(extracted["high"]),
            closed=closed,
        )
    except ValueError:
        # Raised e.g. when a lower bound exceeds its upper bound; keep the
        # function's "return the input untouched" convention.
        return series
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment