Skip to content

Instantly share code, notes, and snippets.

View BryanCutler's full-sized avatar

Bryan Cutler BryanCutler

View GitHub Profile
@BryanCutler
BryanCutler / tep_extending_pandas_blog1_3.ipynb
Created May 3, 2021 20:13
Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
class SpanOpMixin:
def __add__(self, other) -> Union["Span", "SpanArray"]:
if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
# Rely on pandas to unbox and dispatch to us.
return NotImplemented
if isinstance(self, Span) and isinstance(other, Span):
# Span + *Span = Span
return Span(self.target_text,
@BryanCutler
BryanCutler / tep_extending_pandas_blog1_6.py
Last active May 4, 2021 21:55
Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
# Select the spans at positions 2, 3 and 4 (the end of `iloc[2:5]` is
# exclusive), then reduce that `SpanArray` slice to a single `Span` with `sum()`.
selected_spans = df["span"].iloc[2:5]
selected_spans.sum()
# [6, 20): 'Python and the'
@BryanCutler
BryanCutler / tep_extending_pandas_blog1_7.py
Last active May 3, 2021 22:10
Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
@pytest.fixture
def dtype():
    """Provide the dtype instance of the extension array under test."""
    tensor_dtype = TensorDtype()
    return tensor_dtype
 
@pytest.fixture
def data(dtype):
    """Provide an extension array of 100 single-element rows as test data.

    The rows hold the integers 0 through 99, one per row, and the array is
    built with the dtype supplied by the ``dtype`` fixture.
    """
    rows = [[value] for value in range(100)]
    return pd.array(np.array(rows), dtype=dtype)
@BryanCutler
BryanCutler / tep_extending_pandas_blog1_5_0.py
Last active May 4, 2021 22:31
Text Extensions for Pandas: Tips and Techniques for Extending Pandas, Part 1 Blog
# Adding a single `Span` to a Series of `SpanDtype` yields another Series,
# with one resulting span per row of the right-hand operand.
span_column = df["span"]
single_span = span_column.iloc[1]
span_slice = span_column.iloc[3:5]
single_span + span_slice
# 3 [0, 16): 'Monty Python and'
# 4 [0, 20): 'Monty Python and the'
# Name: span, dtype: SpanDtype