Created
June 7, 2020 18:11
-
-
Save charles-l/8999a3cc9d294362a329ceec28d0745c to your computer and use it in GitHub Desktop.
A Rails-style has_many relationship accessor for pandas DataFrames
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
@pd.api.extensions.register_dataframe_accessor("rel")
class RelationshipAccessor:
    """
    Add a relationship accessor to dataframe objects allowing Rails-like
    access to related dataframes. e.g.

    >>> authors = pd.DataFrame({'name': ['C. S. Lewis', 'Lewis Carroll']})
    >>> books = pd.DataFrame({'title': ["Alice's Adventures in Wonderland",
    ...                                 'Through the looking glass',
    ...                                 'The Lion, the Witch, and the Wardrobe'],
    ...                       'author_id': [1, 1, 0]})
    >>> authors.rel.has_many('books', books, 'author_id')
    >>> len(authors[authors.name == 'Lewis Carroll'].rel.books)
    2

    Relations registered on a dataframe are inherited by a derived dataframe
    only when pandas exposes the parent through the private ``_is_copy``
    weak reference.
    NOTE(review): ``_is_copy`` is a pandas internal and is presumably not
    populated by every pandas version or configuration (e.g. Copy-on-Write);
    confirm against the pandas version in use.
    """

    def __init__(self, pandas_obj):
        """Wrap *pandas_obj* and inherit the relations of its parent, if any."""
        self.obj = pandas_obj
        # Maps relation name -> (related dataframe, foreign-key column name).
        self.relations = {}
        # HACK: recurse up the tree of dataframes that this dataframe was
        # copied from (if any).
        # This has to be tracked manually because Pandas creates a fresh
        # RelationshipAccessor object every time it is accessed on a new
        # dataframe.
        # ``_is_copy`` is private, so read it defensively: it may be missing,
        # None, or a weak reference whose referent has already been collected.
        parent_ref = getattr(pandas_obj, "_is_copy", None)
        parent = parent_ref() if callable(parent_ref) else None
        if isinstance(parent, pd.DataFrame):
            self.relations.update(parent.rel.relations)

    def _get_related(self, df, colname):
        """Return the rows of *df* whose *colname* value is in this frame's index."""
        return df[df[colname].isin(self.obj.index)]

    def has_many(self, relname, df, colname):
        """
        Register a one-to-many relation.

        After the call, ``<frame>.rel.<relname>`` returns the rows of *df*
        whose *colname* value appears in the index of the wrapped dataframe.
        """
        self.relations[relname] = (df, colname)

    def __getattr__(self, name):
        """
        Resolve *name* as a registered relation.

        Raises AttributeError when no relation called *name* exists.
        """
        # __getattr__ only runs after normal lookup has failed. Read the
        # relation table straight from the instance dict: if ``relations``
        # itself is missing (instance built without __init__, e.g. by copy
        # or unpickling), ``self.relations`` would re-enter this method and
        # recurse without bound.
        relations = self.__dict__.get("relations", {})
        try:
            df, colname = relations[name]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no relation or "
                f"attribute {name!r}"
            ) from None
        return self._get_related(df, colname)
if __name__ == "__main__":
    # When executed as a script, run the doctest examples embedded in this
    # module's docstrings.
    from doctest import testmod

    testmod()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment