Created
May 23, 2023 18:41
-
-
Save atemate/9810c78d4c85fefdd7c4fa11a59444ea to your computer and use it in GitHub Desktop.
Lightweight pd.DataFrame without pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# TODO: use np.array and dtypes there
class _MyIndexer: | |
def __init__(self, obj) -> None: | |
self._obj = obj | |
def __getitem__(self, idx): | |
print('__getitem__', idx, type(idx), isinstance(idx, (list, tuple))) | |
if isinstance(idx, str): | |
return self._obj[idx] | |
if isinstance(idx, tuple): | |
if len(idx) != 2: | |
raise ValueError(f"Only 2-dim frames supported, got index for .loc: {idx}") | |
slise, columns = idx | |
if not isinstance(columns, (list, np.ndarray, pd.Series)): | |
columns = [columns] | |
result = [self._obj[c][slise] for c in columns] | |
print('__getitem__ result: ', result) | |
return result | |
if isinstance(idx, list): | |
result = [self._obj[c] for c in idx] | |
print('__getitem__ result: ', result) | |
return result | |
# if isinstance(indices, slice): | |
raise NotImplementedError(f"Not implemented: .loc for index of type {type(idx)}") | |
class MyDataFrame(dict):  # not collections.UserDict for better performance
    """Minimal column-oriented stand-in for ``pd.DataFrame``.

    The frame *is* a dict: keys are column names and values are the
    ``np.ndarray`` built from each column's data.
    """

    def __init__(self, *args, **kwargs):
        """Build the frame from at most one positional argument.

        Args:
            *args: Nothing (empty frame), a ``pd.DataFrame``, or a mapping of
                column name -> sequence of values.
            **kwargs: Not supported.

        Raises:
            NotImplementedError: If keyword arguments or more than one
                positional argument are passed.
        """
        if kwargs:
            # ``dict`` has no ``.names()``; list the offending keys directly so
            # the intended NotImplementedError is what the caller sees.
            raise NotImplementedError(
                f"Instantiating {self.__class__.__name__} with kwargs "
                f"not supported, got: {list(kwargs)}"
            )
        if len(args) > 1:
            raise NotImplementedError(
                f"Instantiating {self.__class__.__name__} with more "
                f"than one positional argument not supported, got: {len(args)}"
            )

        dtypes = {}
        # No positional argument means an empty frame rather than an IndexError.
        arg = args[0] if args else {}
        if isinstance(arg, pd.DataFrame):
            df = arg
            arg = df.to_dict(orient='list')
            # Remember the source dtypes so the arrays below keep them.
            dtypes = df.dtypes.to_dict()
        # elif isinstance(arg, list):
        #     # list of records
        #     arg_list = arg
        #     result = collections.defaultdict(list)
        #     for row in arg_list:
        #         if not isinstance(row, dict):
        #             raise ValueError(f"Must be a dict: {row}")
        #         for k, v in row.items():
        #             result[k].append(v)
        #     result["__dtypes"] = [None] * len(result[k])
        #     arg = result

        # dtype=None lets NumPy infer the type for plain mappings.
        arg = {k: np.array(v, dtype=dtypes.get(k)) for k, v in arg.items()}
        dict.__init__(self, arg)
        self.__indexer = _MyIndexer(self)

    @property
    def columns(self):
        """Column names as a sorted list (insertion order is not kept)."""
        return sorted(self)

    @property
    def dtypes(self):
        """Set of the distinct dtypes found across all columns."""
        return {v.dtype for v in self.values()}

    @property
    def shape(self):
        """Return ``(n_columns, n_rows)``, or ``(0,)`` when there are no columns.

        NOTE(review): ``pd.DataFrame.shape`` is ``(n_rows, n_columns)`` and
        ``(0, 0)`` when empty; the order here is kept as-is for existing
        callers - confirm whether it is intentional.

        Raises:
            ValueError: If the columns do not all have the same length.
        """
        lenmap = {k: len(v) for k, v in self.items()}
        lengths = set(lenmap.values())
        if not lengths:
            return (0,)
        if len(lengths) > 1:
            raise ValueError(f"Some columns have different lengths: {lenmap}")
        (common_length,) = lengths
        return (len(lenmap), common_length)

    @property
    def ndim(self):
        """Number of dimensions; always 2."""
        return 2

    @property
    def loc(self):
        """Label-based indexer created in ``__init__``."""
        return self.__indexer

    def iloc(self, *args, **kwargs):
        """Positional indexing is not implemented."""
        raise NotImplementedError

    # def drop(self, *args, columns, inplace=False, **kwargs):
    #     if args or kwargs:
    #         raise NotImplementedError()
    #     columns = set(columns)
    #     missing = columns - set(self.columns)
    #     if missing:
    #         raise KeyError(f"{list(missing)} not found in axis")
    #     if inplace:
    #         for col in columns:
    #             del self[col]
    #     else:
    #         return MyDataFrame({k: v for k, v in self.items() if k not in columns})

    def to_df(self):
        """Convert the frame to a ``pd.DataFrame``."""
        # data = {k: v for k, v in self.items() if k not in self.RESERVED_COLUMNS}
        return pd.DataFrame(self)

    def __array__(self):
        """Conversion to a NumPy array is not implemented."""
        raise NotImplementedError
# df = pd.DataFrame({'review_date': ['2022-11-12', '2022-11-13'], 'keks': [1, 2]})
# mydf = MyDataFrame(df)
# mydf.columns, mydf.dtypes, mydf.shape, mydf.to_df().dtypes
# mydf.loc['review_date'], mydf.loc[['review_date']], mydf.loc[1:, 'review_date']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment