Created
July 2, 2024 13:16
-
-
Save CodeByAidan/40855e18cfe25b17347abe51726276bb to your computer and use it in GitHub Desktop.
Preserve any size DataFrame, load/save - FAST!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Optional, TypeGuard | |
import pandas as pd | |
from pandas import DataFrame | |
class DataFrameStore: | |
""" | |
A class to store and manage a DataFrame, with the ability to save and load it to | |
and from a file in Feather format. | |
""" | |
def __init__(self, df: Optional[DataFrame] = None) -> None: | |
""" | |
Initialize the DataFrameStore with an optional DataFrame. | |
""" | |
self.df: Optional[DataFrame] = df | |
def save(self, file_path: str) -> None: | |
""" | |
Save the DataFrame to a file in Feather format, optimized for large datasets. | |
Example: | |
-------- | |
>>> import pandas as pd | |
>>> df_sample = pd.DataFrame({'A': range(7119), 'B': range(7119, 14238)}) | |
>>> store = DataFrameStore(df_sample) | |
>>> store.save('large_df.parquet') | |
DataFrame saved to 'large_df.parquet' | |
""" | |
if self.__is_df(): | |
try: | |
self.df.to_feather(file_path) | |
print(f"DataFrame saved to '{file_path}'") | |
except Exception as e: | |
print(f"An error occurred while saving the DataFrame: {e}") | |
else: | |
print("No DataFrame to save!") | |
def load(self, file_path: str, columns: Optional[list] = None) -> None: | |
""" | |
Load the DataFrame from a Feather file, with an option to select specific columns for memory efficiency. | |
Note: Feather format does not support column selection directly during load. | |
Example: | |
-------- | |
>>> import pandas as pd | |
>>> df_sample = pd.DataFrame({'A': range(7119), 'B': range(7119, 14238)}) | |
>>> store = DataFrameStore() | |
>>> store.load('large_df.parquet') | |
DataFrame loaded from 'large_df.parquet' | |
>>> store.df | |
A B | |
0 0 7119 | |
1 1 7120 | |
2 2 7121 | |
3 3 7122 | |
4 4 7123 | |
... ... ... | |
7114 7114 14233 | |
7115 7115 14234 | |
7116 7116 14235 | |
7117 7117 14236 | |
7118 7118 14237 | |
[7119 rows x 2 columns] | |
>>> store.load('large_df.parquet', columns=['A']) | |
DataFrame loaded from 'large_df.parquet' | |
>>> store.df | |
A | |
0 0 | |
1 1 | |
2 2 | |
3 3 | |
4 4 | |
... ... | |
7114 7114 | |
7115 7115 | |
7116 7116 | |
7117 7117 | |
7118 7118 | |
[7119 rows x 1 columns] | |
""" | |
try: | |
df_temp: DataFrame = pd.read_feather(file_path) | |
if columns is not None: | |
self.df = df_temp.loc[:, df_temp.columns.intersection(columns)] | |
else: | |
self.df = df_temp | |
print(f"DataFrame loaded from '{file_path}'") | |
except Exception as e: | |
print(f"An error occurred while loading the DataFrame: {e}") | |
self.df = None | |
def __is_df(self) -> TypeGuard[DataFrame]: | |
""" | |
Check if the DataFrameStore has a DataFrame stored. | |
""" | |
return isinstance(self.df, pd.DataFrame) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment