def convert_to_dict(source):
    """Recursively convert pandas/NumPy objects into plain Python data.

    The result is meant to be storable in a document database such as
    MongoDB, so pandas- and NumPy-specific types are replaced:

    * ``dict``: keys and values are converted recursively.
    * ``pd.DataFrame``: becomes ``{column_name: [values, ...]}``; the
      (outermost) index is not part of the result.
    * ``pd.Series``: becomes ``{index_label: value}``. A Series with a
      multi-level index is turned into a DataFrame first (see below).
    * ``pd.Timestamp``: becomes ``datetime.datetime``.
    * NumPy integer / floating / bool scalars: converted with ``.item()``.
    * ``np.ndarray``: becomes a (nested) list.
    * Anything else is returned unchanged.

    For DataFrames and Series:

    * all MultiIndex levels except the first are moved into columns;
    * datetime values in the index and in the data are formatted as
      strings with ``strftime('%x')`` (locale-dependent date format);
    * every column that can be cast to ``float`` is cast (this includes
      booleans and numeric-looking strings); other columns are kept;
    * missing values (NaN / NaT) are replaced by ``None``;
    * non-string column names are converted with ``str()``.

    The input object is never modified.

    Args:
        source: The object to convert.

    Returns:
        The converted object, built from plain Python containers and
        values wherever one of the rules above applies.
    """
    if isinstance(source, dict):
        return {
            convert_to_dict(key): convert_to_dict(value)
            for key, value in source.items()
        }
    if isinstance(source, (pd.DataFrame, pd.Series)):
        return _pandas_to_dict(source)
    if isinstance(source, pd.Timestamp):
        return source.to_pydatetime()
    if isinstance(source, (np.integer, np.floating, np.bool_)):
        # ``np.asscalar`` no longer exists in current NumPy; ``.item()`` is
        # the supported way to get the native Python value.
        return source.item()
    if isinstance(source, np.ndarray):
        return source.tolist()
    return source


def _as_float_if_possible(values):
    """Return *values* cast to float, or unchanged if the cast fails."""
    try:
        return values.astype(float)
    except (TypeError, ValueError, OverflowError):
        return values


def _pandas_to_dict(source):
    """Convert a DataFrame or Series into a dict of plain Python values."""
    is_datetime = pd.api.types.is_datetime64_any_dtype

    # Work on a copy: the index and column assignments below would
    # otherwise modify the caller's object in place.
    source = source.copy()

    index = source.index
    if isinstance(index, pd.MultiIndex) and index.nlevels > 1:
        # Reset all inner levels in a single call. Resetting them one at a
        # time by position fails for 3+ levels because the remaining
        # levels are renumbered after each reset.
        source = source.reset_index(level=list(range(1, index.nlevels)))

    # Datetime index labels become locale-formatted date strings.
    if is_datetime(source.index.dtype):
        source.index = source.index.strftime('%x')

    if isinstance(source, pd.Series):
        if is_datetime(source.dtype):
            source = source.dt.strftime('%x')
        else:
            source = _as_float_if_possible(source)
    else:
        for column in list(source.columns):
            values = source[column]
            if is_datetime(values.dtype):
                source[column] = values.dt.strftime('%x')
            else:
                source[column] = _as_float_if_possible(values)
        # Column names end up as dict keys, so make all of them strings.
        source = source.rename(
            columns=lambda name: name if isinstance(name, str) else str(name)
        )

    # Use Python None instead of pandas-specific NaT and NaN. This has to
    # happen after the float cast (casting afterwards would turn None back
    # into NaN) and on object dtype (a float column cannot hold None).
    source = source.astype(object).where(source.notna(), None)

    if isinstance(source, pd.DataFrame):
        return source.to_dict(orient='list')
    return source.to_dict()
Last active
December 15, 2017 10:25
-
-
Save matse004/f9b06818819dd05cb4fe0898708a13cb to your computer and use it in GitHub Desktop.
Convert pandas DataFrames and Series into Python dictionaries that can be saved in MongoDB
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I couldn't find a nice native solution;
odo doesn't handle floats correctly and also has issues with its networkx dependency.