Skip to content

Instantly share code, notes, and snippets.

@viktor-ferenczi
Forked from luispedro/pdio.py
Last active September 8, 2017 22:50
Show Gist options
  • Save viktor-ferenczi/a8bdf7de2d3fcfc1f7769af463c2d243 to your computer and use it in GitHub Desktop.
Save viktor-ferenczi/a8bdf7de2d3fcfc1f7769af463c2d243 to your computer and use it in GitHub Desktop.
Save & load from a pandas DataFrame/Series
"""Saving and loading Pandas DataFrame and Series objects
Tested with Python 3.6.1 on 64 bit Windows 10
Original blog post: https://metarabbit.wordpress.com/2013/12/10/how-to-save-load-large-pandas-dataframes/
Original Gist was for Python 2.7: https://gist.github.com/luispedro/7887214
"""
import io
import numpy.lib
import numpy as np
import pandas as pd
import pickle
### Saving/loading dataframes
def save_pandas(fname, data):
    """Save a DataFrame or Series to a single file.

    The values are written in NumPy ``.npy`` format, immediately followed
    by a pickle of the axis labels: ``(index, columns)`` for 2-D data or
    ``(index,)`` for 1-D data.

    Parameters
    ----------
    fname : str
        filename to use
    data : Pandas DataFrame or Series

    Raises
    ------
    ValueError
        If ``data`` is neither 1- nor 2-dimensional. The check happens
        before anything is written, so no partial file is left behind.
    """
    # Validate first so unsupported input never creates/truncates the file.
    ndim = len(data.shape)
    if ndim == 2:
        meta = (data.index, data.columns)
    elif ndim == 1:
        meta = (data.index, )
    else:
        raise ValueError('save_pandas: Cannot save this type')
    # One handle for both parts: the array payload, then the pickled labels
    # right after it. The context manager guarantees the file is closed.
    with open(fname, 'wb') as f:
        np.save(f, data)
        f.write(pickle.dumps(meta))
def load_pandas(fname, mmap_mode='r'):
    """Load a DataFrame or Series from a file written by ``save_pandas``.

    The file holds a NumPy ``.npy`` array immediately followed by a pickled
    tuple of axis labels: ``(index, columns)`` for a DataFrame or
    ``(index,)`` for a Series.

    Parameters
    ----------
    fname : str
        filename
    mmap_mode : str, optional
        Same as numpy.load option

    Returns
    -------
    Pandas DataFrame or Series

    Raises
    ------
    ValueError
        If the trailing metadata is not a 1- or 2-element sequence.
    """
    values = np.load(fname, mmap_mode=mmap_mode)
    with open(fname, 'rb') as f:
        # The .npy layout is: magic + version, a little-endian header-length
        # field (2 bytes in format version 1.x, 4 bytes in later versions),
        # the header itself, then the raw array bytes.
        major, _minor = numpy.lib.format.read_magic(f)
        length_field_size = 2 if major == 1 else 4
        header_len = int.from_bytes(f.read(length_field_size), 'little')
        # Skip the header and the array payload. The payload size is the
        # array's byte count (itemsize * size), not alignment * size.
        f.seek(header_len + values.nbytes, io.SEEK_CUR)
        # NOTE: unpickling can execute arbitrary code; only load files that
        # come from a trusted source.
        meta = pickle.loads(f.read())
    if len(meta) == 2:
        return pd.DataFrame(values, index=meta[0], columns=meta[1])
    if len(meta) == 1:
        return pd.Series(values, index=meta[0])
    raise ValueError('load_pandas: Unexpected metadata in file')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment