Skip to content

Instantly share code, notes, and snippets.

@henrifroese
Last active July 18, 2021 17:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save henrifroese/715b14b9891405cf47a013f69ecd7147 to your computer and use it in GitHub Desktop.
Save henrifroese/715b14b9891405cf47a013f69ecd7147 to your computer and use it in GitHub Desktop.
GridFS file-like error
import pandas as pd
from pymongo import MongoClient
import gridfs
import io
import pyarrow as pa
import pyarrow.parquet as pq
# Minimal reproduction: write a DataFrame to GridFS as Parquet bytes, then try
# to read it back with pandas directly from the object returned by fs.get().
db = MongoClient().test
fs = gridfs.GridFS(db)
df = pd.DataFrame({"A": [1,2], "B": [3,4]})
# Serialize the frame to Parquet in an in-memory buffer.
buffer = io.BytesIO()
table = pa.Table.from_pandas(df)
pq.write_table(table, buffer, version="2.0")
# Store the Parquet bytes in GridFS and fetch them back using the value put() returned.
doc = fs.put(data=buffer.getvalue())
file_like = fs.get(doc)
# file_like.closed = False # uncomment this line to make it work
df_ = pd.read_parquet(file_like, engine="pyarrow") # fails if line above is not uncommented
# Round-trip check: the frame read back must equal the one that was written.
assert df.equals(df_)
"""
Relevant part of the pyarrow implementation that leads to the error with GridOut.
`handle` would be the `file_like` from above, i.e. a GridOut object.
The handle fails both isinstance checks, so pyarrow calls `set_random_access_file`, which later
makes use of the `closed` attribute (which the GridOut object does not have!).
"""
# If mode was given, check it matches the given file
if mode is not None:
if isinstance(handle, IOBase):
# Python 3 IO object
if kind == 'r':
if not handle.readable():
raise TypeError("readable file expected")
else:
if not handle.writable():
raise TypeError("writable file expected")
# (other duck-typed file-like objects are possible)
# If possible, check the file is a binary file
if isinstance(handle, TextIOBase):
raise TypeError("binary file expected, got text file")
if kind == 'r':
self.set_random_access_file(
shared_ptr[CRandomAccessFile](new PyReadableFile(handle)))
self.is_readable = True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment