Last active
July 18, 2021 17:16
-
-
Save henrifroese/715b14b9891405cf47a013f69ecd7147 to your computer and use it in GitHub Desktop.
GridFS file-like error
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Reproduce the GridFS file-like error raised by ``pd.read_parquet``.

A small DataFrame is written to an in-memory Parquet buffer, stored in
GridFS, fetched back as a file-like object and handed to pandas with the
pyarrow engine. Uses ``MongoClient()`` with its default settings, so a
MongoDB server must be reachable there.
"""
import io

import gridfs
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from pymongo import MongoClient

db = MongoClient().test
fs = gridfs.GridFS(db)

df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})

# Serialize the frame to Parquet entirely in memory.
# NOTE(review): newer pyarrow releases may reject version="2.0" -- confirm
# against the installed pyarrow before re-running this reproduction.
buffer = io.BytesIO()
table = pa.Table.from_pandas(df)
pq.write_table(table, buffer, version="2.0")

# Store the Parquet bytes in GridFS and fetch them back; `file_like` is the
# object returned by fs.get() that pyarrow is then asked to read from.
doc = fs.put(data=buffer.getvalue())
file_like = fs.get(doc)

# Workaround: uncomment the next line to make the read below succeed.
# file_like.closed = False

# Fails unless the workaround line above is uncommented.
df_ = pd.read_parquet(file_like, engine="pyarrow")
assert df.equals(df_)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Relevant part of the pyarrow implementation that leads to the error with GridOut.
`handle` is the `file_like` object from above, i.e. a GridOut object.
The handle fails both isinstance checks, so pyarrow calls `set_random_access_file`, which later
makes use of the `closed` attribute (which the GridOut object does not have!).
""" | |
# If mode was given, check it matches the given file
if mode is not None:
    if isinstance(handle, IOBase):
        # Python 3 IO object: it can report whether it supports the
        # requested direction, so verify that up front.
        if kind == 'r':
            if not handle.readable():
                raise TypeError("readable file expected")
        else:
            if not handle.writable():
                raise TypeError("writable file expected")
    # (other duck-typed file-like objects are possible)
    # Such objects are not IOBase instances, so they skip the check above.

# If possible, check the file is a binary file
# (only objects that are TextIOBase instances are rejected here).
if isinstance(handle, TextIOBase):
    raise TypeError("binary file expected, got text file")

if kind == 'r':
    # Read mode: wrap the Python handle in a PyReadableFile and install it
    # as this object's random-access file, then mark it as readable.
    self.set_random_access_file(
        shared_ptr[CRandomAccessFile](new PyReadableFile(handle)))
    self.is_readable = True
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment