Created
May 5, 2025 23:16
-
-
Save yihsquareup/2d56462832e8b2d4f1e07ee7958e8119 to your computer and use it in GitHub Desktop.
Retrieve features.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the feature results written under `dumbo_out_path`.
#
# `dumbo_out_path` may already end with "/" (the failing run produced
# ".../yih//results"). pyarrow rejects the resulting empty path component
# with `ArrowInvalid: Empty path component in path ...`, so strip any
# trailing slash before appending the sub-directory.
# NOTE(review): plain string handling is used instead of pathlib because the
# path looks like a bucket/object-store location — confirm.
results_path = dumbo_out_path.rstrip("/") + "/results"
df_features_web = pd.read_parquet(results_path)
print(df_features_web.shape)
df_features_web.head(2)
--------------------------------------------------------------------------- | |
ArrowInvalid Traceback (most recent call last) | |
/tmp/ipykernel_1199/1213197556.py in <cell line: 1>() | |
----> 1 df_features_web = pd.read_parquet(dumbo_out_path + "/results") | |
2 print(df_features_web.shape) | |
3 df_features_web.head(2) | |
/code/.venv/lib/python3.9/site-packages/pandas/io/parquet.py in read_parquet(path, engine, columns, storage_options, use_nullable_dtypes, dtype_backend, filesystem, filters, **kwargs) | |
665 check_dtype_backend(dtype_backend) | |
666 | |
--> 667 return impl.read( | |
668 path, | |
669 columns=columns, | |
/code/.venv/lib/python3.9/site-packages/pandas/io/parquet.py in read(self, path, columns, filters, use_nullable_dtypes, dtype_backend, storage_options, filesystem, **kwargs) | |
272 ) | |
273 try: | |
--> 274 pa_table = self.api.parquet.read_table( | |
275 path_or_handle, | |
276 columns=columns, | |
/code/.venv/lib/python3.9/site-packages/pyarrow/parquet/core.py in read_table(source, columns, use_threads, schema, use_pandas_metadata, read_dictionary, memory_map, buffer_size, partitioning, filesystem, filters, use_legacy_dataset, ignore_prefixes, pre_buffer, coerce_int96_timestamp_unit, decryption_properties, thrift_string_size_limit, thrift_container_size_limit, page_checksum_verification) | |
1791 | |
1792 try: | |
-> 1793 dataset = ParquetDataset( | |
1794 source, | |
1795 schema=schema, | |
/code/.venv/lib/python3.9/site-packages/pyarrow/parquet/core.py in __init__(self, path_or_paths, filesystem, schema, filters, read_dictionary, memory_map, buffer_size, partitioning, ignore_prefixes, pre_buffer, coerce_int96_timestamp_unit, decryption_properties, thrift_string_size_limit, thrift_container_size_limit, page_checksum_verification, use_legacy_dataset) | |
1346 except ValueError: | |
1347 filesystem = LocalFileSystem(use_mmap=memory_map) | |
-> 1348 finfo = filesystem.get_file_info(path_or_paths) | |
1349 if finfo.type == FileType.Directory: | |
1350 self._base_dir = path_or_paths | |
/code/.venv/lib/python3.9/site-packages/pyarrow/_fs.pyx in pyarrow._fs.FileSystem.get_file_info() | |
/code/.venv/lib/python3.9/site-packages/pyarrow/error.pxi in pyarrow.lib.pyarrow_internal_check_status() | |
/code/.venv/lib/python3.9/site-packages/pyarrow/error.pxi in pyarrow.lib.check_status() | |
ArrowInvalid: Empty path component in path ds-cash-production-personal/yih//results |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment