Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save yihsquareup/2d56462832e8b2d4f1e07ee7958e8119 to your computer and use it in GitHub Desktop.
Save yihsquareup/2d56462832e8b2d4f1e07ee7958e8119 to your computer and use it in GitHub Desktop.
Retrieve features.
# Load the feature table written under "<dumbo_out_path>/results".
# The prefix is normalised first: if dumbo_out_path already ends with "/",
# plain concatenation yields ".../yih//results", and pyarrow refuses it with
# "ArrowInvalid: Empty path component in path".
# NOTE(review): a trailing "/" may mean a sub-directory component used to build
# dumbo_out_path was empty -- confirm the prefix points at the intended run.
df_features_web = pd.read_parquet(dumbo_out_path.rstrip("/") + "/results")
print(df_features_web.shape)
# Last expression of the notebook cell: displays the first two rows.
df_features_web.head(2)
---------------------------------------------------------------------------
ArrowInvalid Traceback (most recent call last)
/tmp/ipykernel_1199/1213197556.py in <cell line: 1>()
----> 1 df_features_web = pd.read_parquet(dumbo_out_path + "/results")
2 print(df_features_web.shape)
3 df_features_web.head(2)
/code/.venv/lib/python3.9/site-packages/pandas/io/parquet.py in read_parquet(path, engine, columns, storage_options, use_nullable_dtypes, dtype_backend, filesystem, filters, **kwargs)
665 check_dtype_backend(dtype_backend)
666
--> 667 return impl.read(
668 path,
669 columns=columns,
/code/.venv/lib/python3.9/site-packages/pandas/io/parquet.py in read(self, path, columns, filters, use_nullable_dtypes, dtype_backend, storage_options, filesystem, **kwargs)
272 )
273 try:
--> 274 pa_table = self.api.parquet.read_table(
275 path_or_handle,
276 columns=columns,
/code/.venv/lib/python3.9/site-packages/pyarrow/parquet/core.py in read_table(source, columns, use_threads, schema, use_pandas_metadata, read_dictionary, memory_map, buffer_size, partitioning, filesystem, filters, use_legacy_dataset, ignore_prefixes, pre_buffer, coerce_int96_timestamp_unit, decryption_properties, thrift_string_size_limit, thrift_container_size_limit, page_checksum_verification)
1791
1792 try:
-> 1793 dataset = ParquetDataset(
1794 source,
1795 schema=schema,
/code/.venv/lib/python3.9/site-packages/pyarrow/parquet/core.py in __init__(self, path_or_paths, filesystem, schema, filters, read_dictionary, memory_map, buffer_size, partitioning, ignore_prefixes, pre_buffer, coerce_int96_timestamp_unit, decryption_properties, thrift_string_size_limit, thrift_container_size_limit, page_checksum_verification, use_legacy_dataset)
1346 except ValueError:
1347 filesystem = LocalFileSystem(use_mmap=memory_map)
-> 1348 finfo = filesystem.get_file_info(path_or_paths)
1349 if finfo.type == FileType.Directory:
1350 self._base_dir = path_or_paths
/code/.venv/lib/python3.9/site-packages/pyarrow/_fs.pyx in pyarrow._fs.FileSystem.get_file_info()
/code/.venv/lib/python3.9/site-packages/pyarrow/error.pxi in pyarrow.lib.pyarrow_internal_check_status()
/code/.venv/lib/python3.9/site-packages/pyarrow/error.pxi in pyarrow.lib.check_status()
ArrowInvalid: Empty path component in path ds-cash-production-personal/yih//results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment