-
-
Save PokhodenkoSA/becf462bfc68791dd35b7e0a2691905b to your computer and use it in GitHub Desktop.
PyArrow porting 0.14.1 -> 0.15.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
- python setup.py build -f | |
- ошибки сборки на Windows, что-то с rc.exe, но фикс в документации не помогает. => перешел на WSL | |
- ошибки компиляции hpat/io/_parquet.cpp | |
1. Сменился интерфейс ReadColumn(): | |
было: parquet::arrow::FileReader::ReadColumn(..., std::shared_ptr<arrow::Array>*) | |
стало: parquet::arrow::FileReader::ReadColumn(..., std::shared_ptr<arrow::ChunkedArray>*) | |
/// \note Deprecated since 0.12 | |
ARROW_DEPRECATED("Use version with ChunkedArray output") | |
::arrow::Status ReadColumn(int i, std::shared_ptr<::arrow::Array>* out); | |
2. Используется data(), которая есть у Array, но нет у ChunkedArray. ChunkedArray состоит из нескольких Array. | |
error: 'using element_type = class arrow::ChunkedArray {aka class arrow::ChunkedArray}' has no member named 'data' | |
auto buffers = arr->data()->buffers; | |
Fix: auto arr = chunked_array->chunk(0); | |
Это хак: используется только первый Array, остальные чанки не учитываются. Может не работать для очень больших данных. | |
Недостаточно опыта, чтобы понять, как работать с ChunkedArray. | |
В функции pq_read_single_file() данные копируются в out_data (указатель пришёл снаружи, и неизвестно, сколько там места). | |
Можно сделать итерацию по чанкам и повторить имеющиеся команды для каждого чанка. | |
3. ChunkedArray не имеет null_bitmap_data, но Array имеет. ChunkedArray состоит из нескольких Array. Взять один Array: chunked_array->chunk(0); | |
error: 'using element_type = class arrow::ChunkedArray {aka class arrow::ChunkedArray}' has no member named 'null_bitmap_data' | |
const uint8_t* null_bitmap_buff = arr->null_count() == 0 ? nullptr : arr->null_bitmap_data(); | |
Fix: ошибка исправляется тем же фиксом, что и в п. 2 (arr = chunked_array->chunk(0)). | |
4. https://github.com/apache/arrow/pull/4841 - ARROW-5893: [C++][Python][GLib][Ruby][MATLAB][R] Remove arrow::Column class | |
error: 'Column' is not a member of 'arrow' | |
std::shared_ptr<::arrow::Column> column = table->column(0); | |
Fix: | |
было: std::shared_ptr<ChunkedArray> chunked_array = out->column(0)->data(); | |
стало: std::shared_ptr<ChunkedArray> chunked_array = out->column(0); | |
5. parquet::arrow::FileReader is abstract class now - ARROW-6065: [C++][Parquet] Clean up parquet/arrow/reader.cc, reduce code duplication, improve readability | |
error: invalid new-expression of abstract class type 'parquet::arrow::FileReader' | |
a_reader->reset(new FileReader(pool, ParquetFileReader::Open(file))); | |
Fix: FileReader::Make(..., std::unique_ptr<FileReader> *) | |
std::unique_ptr<FileReader> arrow_reader; | |
FileReader::Make(..., &arrow_reader); | |
*a_reader = std::move(arrow_reader); | |
6. FromParquetSchema() removed from schema.h - ARROW-6077: [C++][Parquet] Build Arrow "schema tree" from Parquet schema to help with nested data implementation | |
error: no matching function for call to 'FromParquetSchema(const parquet::SchemaDescriptor*&, std::vector<int>&, std::shared_ptr<const arrow::KeyValueMetadata>&, std::shared_ptr<arrow::Schema>*)' | |
parquet::arrow::FromParquetSchema(descr, column_indices, parquet_key_value_metadata, &col_schema); | |
Fix: call arrow_reader->GetSchema(&col_schema) first (it returns arrow::Status, so it cannot be chained), then use col_schema->field(column_idx)->type(); | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In file included from hpat/io/_parquet.cpp:19:0: | |
./parquet_reader/hpat_parquet_reader.cpp: In function 'int64_t pq_read_single_file(std::shared_ptr<parquet::arrow::FileReader>, int64_t, uint8_t*, int)': | |
./parquet_reader/hpat_parquet_reader.cpp:90:46: error: no matching function for call to 'parquet::arrow::FileReader::ReadColumn(int64_t&, std::shared_ptr<arrow::Array>*)' | |
arrow_reader->ReadColumn(column_idx, &arr); | |
^ | |
In file included from hpat/io/_parquet.cpp:11:0: | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:134:27: note: candidate: virtual arrow::Status parquet::arrow::FileReader::ReadColumn(int, std::shared_ptr<arrow::ChunkedArray>*) | |
virtual ::arrow::Status ReadColumn(int i, | |
^~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:134:27: note: no known conversion for argument 2 from 'std::shared_ptr<arrow::Array>*' to 'std::shared_ptr<arrow::ChunkedArray>*' | |
In file included from hpat/io/_parquet.cpp:19:0: | |
./parquet_reader/hpat_parquet_reader.cpp: In function 'int pq_read_parallel_single_file(std::shared_ptr<parquet::arrow::FileReader>, int64_t, uint8_t*, int, int64_t, int64_t)': | |
./parquet_reader/hpat_parquet_reader.cpp:157:34: error: 'Column' is not a member of 'arrow' | |
std::shared_ptr<::arrow::Column> column = table->column(0); | |
^~~~~~ | |
./parquet_reader/hpat_parquet_reader.cpp:157:34: error: 'Column' is not a member of 'arrow' | |
./parquet_reader/hpat_parquet_reader.cpp:157:40: error: template argument 1 is invalid | |
std::shared_ptr<::arrow::Column> column = table->column(0); | |
^ | |
./parquet_reader/hpat_parquet_reader.cpp:157:66: error: cannot convert 'std::shared_ptr<arrow::ChunkedArray>' to 'int' in initialization | |
std::shared_ptr<::arrow::Column> column = table->column(0); | |
^ | |
./parquet_reader/hpat_parquet_reader.cpp:158:68: error: base operand of '->' is not a pointer | |
std::shared_ptr<::arrow::ChunkedArray> chunked_arr = column->data(); | |
^~ | |
./parquet_reader/hpat_parquet_reader.cpp: In function 'int64_t pq_read_string_single_file(std::shared_ptr<parquet::arrow::FileReader>, int64_t, uint32_t**, uint8_t**, uint8_t**, std::vector<unsigned int>*, std::vector<unsigned char>*, std::vector<bool>*)': | |
./parquet_reader/hpat_parquet_reader.cpp:394:46: error: no matching function for call to 'parquet::arrow::FileReader::ReadColumn(int64_t&, std::shared_ptr<arrow::Array>*)' | |
arrow_reader->ReadColumn(column_idx, &arr); | |
^ | |
In file included from hpat/io/_parquet.cpp:11:0: | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:134:27: note: candidate: virtual arrow::Status parquet::arrow::FileReader::ReadColumn(int, std::shared_ptr<arrow::ChunkedArray>*) | |
virtual ::arrow::Status ReadColumn(int i, | |
^~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:134:27: note: no known conversion for argument 2 from 'std::shared_ptr<arrow::Array>*' to 'std::shared_ptr<arrow::ChunkedArray>*' | |
In file included from hpat/io/_parquet.cpp:19:0: | |
./parquet_reader/hpat_parquet_reader.cpp: In function 'int pq_read_string_parallel_single_file(std::shared_ptr<parquet::arrow::FileReader>, int64_t, uint32_t**, uint8_t**, uint8_t**, int64_t, int64_t, std::vector<unsigned int>*, std::vector<unsigned char>*, std::vector<bool>*)': | |
./parquet_reader/hpat_parquet_reader.cpp:512:34: error: 'Column' is not a member of 'arrow' | |
std::shared_ptr<::arrow::Column> column = table->column(0); | |
^~~~~~ | |
./parquet_reader/hpat_parquet_reader.cpp:512:34: error: 'Column' is not a member of 'arrow' | |
./parquet_reader/hpat_parquet_reader.cpp:512:40: error: template argument 1 is invalid | |
std::shared_ptr<::arrow::Column> column = table->column(0); | |
^ | |
./parquet_reader/hpat_parquet_reader.cpp:512:66: error: cannot convert 'std::shared_ptr<arrow::ChunkedArray>' to 'int' in initialization | |
std::shared_ptr<::arrow::Column> column = table->column(0); | |
^ | |
./parquet_reader/hpat_parquet_reader.cpp:513:68: error: base operand of '->' is not a pointer | |
std::shared_ptr<::arrow::ChunkedArray> chunked_arr = column->data(); | |
^~ | |
./parquet_reader/hpat_parquet_reader.cpp: In function 'void pq_init_reader(const char*, std::shared_ptr<parquet::arrow::FileReader>*)': | |
./parquet_reader/hpat_parquet_reader.cpp:637:75: error: invalid new-expression of abstract class type 'parquet::arrow::FileReader' | |
a_reader->reset(new FileReader(pool, ParquetFileReader::Open(file))); | |
^ | |
In file included from hpat/io/_parquet.cpp:11:0: | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:106:22: note: because the following virtual functions are pure within 'parquet::arrow::FileReader': | |
class PARQUET_EXPORT FileReader { | |
^~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:126:27: note: virtual arrow::Status parquet::arrow::FileReader::GetColumn(int, std::unique_ptr<parquet::arrow::ColumnReader>*) | |
virtual ::arrow::Status GetColumn(int i, std::unique_ptr<ColumnReader>* out) = 0; | |
^~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:129:27: note: virtual arrow::Status parquet::arrow::FileReader::GetSchema(std::shared_ptr<arrow::Schema>*) | |
virtual ::arrow::Status GetSchema(std::shared_ptr<::arrow::Schema>* out) = 0; | |
^~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:134:27: note: virtual arrow::Status parquet::arrow::FileReader::ReadColumn(int, std::shared_ptr<arrow::ChunkedArray>*) | |
virtual ::arrow::Status ReadColumn(int i, | |
^~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:149:27: note: virtual arrow::Status parquet::arrow::FileReader::ReadSchemaField(int, std::shared_ptr<arrow::ChunkedArray>*) | |
virtual ::arrow::Status ReadSchemaField( | |
^~~~~~~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:155:27: note: virtual arrow::Status parquet::arrow::FileReader::GetRecordBatchReader(const std::vector<int>&, std::unique_ptr<arrow::RecordBatchReader>*) | |
virtual ::arrow::Status GetRecordBatchReader( | |
^~~~~~~~~~~~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:174:27: note: virtual arrow::Status parquet::arrow::FileReader::GetRecordBatchReader(const std::vector<int>&, const std::vector<int>&, std::unique_ptr<arrow::RecordBatchReader>*) | |
virtual ::arrow::Status GetRecordBatchReader( | |
^~~~~~~~~~~~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:190:27: note: virtual arrow::Status parquet::arrow::FileReader::ReadTable(std::shared_ptr<arrow::Table>*) | |
virtual ::arrow::Status ReadTable(std::shared_ptr<::arrow::Table>* out) = 0; | |
^~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:195:27: note: virtual arrow::Status parquet::arrow::FileReader::ReadTable(const std::vector<int>&, std::shared_ptr<arrow::Table>*) | |
virtual ::arrow::Status ReadTable(const std::vector<int>& column_indices, | |
^~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:198:27: note: virtual arrow::Status parquet::arrow::FileReader::ReadRowGroup(int, const std::vector<int>&, std::shared_ptr<arrow::Table>*) | |
virtual ::arrow::Status ReadRowGroup(int i, const std::vector<int>& column_indices, | |
^~~~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:201:27: note: virtual arrow::Status parquet::arrow::FileReader::ReadRowGroup(int, std::shared_ptr<arrow::Table>*) | |
virtual ::arrow::Status ReadRowGroup(int i, std::shared_ptr<::arrow::Table>* out) = 0; | |
^~~~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:203:27: note: virtual arrow::Status parquet::arrow::FileReader::ReadRowGroups(const std::vector<int>&, const std::vector<int>&, std::shared_ptr<arrow::Table>*) | |
virtual ::arrow::Status ReadRowGroups(const std::vector<int>& row_groups, | |
^~~~~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:207:27: note: virtual arrow::Status parquet::arrow::FileReader::ReadRowGroups(const std::vector<int>&, std::shared_ptr<arrow::Table>*) | |
virtual ::arrow::Status ReadRowGroups(const std::vector<int>& row_groups, | |
^~~~~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:211:27: note: virtual arrow::Status parquet::arrow::FileReader::ScanContents(std::vector<int>, int32_t, int64_t*) | |
virtual ::arrow::Status ScanContents(std::vector<int> columns, | |
^~~~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:217:43: note: virtual std::shared_ptr<parquet::arrow::RowGroupReader> parquet::arrow::FileReader::RowGroup(int) | |
virtual std::shared_ptr<RowGroupReader> RowGroup(int row_group_index) = 0; | |
^~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:220:15: note: virtual int parquet::arrow::FileReader::num_row_groups() const | |
virtual int num_row_groups() const = 0; | |
^~~~~~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:222:30: note: virtual parquet::ParquetFileReader* parquet::arrow::FileReader::parquet_reader() const | |
virtual ParquetFileReader* parquet_reader() const = 0; | |
^~~~~~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:226:16: note: virtual void parquet::arrow::FileReader::set_use_threads(bool) | |
virtual void set_use_threads(bool use_threads) = 0; | |
^~~~~~~~~~~~~~~ | |
In file included from hpat/io/_parquet.cpp:19:0: | |
./parquet_reader/hpat_parquet_reader.cpp:641:88: error: invalid new-expression of abstract class type 'parquet::arrow::FileReader' | |
a_reader->reset(new FileReader(pool, ParquetFileReader::OpenFile(f_name, false))); | |
^ | |
./parquet_reader/hpat_parquet_reader.cpp: In function 'std::shared_ptr<arrow::DataType> get_arrow_type(std::shared_ptr<parquet::arrow::FileReader>, int64_t)': | |
./parquet_reader/hpat_parquet_reader.cpp:659:101: error: no matching function for call to 'FromParquetSchema(const parquet::SchemaDescriptor*&, std::vector<int>&, std::shared_ptr<const arrow::KeyValueMetadata>&, std::shared_ptr<arrow::Schema>*)' | |
parquet::arrow::FromParquetSchema(descr, column_indices, parquet_key_value_metadata, &col_schema); | |
^ | |
In file included from hpat/io/_parquet.cpp:11:0: | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:328:17: note: candidate: arrow::Status parquet::arrow::FromParquetSchema(const parquet::SchemaDescriptor*, const parquet::ArrowReaderProperties&, const std::shared_ptr<const arrow::KeyValueMetadata>&, std::shared_ptr<arrow::Schema>*) | |
::arrow::Status FromParquetSchema( | |
^~~~~~~~~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:328:17: note: no known conversion for argument 2 from 'std::vector<int>' to 'const parquet::ArrowReaderProperties&' | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:334:17: note: candidate: arrow::Status parquet::arrow::FromParquetSchema(const parquet::SchemaDescriptor*, const parquet::ArrowReaderProperties&, std::shared_ptr<arrow::Schema>*) | |
::arrow::Status FromParquetSchema(const SchemaDescriptor* parquet_schema, | |
^~~~~~~~~~~~~~~~~ | |
/home/spokhode/miniconda3/envs/arrow-env/include/parquet/arrow/reader.h:334:17: note: candidate expects 3 arguments, 4 provided | |
error: Command "/home/spokhode/miniconda3/envs/arrow-env/bin/x86_64-conda_cos6-linux-gnu-cc -DNDEBUG -fwrapv -O2 -Wall -Wstrict-prototypes -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /home/spokhode/miniconda3/envs/arrow-env/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /home/spokhode/miniconda3/envs/arrow-env/include -fPIC -DBUILTIN_PARQUET_READER -I. -I/home/spokhode/miniconda3/envs/arrow-env/include -I/home/spokhode/miniconda3/envs/arrow-env/include/python3.7m -c hpat/io/_parquet.cpp -o build/temp.linux-x86_64-3.7/hpat/io/_parquet.o -std=c++11" failed with exit status 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment