Last active
October 13, 2023 18:04
-
-
Save ianmcook/62ad82c17a0bb6cdb4f39d52785c4db9 to your computer and use it in GitHub Desktop.
Test writing and reading a Parquet file with a float16 column
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <arrow/api.h> | |
#include <arrow/io/api.h> | |
#include <arrow/util/float16.h> | |
#include <parquet/arrow/writer.h> | |
/// \brief Build a one-column float16 table and write it to "float16.parquet".
///
/// The table has a single column named "x" of type arrow::float16() holding
/// two values (pi and tau, rounded to half precision).
///
/// \return arrow::Status::OK() on success, otherwise the first error reported
///         while building the array, opening the file, writing the table or
///         closing the output stream.
arrow::Status WriteTableToParquetFile() {
  constexpr float kPi = 3.141592653590f;
  constexpr float kTau = 6.283185307180f;

  // HalfFloatBuilder stores the raw 16-bit encoding, so each float is first
  // converted to a Float16 and then its bit pattern is appended. The named
  // factory makes the float -> half conversion explicit instead of relying on
  // constructor overload resolution.
  arrow::HalfFloatBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append(arrow::util::Float16::FromFloat(kPi).bits()));
  ARROW_RETURN_NOT_OK(builder.Append(arrow::util::Float16::FromFloat(kTau).bits()));

  std::shared_ptr<arrow::Array> array;
  ARROW_RETURN_NOT_OK(builder.Finish(&array));

  const std::vector<std::shared_ptr<arrow::Array>> arrays{array};
  const std::vector<std::shared_ptr<arrow::Field>> fields{
      arrow::field("x", arrow::float16())};
  const auto schema = std::make_shared<arrow::Schema>(fields);
  const std::shared_ptr<arrow::Table> table = arrow::Table::Make(schema, arrays);

  std::shared_ptr<arrow::io::FileOutputStream> outfile;
  ARROW_ASSIGN_OR_RAISE(outfile, arrow::io::FileOutputStream::Open("float16.parquet"));
  ARROW_RETURN_NOT_OK(
      parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), outfile));

  // Close explicitly so that a failure while closing the file is returned to
  // the caller instead of being left to the stream's destructor, which cannot
  // report it through this function's Status.
  ARROW_RETURN_NOT_OK(outfile->Close());
  return arrow::Status::OK();
}
int main(int, char**) { | |
auto status = WriteTableToParquetFile(); | |
if (!status.ok()) { | |
std::cerr << "Error occurred : " << status.message() << std::endl; | |
return EXIT_FAILURE; | |
} | |
return EXIT_SUCCESS; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <arrow/api.h> | |
#include <arrow/io/api.h> | |
#include <arrow/util/float16.h> | |
#include <parquet/arrow/reader.h> | |
/// \brief Read "float16.parquet", pretty-print it, then print the first value
///        of the first column decoded as a half-precision float.
///
/// \return arrow::Status::OK() on success. Returns an error Status if the file
///         cannot be opened or read, or if the table does not have the shape
///         this example needs (at least one column, of float16 type, with a
///         non-empty first chunk).
arrow::Status ReadTableFromParquetFile() {
  arrow::MemoryPool* pool = arrow::default_memory_pool();

  std::shared_ptr<arrow::io::RandomAccessFile> input;
  ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open("float16.parquet"));

  std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
  ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, pool, &arrow_reader));

  std::shared_ptr<arrow::Table> table;
  ARROW_RETURN_NOT_OK(arrow_reader->ReadTable(&table));

  arrow::PrettyPrintOptions options{0};
  ARROW_RETURN_NOT_OK(arrow::PrettyPrint(*table, options, &std::cout));

  // Note: The values currently appear as unsigned integers in the printed
  // output. To print one as a float16 value, decode its bits with
  // arrow::util::Float16, as done below for the first value of the first column.

  // The file comes from disk, so check its shape before indexing into it;
  // column(0), chunk(0) and Value(0) do not validate their arguments here.
  if (table->num_columns() < 1) {
    return arrow::Status::Invalid("Expected at least one column, found ",
                                  table->num_columns());
  }
  const std::shared_ptr<arrow::ChunkedArray> column = table->column(0);
  if (column->type()->id() != arrow::Type::HALF_FLOAT) {
    // static_pointer_cast below is unchecked, so the type must be verified first.
    return arrow::Status::TypeError("Expected a float16 column, found ",
                                    column->type()->ToString());
  }
  if (column->num_chunks() < 1) {
    return arrow::Status::Invalid("First column has no chunks");
  }
  const std::shared_ptr<arrow::HalfFloatArray> array =
      std::static_pointer_cast<arrow::HalfFloatArray>(column->chunk(0));
  if (array->length() < 1) {
    return arrow::Status::Invalid("First chunk of the first column is empty");
  }

  std::cout << '\n';
  if (array->IsNull(0)) {
    // The value slot of a null entry holds no meaningful bits.
    std::cout << "null";
  } else {
    // Value(0) yields the stored 16-bit encoding. FromBits says explicitly
    // that it is a bit pattern to reinterpret, not an integer to convert.
    // NOTE(review): depending on the Arrow version, the plain Float16
    // constructor may treat an integer argument numerically - confirm.
    std::cout << arrow::util::Float16::FromBits(array->Value(0));
  }
  std::cout << std::endl;
  return arrow::Status::OK();
}
int main(int, char**) { | |
auto status = ReadTableFromParquetFile(); | |
if (!status.ok()) { | |
std::cerr << "Error occurred : " << status.message() << std::endl; | |
return EXIT_FAILURE; | |
} | |
return EXIT_SUCCESS; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment