Skip to content

Instantly share code, notes, and snippets.

@ianmcook
Last active October 13, 2023 18:04
Show Gist options
  • Save ianmcook/62ad82c17a0bb6cdb4f39d52785c4db9 to your computer and use it in GitHub Desktop.
Save ianmcook/62ad82c17a0bb6cdb4f39d52785c4db9 to your computer and use it in GitHub Desktop.
Test writing and reading a Parquet file with a float16 column
#include <iostream>
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <arrow/util/float16.h>
#include <parquet/arrow/writer.h>
/// Builds a one-column float16 table and writes it to "float16.parquet".
///
/// The column is named "x" and holds two values, pi and tau, converted from
/// 32-bit floats to half precision.
///
/// \return arrow::Status::OK() on success; otherwise the first error reported
///         while building the array, opening the output file, writing the
///         table, or closing the file.
arrow::Status WriteTableToParquetFile() {
  // Float literals (note the `f` suffix) avoid an implicit double -> float
  // narrowing at initialization.
  constexpr float pi = 3.141592653590f;
  constexpr float tau = 6.283185307180f;

  // The builder is fed the 16-bit pattern of each half-precision value.
  // FromFloat makes the numeric float -> float16 conversion explicit instead
  // of relying on constructor overload resolution.
  arrow::HalfFloatBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append(arrow::util::Float16::FromFloat(pi).bits()));
  ARROW_RETURN_NOT_OK(builder.Append(arrow::util::Float16::FromFloat(tau).bits()));

  std::shared_ptr<arrow::Array> array;
  ARROW_RETURN_NOT_OK(builder.Finish(&array));

  const std::vector<std::shared_ptr<arrow::Array>> arrays{array};
  const std::vector<std::shared_ptr<arrow::Field>> schema_vector{
      arrow::field("x", arrow::float16())};
  auto schema = std::make_shared<arrow::Schema>(schema_vector);
  std::shared_ptr<arrow::Table> table = arrow::Table::Make(schema, arrays);

  std::shared_ptr<arrow::io::FileOutputStream> outfile;
  ARROW_ASSIGN_OR_RAISE(outfile, arrow::io::FileOutputStream::Open("float16.parquet"));
  ARROW_RETURN_NOT_OK(
      parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), outfile));

  // Close explicitly so that a failure while flushing/closing the file is
  // reported to the caller rather than being lost when the stream is destroyed.
  ARROW_RETURN_NOT_OK(outfile->Close());
  return arrow::Status::OK();
}
int main(int, char**) {
auto status = WriteTableToParquetFile();
if (!status.ok()) {
std::cerr << "Error occurred : " << status.message() << std::endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
#include <iostream>
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <arrow/util/float16.h>
#include <parquet/arrow/reader.h>
/// Reads "float16.parquet" into an Arrow table, pretty-prints the table to
/// stdout, and then prints the first value of the first column as a float16.
///
/// \return arrow::Status::OK() on success; otherwise the first error reported
///         while opening or reading the file or printing the table, or an
///         Invalid/TypeError status when the table has no columns, the first
///         column is not float16, or the first column's first chunk is
///         missing or empty.
arrow::Status ReadTableFromParquetFile() {
  arrow::MemoryPool* pool = arrow::default_memory_pool();

  std::shared_ptr<arrow::io::RandomAccessFile> input;
  ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open("float16.parquet"));

  std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
  ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, pool, &arrow_reader));

  std::shared_ptr<arrow::Table> table;
  ARROW_RETURN_NOT_OK(arrow_reader->ReadTable(&table));

  arrow::PrettyPrintOptions options{0};
  ARROW_RETURN_NOT_OK(arrow::PrettyPrint(*table, options, &std::cout));

  // Note: The values currently appear as unsigned integers in the printed
  // output above. To print one as a float16 value, wrap its bits in
  // arrow::util::Float16, as done below for the first value of the first column.

  // Validate before indexing: column(0), chunk(0) and Value(0) are only
  // meaningful when the table actually has that column, chunk and element.
  if (table->num_columns() == 0) {
    return arrow::Status::Invalid("Table read from float16.parquet has no columns");
  }
  const std::shared_ptr<arrow::ChunkedArray> column = table->column(0);
  if (column->type()->id() != arrow::Type::HALF_FLOAT) {
    // The downcast to HalfFloatArray below is unchecked, so verify the type.
    return arrow::Status::TypeError("Expected a float16 column but found ",
                                    column->type()->ToString());
  }
  if (column->num_chunks() == 0 || column->chunk(0)->length() == 0) {
    return arrow::Status::Invalid("First chunk of the first column is missing or empty");
  }

  const std::shared_ptr<arrow::HalfFloatArray> array =
      std::static_pointer_cast<arrow::HalfFloatArray>(column->chunk(0));
  // Value(0) is the raw 16-bit pattern; FromBits reinterprets those bits as a
  // float16 instead of treating the integer as a number to convert.
  std::cout << std::endl
            << arrow::util::Float16::FromBits(array->Value(0)) << std::endl;
  return arrow::Status::OK();
}
int main(int, char**) {
auto status = ReadTableFromParquetFile();
if (!status.ok()) {
std::cerr << "Error occurred : " << status.message() << std::endl;
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment