Created
November 27, 2020 05:14
-
-
Save ananis25/0b645ef94a70a0834fd23177e8721be9 to your computer and use it in GitHub Desktop.
Adding metadata to an Arrow field using the C API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <arrow-glib/arrow-glib.h> | |
int main(int argc, char **argv) | |
{ | |
int success = EXIT_FAILURE; | |
GError *error = NULL; | |
GArrowMemoryMappedInputStream *input; | |
GArrowRecordBatchFileReader *reader; | |
const char *input_path = "ext.arrow"; | |
input = garrow_memory_mapped_input_stream_new(input_path, &error); | |
if (!input) { | |
g_print("failed to open file: %s\n", error->message); | |
g_error_free(error); | |
goto cleanup; | |
} | |
reader = garrow_record_batch_file_reader_new(GARROW_SEEKABLE_INPUT_STREAM(input), &error); | |
if (!reader) { | |
g_print("failed to open file reader: %s\n", error->message); | |
g_error_free(error); | |
goto cleanup; | |
} | |
GArrowRecordBatch *record_batch = garrow_record_batch_file_reader_read_record_batch(reader, 0, &error); | |
if (!record_batch) { | |
g_print("failed to open file reader: %s\n", error->message); | |
g_error_free(error); | |
goto cleanup; | |
} | |
GArrowSchema *schema = garrow_record_batch_get_schema(record_batch); | |
gboolean true_val = 1; | |
g_print(garrow_schema_to_string_metadata(schema, true_val)); | |
GHashTable * metadata = garrow_schema_get_metadata(schema); | |
g_print("size of schema metadata: %d", g_hash_table_size(metadata)); | |
g_object_unref(record_batch); | |
success = EXIT_SUCCESS; | |
goto cleanup; | |
cleanup: | |
g_object_unref(reader); | |
g_object_unref(input); | |
return success; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyarrow as pa | |
import pandas as pd | |
import numpy as np | |
# copied from pyarrow tests | |
class PeriodType(pa.ExtensionType):
    """Arrow extension type named 'test.period', stored as int64.

    The only parameter is ``freq``, which is serialized into the extension
    metadata as the byte string ``b"freq=<freq>"``.
    """

    def __init__(self, freq):
        # Set the attribute before initializing the base type, as the
        # original code does.
        self._freq = freq
        pa.ExtensionType.__init__(self, pa.int64(), 'test.period')

    @property
    def freq(self):
        """The frequency value this type was constructed with."""
        return self._freq

    def __arrow_ext_serialize__(self):
        """Return the type parameters encoded as ``b"freq=<freq>"``."""
        return "freq={}".format(self.freq).encode()

    @classmethod
    def __arrow_ext_deserialize__(cls, storage_type, serialized):
        """Rebuild a PeriodType from the bytes written by
        ``__arrow_ext_serialize__``."""
        serialized = serialized.decode()
        assert serialized.startswith("freq=")
        # Split on the first '=' only, so a freq value that itself contains
        # '=' round-trips intact instead of being truncated.
        _, _, freq = serialized.partition('=')
        return PeriodType(freq)
# Build a one-column table whose "ext" column is a PeriodType('D') extension
# array backed by the int64 values 0..999999, then write it to "ext.arrow"
# in the Arrow IPC file format.
period_type = PeriodType('D')
storage = pa.array(np.arange(1000000), pa.int64())
arr = pa.ExtensionArray.from_storage(period_type, storage)
batch = pa.RecordBatch.from_arrays([arr,], ["ext"])
table = pa.Table.from_batches([batch])

filename = "ext.arrow"
# Context managers close the writer first and the file second, even if
# write_table raises, so the file handle is never leaked.
with pa.OSFile(filename, "wb") as stream:
    with pa.ipc.new_file(stream, table.schema) as writer:
        writer.write_table(table)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment