Arrow Schema
@always_inline
fn indirect(buf: DTypePointer[DType.uint8], pos: Int) -> Int32:
    # Follow the 32-bit offset stored at `pos` to the position it points to.
    return buf.offset(pos).bitcast[DType.int32]()[0]

@always_inline
fn read[T: DType](buf: DTypePointer[DType.uint8], pos: Int) -> Scalar[T]:
    # Read a scalar of type `T` at byte position `pos`.
    return buf.offset(pos).bitcast[T]()[0]

fn field[T: DType](buf: DTypePointer[DType.uint8], pos: Int, field_offset: Int, default: Scalar[T]) -> Scalar[T]:
    # Read a scalar table field, falling back to `default` if the field is absent.
    var relative_value_offset = _relative_field_offset(buf, pos, field_offset)
    if relative_value_offset == 0:
        return default
    return buf.offset(pos + relative_value_offset).bitcast[T]()[0]

fn field_table(buf: DTypePointer[DType.uint8], pos: Int, field_offset: Int) -> Optional[Int32]:
    # Resolve a sub-table field to its absolute position, or None if absent.
    var relative_value_offset = _relative_field_offset(buf, pos, field_offset)
    if relative_value_offset == 0:
        return None
    return int(pos + buf.offset(pos + relative_value_offset).bitcast[DType.int32]()[0])

fn field_struct(buf: DTypePointer[DType.uint8], pos: Int, field_offset: Int) -> Optional[Int32]:
    # Structs are stored inline in the table, so the field position is the struct position.
    var relative_value_offset = _relative_field_offset(buf, pos, field_offset)
    if relative_value_offset == 0:
        return None
    return pos + relative_value_offset

fn field_vector(buf: DTypePointer[DType.uint8], pos: Int, field_offset: Int) -> Int:
    # Absolute position of the first vector element (past the 4-byte length prefix), or 0 if absent.
    var relative_value_offset = _relative_field_offset(buf, pos, field_offset)
    if relative_value_offset == 0:
        return 0
    return int(pos + buf.offset(pos + relative_value_offset).bitcast[DType.int32]()[0]) + 4

fn field_vector_len(buf: DTypePointer[DType.uint8], pos: Int, field_offset: Int) -> Int:
    # Number of elements in a vector field, or 0 if the field is absent.
    var relative_value_offset = _relative_field_offset(buf, pos, field_offset)
    if relative_value_offset == 0:
        return 0
    var vec_pos = int(pos + buf.offset(pos + relative_value_offset).bitcast[DType.int32]()[0])
    return int(buf.offset(vec_pos).bitcast[DType.int32]()[0])

fn field_string(buf: DTypePointer[DType.uint8], pos: Int, field_offset: Int) -> StringRef:
    # Read a string field as a StringRef, or "" if the field is absent.
    var relative_value_offset = _relative_field_offset(buf, pos, field_offset)
    if relative_value_offset == 0:
        return ""
    var str_pos = int(pos + buf.offset(pos + relative_value_offset).bitcast[DType.int32]()[0])
    var length = buf.offset(str_pos).bitcast[DType.int32]()[0]
    return StringRef(buf.offset(str_pos + 4), int(length))

@always_inline
fn _relative_field_offset(buf: DTypePointer[DType.uint8], pos: Int, field_offset: Int) -> Int:
    # A table starts with a signed 32-bit offset pointing back to its vtable;
    # the vtable stores each field's relative value offset as a uint16 at `field_offset`.
    var relative_vtable_offset = indirect(buf, pos)
    var vtable_pos = pos - int(relative_vtable_offset)
    return int(buf.offset(vtable_pos + field_offset).bitcast[DType.uint16]()[0])
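
# A minimal usage sketch (not part of the original gist). It assumes `buf` points at the
# root of a valid FlatBuffers buffer whose root table stores an int32 in its first field
# slot (vtable offset 4) and a string in its second slot (vtable offset 6).
fn _example_read_root_fields(buf: DTypePointer[DType.uint8]):
    # The first 4 bytes of the buffer hold the offset to the root table.
    var table_pos = int(indirect(buf, 0))
    # Scalar fields fall back to the supplied default when the slot is absent.
    var first = field[DType.int32](buf, table_pos, 4, 0)
    # String fields resolve through an offset to length-prefixed bytes; "" when absent.
    var second = field_string(buf, table_pos, 6)
    print(first, second)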

# automatically generated by the FlatBuffers compiler, do not modify
# (`flatbuffers` below refers to the minimal reader helpers defined above)
import flatbuffers

@value
struct MetadataVersion:
    var _value: Int16
    # 0.1.0 (October 2016).
    alias V1 = 0
    # 0.2.0 (February 2017). Non-backwards compatible with V1.
    alias V2 = 1
    # 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2.
    alias V3 = 2
    # >= 0.8.0 (December 2017). Non-backwards compatible with V3.
    alias V4 = 3
    # >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4
    # metadata and IPC messages). Implementations are recommended to provide a
    # V4 compatibility mode with V5 format changes disabled.
    #
    # Incompatible changes between V4 and V5:
    # - Union buffer layout has changed. In V5, Unions don't have a validity
    #   bitmap buffer.
    alias V5 = 4

# Represents Arrow Features that might not have full support
# within implementations. This is intended to be used in
# two scenarios:
# 1. A mechanism for readers of Arrow Streams
#    and files to understand that the stream or file makes
#    use of a feature that isn't supported or unknown to
#    the implementation (and therefore can meet the Arrow
#    forward compatibility guarantees).
# 2. A means of negotiating between a client and server
#    what features a stream is allowed to use. The enum
#    values here are intended to represent higher level
#    features, additional details may be negotiated
#    with key-value pairs specific to the protocol.
#
# Enums added to this list should be assigned power-of-two values
# to facilitate exchanging and comparing bitmaps for supported
# features.
@value
struct Feature:
    var _value: Int64
    # Needed to make flatbuffers happy.
    alias UNUSED = 0
    # The stream makes use of multiple full dictionaries with the
    # same ID and assumes clients implement dictionary replacement
    # correctly.
    alias DICTIONARY_REPLACEMENT = 1
    # The stream makes use of compressed bodies as described
    # in Message.fbs.
    alias COMPRESSED_BODY = 2

@value
struct UnionMode:
    var _value: Int16
    alias Sparse = 0
    alias Dense = 1

@value
struct Precision:
    var _value: Int16
    alias HALF = 0
    alias SINGLE = 1
    alias DOUBLE = 2

@value
struct DateUnit:
    var _value: Int16
    alias DAY = 0
    alias MILLISECOND = 1

@value
struct TimeUnit:
    var _value: Int16
    alias SECOND = 0
    alias MILLISECOND = 1
    alias MICROSECOND = 2
    alias NANOSECOND = 3

@value
struct IntervalUnit:
    var _value: Int16
    alias YEAR_MONTH = 0
    alias DAY_TIME = 1
    alias MONTH_DAY_NANO = 2

# ----------------------------------------------------------------------
# Top-level Type value, enabling extensible type-specific metadata. We can
# add new logical types to Type without breaking backwards compatibility
@value
struct Type:
    var _value: UInt8
    alias NONE = 0
    alias Null = 1
    alias Int_ = 2
    alias FloatingPoint = 3
    alias Binary = 4
    alias Utf8 = 5
    alias Bool_ = 6
    alias Decimal = 7
    alias Date = 8
    alias Time = 9
    alias Timestamp = 10
    alias Interval = 11
    alias List = 12
    alias Struct_ = 13
    alias Union = 14
    alias FixedSizeBinary = 15
    alias FixedSizeList = 16
    alias Map = 17
    alias Duration = 18
    alias LargeBinary = 19
    alias LargeUtf8 = 20
    alias LargeList = 21
    alias RunEndEncoded = 22
    alias BinaryView = 23
    alias Utf8View = 24
    alias ListView = 25
    alias LargeListView = 26

# ----------------------------------------------------------------------
# Dictionary encoding metadata
# Maintained for forwards compatibility, in the future
# Dictionaries might be explicit maps between integers and values
# allowing for non-contiguous index values
@value
struct DictionaryKind:
    var _value: Int16
    alias DenseArray = 0

# ----------------------------------------------------------------------
# Endianness of the platform producing the data
@value
struct Endianness:
    var _value: Int16
    alias Little = 0
    alias Big = 1

# These are stored in the flatbuffer in the Type union below
@value
struct Null:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsNull(buf: DTypePointer[DType.uint8]) -> Null:
    return Null(buf, flatbuffers.indirect(buf, 0))

# A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
# (according to the physical memory layout). We used Struct_ here as
# Struct is a reserved word in Flatbuffers
@value
struct Struct_:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsStruct_(buf: DTypePointer[DType.uint8]) -> Struct_:
    return Struct_(buf, flatbuffers.indirect(buf, 0))

@value
struct List:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsList(buf: DTypePointer[DType.uint8]) -> List:
    return List(buf, flatbuffers.indirect(buf, 0))

# Same as List, but with 64-bit offsets, allowing to represent
# extremely large data values.
@value
struct LargeList:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsLargeList(buf: DTypePointer[DType.uint8]) -> LargeList:
    return LargeList(buf, flatbuffers.indirect(buf, 0))

# Represents the same logical types that List can, but contains offsets and
# sizes allowing for writes in any order and sharing of child values among
# list values.
@value
struct ListView:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsListView(buf: DTypePointer[DType.uint8]) -> ListView:
    return ListView(buf, flatbuffers.indirect(buf, 0))

# Same as ListView, but with 64-bit offsets and sizes, allowing to represent
# extremely large data values.
@value
struct LargeListView:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsLargeListView(buf: DTypePointer[DType.uint8]) -> LargeListView:
    return LargeListView(buf, flatbuffers.indirect(buf, 0))

@value
struct FixedSizeList:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    # Number of list items per value
    fn listSize(self) -> Int32:
        return flatbuffers.field[DType.int32](self._buf, int(self._pos), 4, 0)

fn GetRootAsFixedSizeList(buf: DTypePointer[DType.uint8]) -> FixedSizeList:
    return FixedSizeList(buf, flatbuffers.indirect(buf, 0))

# A Map is a logical nested type that is represented as
#
# List<entries: Struct<key: K, value: V>>
#
# In this layout, the keys and values are each respectively contiguous. We do
# not constrain the key and value types, so the application is responsible
# for ensuring that the keys are hashable and unique. Whether the keys are sorted
# may be set in the metadata for this field.
#
# In a field with Map type, the field has a child Struct field, which then
# has two children: the first the key type and the second the value type. The
# names of the child fields may be respectively "entries", "key", and "value",
# but this is not enforced.
#
# Map
# ```text
# - child[0] entries: Struct
#   - child[0] key: K
#   - child[1] value: V
# ```
# Neither the "entries" field nor the "key" field may be nullable.
#
# The metadata is structured so that Arrow systems without special handling
# for Map can make Map an alias for List. The "layout" attribute for the Map
# field must have the same contents as a List.
@value
struct Map:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    # Set to true if the keys within each value are sorted
    fn keysSorted(self) -> Scalar[DType.bool]:
        return flatbuffers.field[DType.int8](self._buf, int(self._pos), 4, 0) != 0

fn GetRootAsMap(buf: DTypePointer[DType.uint8]) -> Map:
    return Map(buf, flatbuffers.indirect(buf, 0))

# A union is a complex type with children in Field.
# By default, ids in the type vector refer to the offsets in the children;
# optionally, typeIds provides an indirection between the child offset and the type id.
# For each child, `typeIds[offset]` is the id used in the type vector.
@value
struct Union:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    fn mode(self) -> UnionMode:
        return UnionMode(flatbuffers.field[DType.int16](self._buf, int(self._pos), 4, 0))
    fn typeIds(self, i: Int) -> Int32:
        return flatbuffers.read[DType.int32](self._buf, flatbuffers.field_vector(self._buf, int(self._pos), 6) + i * 4)
    fn typeIds_length(self) -> Int:
        return flatbuffers.field_vector_len(self._buf, int(self._pos), 6)

fn GetRootAsUnion(buf: DTypePointer[DType.uint8]) -> Union:
    return Union(buf, flatbuffers.indirect(buf, 0))

@value
struct Int_:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    fn bitWidth(self) -> Int32:
        return flatbuffers.field[DType.int32](self._buf, int(self._pos), 4, 0)
    fn is_signed(self) -> Scalar[DType.bool]:
        return flatbuffers.field[DType.int8](self._buf, int(self._pos), 6, 0) != 0

fn GetRootAsInt_(buf: DTypePointer[DType.uint8]) -> Int_:
    return Int_(buf, flatbuffers.indirect(buf, 0))

@value
struct FloatingPoint:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    fn precision(self) -> Precision:
        return Precision(flatbuffers.field[DType.int16](self._buf, int(self._pos), 4, 0))

fn GetRootAsFloatingPoint(buf: DTypePointer[DType.uint8]) -> FloatingPoint:
    return FloatingPoint(buf, flatbuffers.indirect(buf, 0))

# Unicode with UTF-8 encoding
@value
struct Utf8:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsUtf8(buf: DTypePointer[DType.uint8]) -> Utf8:
    return Utf8(buf, flatbuffers.indirect(buf, 0))

# Opaque binary data
@value
struct Binary:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsBinary(buf: DTypePointer[DType.uint8]) -> Binary:
    return Binary(buf, flatbuffers.indirect(buf, 0))

# Same as Utf8, but with 64-bit offsets, allowing to represent
# extremely large data values.
@value
struct LargeUtf8:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsLargeUtf8(buf: DTypePointer[DType.uint8]) -> LargeUtf8:
    return LargeUtf8(buf, flatbuffers.indirect(buf, 0))

# Same as Binary, but with 64-bit offsets, allowing to represent
# extremely large data values.
@value
struct LargeBinary:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsLargeBinary(buf: DTypePointer[DType.uint8]) -> LargeBinary:
    return LargeBinary(buf, flatbuffers.indirect(buf, 0))

# Logically the same as Utf8, but the internal representation uses a view
# struct that contains the string length and either the string's entire data
# inline (for small strings) or an inlined prefix, an index of another buffer,
# and an offset pointing to a slice in that buffer (for non-small strings).
#
# Since it uses a variable number of data buffers, each Field with this type
# must have a corresponding entry in `variadicBufferCounts`.
@value
struct Utf8View:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsUtf8View(buf: DTypePointer[DType.uint8]) -> Utf8View:
    return Utf8View(buf, flatbuffers.indirect(buf, 0))

# Logically the same as Binary, but the internal representation uses a view
# struct that contains the string length and either the string's entire data
# inline (for small strings) or an inlined prefix, an index of another buffer,
# and an offset pointing to a slice in that buffer (for non-small strings).
#
# Since it uses a variable number of data buffers, each Field with this type
# must have a corresponding entry in `variadicBufferCounts`.
@value
struct BinaryView:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsBinaryView(buf: DTypePointer[DType.uint8]) -> BinaryView:
    return BinaryView(buf, flatbuffers.indirect(buf, 0))

@value
struct FixedSizeBinary:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    # Number of bytes per value
    fn byteWidth(self) -> Int32:
        return flatbuffers.field[DType.int32](self._buf, int(self._pos), 4, 0)

fn GetRootAsFixedSizeBinary(buf: DTypePointer[DType.uint8]) -> FixedSizeBinary:
    return FixedSizeBinary(buf, flatbuffers.indirect(buf, 0))

@value
struct Bool_:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsBool_(buf: DTypePointer[DType.uint8]) -> Bool_:
    return Bool_(buf, flatbuffers.indirect(buf, 0))

# Contains two child arrays, run_ends and values.
# The run_ends child array must be a 16/32/64-bit integer array
# which encodes the indices at which the run with the value in
# each corresponding index in the values child array ends.
# Like list/struct types, the value array can be of any type.
@value
struct RunEndEncoded:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32

fn GetRootAsRunEndEncoded(buf: DTypePointer[DType.uint8]) -> RunEndEncoded:
    return RunEndEncoded(buf, flatbuffers.indirect(buf, 0))

# Exact decimal value represented as an integer value in two's
# complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers
# are used. The representation uses the endianness indicated
# in the Schema.
@value
struct Decimal:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    # Total number of decimal digits
    fn precision(self) -> Int32:
        return flatbuffers.field[DType.int32](self._buf, int(self._pos), 4, 0)
    # Number of digits after the decimal point "."
    fn scale(self) -> Int32:
        return flatbuffers.field[DType.int32](self._buf, int(self._pos), 6, 0)
    # Number of bits per value. The only accepted widths are 128 and 256.
    # We use bitWidth for consistency with Int::bitWidth.
    fn bitWidth(self) -> Int32:
        return flatbuffers.field[DType.int32](self._buf, int(self._pos), 8, 128)

fn GetRootAsDecimal(buf: DTypePointer[DType.uint8]) -> Decimal:
    return Decimal(buf, flatbuffers.indirect(buf, 0))

# Date is either a 32-bit or 64-bit signed integer type representing an
# elapsed time since UNIX epoch (1970-01-01), stored in either of two units:
#
# * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
#   leap seconds), where the values are evenly divisible by 86400000
# * Days (32 bits) since the UNIX epoch
@value
struct Date:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    fn unit(self) -> DateUnit:
        return DateUnit(flatbuffers.field[DType.int16](self._buf, int(self._pos), 4, 1))

fn GetRootAsDate(buf: DTypePointer[DType.uint8]) -> Date:
    return Date(buf, flatbuffers.indirect(buf, 0))

# Time is either a 32-bit or 64-bit signed integer type representing an
# elapsed time since midnight, stored in either of four units: seconds,
# milliseconds, microseconds or nanoseconds.
#
# The integer `bitWidth` depends on the `unit` and must be one of the following:
# * SECOND and MILLISECOND: 32 bits
# * MICROSECOND and NANOSECOND: 64 bits
#
# The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds
# (exclusive), adjusted for the time unit (for example, up to 86400000
# exclusive for the MILLISECOND unit).
# This definition doesn't allow for leap seconds. Time values from
# measurements with leap seconds will need to be corrected when ingesting
# into Arrow (for example by replacing the value 86400 with 86399).
@value
struct Time:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    fn unit(self) -> TimeUnit:
        return TimeUnit(flatbuffers.field[DType.int16](self._buf, int(self._pos), 4, 1))
    fn bitWidth(self) -> Int32:
        return flatbuffers.field[DType.int32](self._buf, int(self._pos), 6, 32)

fn GetRootAsTime(buf: DTypePointer[DType.uint8]) -> Time:
    return Time(buf, flatbuffers.indirect(buf, 0))

# Timestamp is a 64-bit signed integer representing an elapsed time since a
# fixed epoch, stored in either of four units: seconds, milliseconds,
# microseconds or nanoseconds, and is optionally annotated with a timezone.
#
# Timestamp values do not include any leap seconds (in other words, all
# days are considered 86400 seconds long).
#
# Timestamps with a non-empty timezone
# ------------------------------------
#
# If a Timestamp column has a non-empty timezone value, its epoch is
# 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone
# (the Unix epoch), regardless of the Timestamp's own timezone.
#
# Therefore, timestamp values with a non-empty timezone correspond to
# physical points in time together with some additional information about
# how the data was obtained and/or how to display it (the timezone).
#
# For example, the timestamp value 0 with the timezone string "Europe/Paris"
# corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
# application may prefer to display it as "January 1st 1970, 01h00" in
# the Europe/Paris timezone (which is the same physical point in time).
#
# One consequence is that timestamp values with a non-empty timezone
# can be compared and ordered directly, since they all share the same
# well-known point of reference (the Unix epoch).
#
# Timestamps with an unset / empty timezone
# -----------------------------------------
#
# If a Timestamp column has no timezone value, its epoch is
# 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone.
#
# Therefore, timestamp values without a timezone cannot be meaningfully
# interpreted as physical points in time, but only as calendar / clock
# indications ("wall clock time") in an unspecified timezone.
#
# For example, the timestamp value 0 with an empty timezone string
# corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
# is not enough information to interpret it as a well-defined physical
# point in time.
#
# One consequence is that timestamp values without a timezone cannot
# be reliably compared or ordered, since they may have different points of
# reference. In particular, it is *not* possible to interpret an unset
# or empty timezone as the same as "UTC".
#
# Conversion between timezones
# ----------------------------
#
# If a Timestamp column has a non-empty timezone, changing the timezone
# to a different non-empty value is a metadata-only operation:
# the timestamp values need not change as their point of reference remains
# the same (the Unix epoch).
#
# However, if a Timestamp column has no timezone value, changing it to a
# non-empty value requires thinking about the desired semantics.
# One possibility is to assume that the original timestamp values are
# relative to the epoch of the timezone being set; timestamp values should
# then be adjusted to the Unix epoch (for example, changing the timezone from
# empty to "Europe/Paris" would require converting the timestamp values
# from "Europe/Paris" to "UTC", which seems counter-intuitive but is
# nevertheless correct).
#
# Guidelines for encoding data from external libraries
# ----------------------------------------------------
#
# Date & time libraries often have multiple different data types for temporal
# data. In order to ease interoperability between different implementations the
# Arrow project has some recommendations for encoding these types into a Timestamp
# column.
#
# An "instant" represents a physical point in time that has no relevant timezone
# (for example, astronomical data). To encode an instant, use a Timestamp with
# the timezone string set to "UTC", and make sure the Timestamp values
# are relative to the UTC epoch (January 1st 1970, midnight).
#
# A "zoned date-time" represents a physical point in time annotated with an
# informative timezone (for example, the timezone in which the data was
# recorded). To encode a zoned date-time, use a Timestamp with the timezone
# string set to the name of the timezone, and make sure the Timestamp values
# are relative to the UTC epoch (January 1st 1970, midnight).
#
# (There is some ambiguity between an instant and a zoned date-time with the
# UTC timezone. Both of these are stored the same in Arrow. Typically,
# this distinction does not matter. If it does, then an application should
# use custom metadata or an extension type to distinguish between the two cases.)
#
# An "offset date-time" represents a physical point in time combined with an
# explicit offset from UTC. To encode an offset date-time, use a Timestamp
# with the timezone string set to the numeric timezone offset string
# (e.g. "+03:00"), and make sure the Timestamp values are relative to
# the UTC epoch (January 1st 1970, midnight).
#
# A "naive date-time" (also called "local date-time" in some libraries)
# represents a wall clock time combined with a calendar date, but with
# no indication of how to map this information to a physical point in time.
# Naive date-times must be handled with care because of this missing
# information, and also because daylight saving time (DST) may make
# some values ambiguous or nonexistent. A naive date-time may be
# stored as a struct with Date and Time fields. However, it may also be
# encoded into a Timestamp column with an empty timezone. The timestamp
# values should be computed "as if" the timezone of the date-time values
# was UTC; for example, the naive date-time "January 1st 1970, 00h00" would
# be encoded as timestamp value 0.
@value
struct Timestamp:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    fn unit(self) -> TimeUnit:
        return TimeUnit(flatbuffers.field[DType.int16](self._buf, int(self._pos), 4, 0))
    # The timezone is an optional string indicating the name of a timezone,
    # one of:
    #
    # * As used in the Olson timezone database (the "tz database" or
    #   "tzdata"), such as "America/New_York".
    # * An absolute timezone offset of the form "+XX:XX" or "-XX:XX",
    #   such as "+07:30".
    #
    # Whether a timezone string is present indicates different semantics about
    # the data (see above).
    fn timezone(self) -> StringRef:
        return flatbuffers.field_string(self._buf, int(self._pos), 6)

fn GetRootAsTimestamp(buf: DTypePointer[DType.uint8]) -> Timestamp:
    return Timestamp(buf, flatbuffers.indirect(buf, 0))

@value
struct Interval:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    fn unit(self) -> IntervalUnit:
        return IntervalUnit(flatbuffers.field[DType.int16](self._buf, int(self._pos), 4, 0))

fn GetRootAsInterval(buf: DTypePointer[DType.uint8]) -> Interval:
    return Interval(buf, flatbuffers.indirect(buf, 0))

@value
struct Duration:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    fn unit(self) -> TimeUnit:
        return TimeUnit(flatbuffers.field[DType.int16](self._buf, int(self._pos), 4, 1))

fn GetRootAsDuration(buf: DTypePointer[DType.uint8]) -> Duration:
    return Duration(buf, flatbuffers.indirect(buf, 0))

# ----------------------------------------------------------------------
# user defined key value pairs to add custom metadata to arrow
# key namespacing is the responsibility of the user
@value
struct KeyValue:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    fn key(self) -> StringRef:
        return flatbuffers.field_string(self._buf, int(self._pos), 4)
    fn value(self) -> StringRef:
        return flatbuffers.field_string(self._buf, int(self._pos), 6)

fn GetRootAsKeyValue(buf: DTypePointer[DType.uint8]) -> KeyValue:
    return KeyValue(buf, flatbuffers.indirect(buf, 0))

@value
struct DictionaryEncoding:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    # The known dictionary id in the application where this data is used. In
    # the file or streaming formats, the dictionary ids are found in the
    # DictionaryBatch messages
    fn id(self) -> Int64:
        return flatbuffers.field[DType.int64](self._buf, int(self._pos), 4, 0)
    # The dictionary indices are constrained to be non-negative integers. If
    # this field is null, the indices must be signed int32. To maximize
    # cross-language compatibility and performance, implementations are
    # recommended to prefer signed integer types over unsigned integer types
    # and to avoid uint64 indices unless they are required by an application.
    fn indexType(self) -> Optional[Int_]:
        var o = flatbuffers.field_table(self._buf, int(self._pos), 6)
        if o:
            return Int_(self._buf, o.take())
        return None
    # By default, dictionaries are not ordered, or the order does not have
    # semantic meaning. In some statistical applications, dictionary-encoding
    # is used to represent ordered categorical data, and we provide a way to
    # preserve that metadata here
    fn isOrdered(self) -> Scalar[DType.bool]:
        return flatbuffers.field[DType.int8](self._buf, int(self._pos), 8, 0) != 0
    fn dictionaryKind(self) -> DictionaryKind:
        return DictionaryKind(flatbuffers.field[DType.int16](self._buf, int(self._pos), 10, 0))

fn GetRootAsDictionaryEncoding(buf: DTypePointer[DType.uint8]) -> DictionaryEncoding:
    return DictionaryEncoding(buf, flatbuffers.indirect(buf, 0))

# ----------------------------------------------------------------------
# A field represents a named column in a record / row batch or child of a
# nested type.
@value
struct Field:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    # Name is not required (e.g. in a List)
    fn name(self) -> StringRef:
        return flatbuffers.field_string(self._buf, int(self._pos), 4)
    # Whether or not this field can contain nulls. Should be true in general.
    fn nullable(self) -> Scalar[DType.bool]:
        return flatbuffers.field[DType.int8](self._buf, int(self._pos), 6, 0) != 0
    fn type_type(self) -> Type:
        return Type(flatbuffers.field[DType.uint8](self._buf, int(self._pos), 8, 0))
    # This is the type of the decoded value if the field is dictionary encoded.
    fn type_as_Null(self) -> Null:
        return Null(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Int(self) -> Int_:
        return Int_(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_FloatingPoint(self) -> FloatingPoint:
        return FloatingPoint(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Binary(self) -> Binary:
        return Binary(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Utf8(self) -> Utf8:
        return Utf8(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Bool(self) -> Bool_:
        return Bool_(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Decimal(self) -> Decimal:
        return Decimal(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Date(self) -> Date:
        return Date(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Time(self) -> Time:
        return Time(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Timestamp(self) -> Timestamp:
        return Timestamp(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Interval(self) -> Interval:
        return Interval(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_List(self) -> List:
        return List(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Struct_(self) -> Struct_:
        return Struct_(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Union(self) -> Union:
        return Union(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_FixedSizeBinary(self) -> FixedSizeBinary:
        return FixedSizeBinary(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_FixedSizeList(self) -> FixedSizeList:
        return FixedSizeList(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Map(self) -> Map:
        return Map(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Duration(self) -> Duration:
        return Duration(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_LargeBinary(self) -> LargeBinary:
        return LargeBinary(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_LargeUtf8(self) -> LargeUtf8:
        return LargeUtf8(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_LargeList(self) -> LargeList:
        return LargeList(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_RunEndEncoded(self) -> RunEndEncoded:
        return RunEndEncoded(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_BinaryView(self) -> BinaryView:
        return BinaryView(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_Utf8View(self) -> Utf8View:
        return Utf8View(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_ListView(self) -> ListView:
        return ListView(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    fn type_as_LargeListView(self) -> LargeListView:
        return LargeListView(self._buf, flatbuffers.field_table(self._buf, int(self._pos), 10).or_else(0))
    # Present only if the field is dictionary encoded.
    fn dictionary(self) -> Optional[DictionaryEncoding]:
        var o = flatbuffers.field_table(self._buf, int(self._pos), 12)
        if o:
            return DictionaryEncoding(self._buf, o.take())
        return None
    # children apply only to nested data types like Struct, List and Union. For
    # primitive types children will have length 0.
    fn children(self, i: Int) -> Field:
        return Field(self._buf, flatbuffers.indirect(self._buf, flatbuffers.field_vector(self._buf, int(self._pos), 14) + i * 4))
    fn children_length(self) -> Int:
        return flatbuffers.field_vector_len(self._buf, int(self._pos), 14)
    # User-defined metadata
    fn custom_metadata(self, i: Int) -> KeyValue:
        return KeyValue(self._buf, flatbuffers.indirect(self._buf, flatbuffers.field_vector(self._buf, int(self._pos), 16) + i * 4))
    fn custom_metadata_length(self) -> Int:
        return flatbuffers.field_vector_len(self._buf, int(self._pos), 16)

fn GetRootAsField(buf: DTypePointer[DType.uint8]) -> Field:
    return Field(buf, flatbuffers.indirect(buf, 0))
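
# A hedged illustration, not generated code: given a Field (for example one returned by
# `Schema.fields` below), walk its immediate children. This is how nested types such as
# Map (entries -> key/value) or Struct_ expose their structure.
fn _example_print_children(f: Field):
    for i in range(f.children_length()):
        var child = f.children(i)
        # type_type() is the Type union tag; the matching type_as_* accessor can then
        # be used to read the type-specific metadata table.
        print(child.name(), int(child.type_type()._value))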

# ----------------------------------------------------------------------
# A Buffer represents a single contiguous memory segment
@value
struct Buffer:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    # The relative offset into the shared memory page where the bytes for this
    # buffer starts
    fn offset(self) -> Int64:
        return flatbuffers.read[DType.int64](self._buf, int(self._pos) + 0)
    # The absolute length (in bytes) of the memory buffer. The memory is found
    # from offset (inclusive) to offset + length (non-inclusive). When building
    # messages using the encapsulated IPC message, padding bytes may be written
    # after a buffer, but such padding bytes do not need to be accounted for in
    # the size here.
    fn length(self) -> Int64:
        return flatbuffers.read[DType.int64](self._buf, int(self._pos) + 8)

# ----------------------------------------------------------------------
# A Schema describes the columns in a row batch
@value
struct Schema:
    var _buf: DTypePointer[DType.uint8]
    var _pos: Int32
    # endianness of the buffer
    # it is Little Endian by default
    # if endianness doesn't match the underlying system then the vectors need to be converted
    fn endianness(self) -> Endianness:
        return Endianness(flatbuffers.field[DType.int16](self._buf, int(self._pos), 4, 0))
    fn fields(self, i: Int) -> Field:
        return Field(self._buf, flatbuffers.indirect(self._buf, flatbuffers.field_vector(self._buf, int(self._pos), 6) + i * 4))
    fn fields_length(self) -> Int:
        return flatbuffers.field_vector_len(self._buf, int(self._pos), 6)
    fn custom_metadata(self, i: Int) -> KeyValue:
        return KeyValue(self._buf, flatbuffers.indirect(self._buf, flatbuffers.field_vector(self._buf, int(self._pos), 8) + i * 4))
    fn custom_metadata_length(self) -> Int:
        return flatbuffers.field_vector_len(self._buf, int(self._pos), 8)
    # Features used in the stream/file.
    fn features(self, i: Int) -> Feature:
        return Feature(flatbuffers.read[DType.int64](self._buf, flatbuffers.field_vector(self._buf, int(self._pos), 10) + i * 8))
    fn features_length(self) -> Int:
        return flatbuffers.field_vector_len(self._buf, int(self._pos), 10)

fn GetRootAsSchema(buf: DTypePointer[DType.uint8]) -> Schema:
    return Schema(buf, flatbuffers.indirect(buf, 0))
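
# A minimal end-to-end sketch, not part of the generated code. It assumes `buf` already
# points at the root of a Schema flatbuffer (for example, the header of an Arrow IPC
# Schema message, after any length/continuation prefix has been skipped).
fn _example_dump_schema(buf: DTypePointer[DType.uint8]):
    var schema = GetRootAsSchema(buf)
    print("endianness:", int(schema.endianness()._value))
    for i in range(schema.fields_length()):
        var f = schema.fields(i)
        # Each field carries a name, a nullability flag and a Type union tag.
        print(f.name(), "nullable:", f.nullable(), "type:", int(f.type_type()._value))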