Skip to content

Instantly share code, notes, and snippets.

@yevgenypats
Last active March 3, 2023 10:21
Show Gist options
  • Save yevgenypats/6969e8e598161fc2021612c780bba3eb to your computer and use it in GitHub Desktop.
Save yevgenypats/6969e8e598161fc2021612c780bba3eb to your computer and use it in GitHub Desktop.
apache_arrow_extensions.go
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"reflect"
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/memory"
"github.com/google/uuid"
)
// UUIDArray is a simple extension array whose storage is a FixedSizeBinary(16)
// array. It embeds array.ExtensionArrayBase and adds ToUUID for reading the
// stored values back as uuid.UUID.
type UUIDArray struct {
array.ExtensionArrayBase
}
// ToUUID copies every element of the underlying FixedSizeBinary storage into
// a freshly allocated slice of uuid.UUID and returns it. It panics if the
// storage array is not an *array.FixedSizeBinary.
func (u UUIDArray) ToUUID() []uuid.UUID {
	ids := make([]uuid.UUID, u.Len())
	storage := u.Storage().(*array.FixedSizeBinary)
	for row, n := 0, storage.Len(); row < n; row++ {
		// copy stops at the shorter of the two slices, so at most
		// len(uuid.UUID) bytes are written per element.
		copy(ids[row][:], storage.Value(row))
	}
	return ids
}
// UUIDType is a simple extension type for representing UUIDs. Its storage
// type is FixedSizeBinary(16); use NewUUIDType to obtain an instance with
// the storage type set correctly.
type UUIDType struct {
arrow.ExtensionBase
}
// NewUUIDType is a convenience constructor that returns a UUIDType whose
// storage type is FixedSizeBinary with a byte width of 16.
func NewUUIDType() *UUIDType {
	storage := &arrow.FixedSizeBinaryType{ByteWidth: 16}
	return &UUIDType{
		ExtensionBase: arrow.ExtensionBase{Storage: storage},
	}
}
// ArrayType returns the reflect.Type of UUIDArray, which is used when
// constructing UUID arrays.
func (UUIDType) ArrayType() reflect.Type {
	var zero UUIDArray
	return reflect.TypeOf(zero)
}
// ExtensionName returns the name of this extension type, "uuid".
func (UUIDType) ExtensionName() string {
	const name = "uuid"
	return name
}
// Serialize returns the fixed string "uuid-serialized", which exists to test
// that extension metadata is passed through properly.
func (UUIDType) Serialize() string {
	const serialized = "uuid-serialized"
	return serialized
}
// Deserialize reconstructs a UUIDType from serialized metadata. It returns an
// error unless data is exactly "uuid-serialized" and storageType equals
// FixedSizeBinaryType{ByteWidth: 16}; on success it returns NewUUIDType().
func (UUIDType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error) {
	const wantData = "uuid-serialized"
	wantStorage := &arrow.FixedSizeBinaryType{ByteWidth: 16}

	switch {
	case data != wantData:
		return nil, fmt.Errorf("type identifier did not match: '%s'", data)
	case !arrow.TypeEqual(storageType, wantStorage):
		return nil, fmt.Errorf("invalid storage type for UuidType: %s", storageType.Name())
	}
	return NewUUIDType(), nil
}
// ExtensionEquals reports whether other has the same extension name as u.
// Only the names are compared.
func (u UUIDType) ExtensionEquals(other arrow.ExtensionType) bool {
	mine, theirs := u.ExtensionName(), other.ExtensionName()
	return mine == theirs
}
// main builds a one-element UUID extension array through the generic
// extension builder and prints its contents as a slice of uuid.UUID.
func main() {
	eb := array.NewExtensionBuilder(memory.DefaultAllocator, NewUUIDType())
	// Arrow builders are reference counted; release the builder when done.
	defer eb.Release()

	// When building the UUID array, the user needs to know how a UUID is
	// stored internally (16 raw bytes appended through the storage builder),
	// or we need to have something like UUIDToBytes().
	storage := eb.StorageBuilder().(*array.FixedSizeBinaryBuilder)
	storage.AppendValues([][]byte{[]byte("1234567890123456")}, nil)

	// The extension UUID array, by contrast, does have the ability to return
	// the UUIDs as a slice of uuid.UUID.
	arr := eb.NewArray()
	// The array returned by NewArray is owned by the caller and must be released.
	defer arr.Release()
	fmt.Println(arr.(*UUIDArray).ToUUID())

	// I would expect something like the following to work:
	// eb.(*UUIDBuilder).AppendValues([][]uuid.UUID{}, nil)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment