Skip to content

Instantly share code, notes, and snippets.

@stoewer
Created June 26, 2023 08:13
Show Gist options
  • Save stoewer/258ed681c0016111b8c8ff618f931351 to your computer and use it in GitHub Desktop.
Save stoewer/258ed681c0016111b8c8ff618f931351 to your computer and use it in GitHub Desktop.
parquet-go GenericWriter should write map keys to matching columns
package main
import (
"errors"
"fmt"
"io"
"log"
"os"
"github.com/segmentio/parquet-go"
)
// Inner is the nested group embedded in Model. Both members are plain
// strings; the declaration order (FieldB, then FieldC) is preserved.
type Inner struct {
	FieldB, FieldC string
}
// Model is the struct-based row type. In this file it is the type the parquet
// schema is derived from (see write) and the type rows are read back into
// (see read).
type Model struct {
FieldA string
// Nested holds the FieldB/FieldC pair as a statically typed struct.
Nested Inner
}
// AltModel mirrors Model but represents Nested as a string-to-string map
// instead of the Inner struct. It is the row type handed to the generic
// writer in write, while the schema there comes from Model.
type AltModel struct {
FieldA string
// Nested is keyed by name; the sample rows in altData use the keys
// "FieldB" and "FieldC", matching the field names of Inner.
Nested map[string]string
}
// data is the sample data set expressed with the struct-based Model. The read
// function uses len(data) as the number of rows it expects to get back; the
// code path that writes data directly is commented out in write.
var data = []Model{
{FieldA: "oneA", Nested: Inner{FieldB: "1", FieldC: "oneC"}},
{FieldA: "twoA", Nested: Inner{FieldB: "2", FieldC: "twoC"}},
{FieldA: "threeA", Nested: Inner{FieldB: "3", FieldC: "threeC"}},
{FieldA: "fourA", Nested: Inner{FieldB: "4", FieldC: "fourC"}},
{FieldA: "fiveA", Nested: Inner{FieldB: "5", FieldC: "fiveC"}},
}
// altData carries the same five records as data, but with Nested given as a
// map whose keys are "FieldB" and "FieldC". These are the rows that write
// passes to the parquet writer.
var altData = []AltModel{
{FieldA: "oneA", Nested: map[string]string{"FieldB": "1", "FieldC": "oneC"}},
{FieldA: "twoA", Nested: map[string]string{"FieldB": "2", "FieldC": "twoC"}},
{FieldA: "threeA", Nested: map[string]string{"FieldB": "3", "FieldC": "threeC"}},
{FieldA: "fourA", Nested: map[string]string{"FieldB": "4", "FieldC": "fourC"}},
{FieldA: "fiveA", Nested: map[string]string{"FieldB": "5", "FieldC": "fiveC"}},
}
// main runs the write/read round trip and terminates the process with a
// non-zero exit status if it fails.
func main() {
	if err := run(); err != nil {
		// Pass the error to the logger; calling log.Fatal with no arguments
		// would exit without reporting what went wrong.
		log.Fatal(err)
	}
}
// run creates a temporary file, writes the sample rows to it with write, and
// reads them back with read. The temporary file is closed and removed before
// run returns. Errors from either step are wrapped with the file name.
func run() error {
	tmp, err := os.CreateTemp("", "parquet-test-")
	if err != nil {
		return err
	}
	// Best-effort cleanup: failures to close or delete the scratch file are
	// intentionally ignored.
	defer func() {
		_ = tmp.Close()
		_ = os.Remove(tmp.Name())
	}()

	if werr := write(tmp); werr != nil {
		return fmt.Errorf("unable to write file '%s': %w", tmp.Name(), werr)
	}
	if rerr := read(tmp); rerr != nil {
		return fmt.Errorf("unable to read file '%s': %w", tmp.Name(), rerr)
	}
	return nil
}
// write encodes altData into f using a generic parquet writer typed on
// AltModel but configured with the schema derived from Model.
//
// It returns the first failure encountered: a Write error (other than
// io.EOF) takes precedence; otherwise the error from closing the writer is
// returned so that a failed finalize is not silently lost.
func write(f *os.File) (err error) {
	schema := parquet.SchemaOf(new(Model))

	// Alternative that writes the struct-based rows directly:
	//   w := parquet.NewGenericWriter[Model](f, schema)
	//   _, err := w.Write(data)
	w := parquet.NewGenericWriter[AltModel](f, schema)
	defer func() {
		// Always close the writer, but only surface the Close error when
		// nothing else has failed already.
		if cerr := w.Close(); err == nil {
			err = cerr
		}
	}()

	// An io.EOF from Write is deliberately not treated as a failure.
	if _, werr := w.Write(altData); werr != nil && !errors.Is(werr, io.EOF) {
		return werr
	}
	return nil
}
// read loads up to len(data) rows of type Model from f and prints the
// reader's schema, the number of rows read, and the row buffer.
//
// Reading stops when the buffer is full or the reader reports io.EOF,
// whichever comes first. Any other read error is returned.
func read(f *os.File) error {
	dataRead := make([]Model, len(data))

	r := parquet.NewGenericReader[Model](f)
	// Best-effort release of the reader; its Close error carries no
	// information this function acts on.
	defer func() { _ = r.Close() }()

	var readTotal int
	for readTotal < len(dataRead) {
		// Fill only the still-empty tail so a read that takes several calls
		// does not overwrite rows obtained earlier.
		n, err := r.Read(dataRead[readTotal:])
		readTotal += n
		if err != nil {
			if errors.Is(err, io.EOF) {
				// The file holds fewer rows than expected; stop instead of
				// polling an exhausted reader forever.
				break
			}
			return err
		}
	}

	fmt.Println(r.Schema())
	fmt.Printf("Rows read: %d\n", readTotal)
	fmt.Printf("Data read: %+v\n", dataRead)
	return nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment