Skip to content

Instantly share code, notes, and snippets.

@guilt
Last active December 28, 2022 22:28
Show Gist options
  • Save guilt/a09566235536eacc2be0ac04cc88a64a to your computer and use it in GitHub Desktop.
Save guilt/a09566235536eacc2be0ac04cc88a64a to your computer and use it in GitHub Desktop.
Parquet to JSON CLI
// Command p2j (Parquet to JSON) prints the contents of a Parquet file as JSON.
package main
import (
"encoding/json"
"flag"
"fmt"
log "github.com/sirupsen/logrus"
"os"
"github.com/xitongsys/parquet-go-source/local"
"github.com/xitongsys/parquet-go/reader"
)
// main parses the command-line flags, opens the Parquet file named by -file,
// and then either prints the number of rows (when -count is set) or writes
// every record to standard output as one JSON array. Diagnostics are sent to
// standard error; any failure ends the process through log.Fatalf.
func main() {
	log.SetOutput(os.Stderr)

	count := flag.Bool("count", false, "Print number of records")
	file := flag.String("file", "", "Parquet File to Parse")
	flag.Parse()

	if len(*file) == 0 {
		flag.Usage()
		log.Fatalf("Please specify File")
	}

	fr, err := local.NewLocalFileReader(*file)
	if err != nil {
		// Dereference the flag: formatting the *string itself would print a
		// pointer value instead of the path.
		log.Fatalf("Can't open File: %s: %v", *file, err)
	}
	// Deferred so the -count early return also releases the file. The
	// log.Fatalf paths below exit the process and skip deferred calls.
	defer fr.Close()

	pr, err := reader.NewParquetReader(fr, nil, 4)
	if err != nil {
		log.Fatalf("Can't create Parquet reader: %v", err)
	}
	defer pr.ReadStop()

	total := int(pr.GetNumRows())
	if *count {
		fmt.Println(total)
		return
	}

	// emit writes raw bytes to standard output and aborts on a write error
	// (for example a closed pipe) instead of silently dropping output.
	emit := func(b []byte) {
		if _, err := os.Stdout.Write(b); err != nil {
			log.Fatalf("Can't write output: %v", err)
		}
	}

	// Rows are fetched in bounded batches so that memory use does not grow
	// with the size of the file.
	const batchSize = 1000

	emit([]byte("["))
	written := 0
	for written < total {
		want := total - written
		if want > batchSize {
			want = batchSize
		}

		rows, err := pr.ReadByNumber(want)
		if err != nil {
			log.Fatalf("Can't read record: %d %v", written, err)
		}
		if len(rows) == 0 {
			// The reader produced fewer rows than the reported total; stop
			// here rather than looping forever, and still close the array.
			log.Warnf("Expected %d records but only read %d", total, written)
			break
		}

		for _, row := range rows {
			jsonBytes, err := json.MarshalIndent(row, "", " ")
			if err != nil {
				log.Fatalf("Can't convert to JSON: %v", err)
			}
			if written > 0 {
				emit([]byte(","))
			}
			emit(jsonBytes)
			written++
		}
	}
	emit([]byte("]"))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment