Skip to content

Instantly share code, notes, and snippets.

@zacharysyoung
Last active July 15, 2023 00:10
Show Gist options
  • Save zacharysyoung/403c4ae9b5ab53bc2fc91a6ea9e9a55c to your computer and use it in GitHub Desktop.
Save zacharysyoung/403c4ae9b5ab53bc2fc91a6ea9e9a55c to your computer and use it in GitHub Desktop.
SO-76690871
operation label query
op1 label1 query1
op1 label1 query2
op1 label2 query3
op2 label3 query4
op2 label4 query5
op2 label3 query6
package main
import (
"encoding/csv"
"encoding/json"
"fmt"
"os"
"reflect"
"sort"
)
// Topic is the nested "topic" object of an output record; it carries the
// label and query values of one input row.
type Topic struct {
	Label string `json:"label"`
	Query string `json:"query"`
}

// JSONObj is one output record: an operation name together with its Topic.
type JSONObj struct {
	Operation string `json:"operation"`
	Topic     Topic  `json:"topic"`
}

// key returns the grouping key for the record: the operation and the topic
// label joined by "@@".
func (o JSONObj) key() string {
	op, label := o.Operation, o.Topic.Label
	return op + "@@" + label
}
// main reads input.csv (header: operation,label,query) and writes two files:
//
//   - output-flat.json: one JSON object per CSV row, in input order.
//   - output-grouped.json: the same objects grouped in a JSON object keyed by
//     JSONObj.key().
//
// Any I/O, parse, or header-validation failure panics.
func main() {
	in, err := os.Open("input.csv")
	must(err)
	// The input is only read, so a close error is not worth acting on.
	defer in.Close()

	r := csv.NewReader(in)

	wantHeader := []string{"operation", "label", "query"}
	header, err := r.Read()
	if err != nil {
		panic(fmt.Errorf("unexpectedly done reading input.csv: %w", err))
	}
	if !reflect.DeepEqual(header, wantHeader) {
		panic(fmt.Errorf("header %v != %v", header, wantHeader))
	}

	// With the default FieldsPerRecord, csv.Reader requires every later record
	// to have as many fields as the first one (the validated 3-field header),
	// so indexing record[0..2] below cannot go out of range.
	records, err := r.ReadAll()
	must(err)

	objects := make([]JSONObj, 0, len(records))
	for _, record := range records {
		objects = append(objects, JSONObj{
			Operation: record[0],
			Topic:     Topic{Label: record[1], Query: record[2]},
		})
	}
	writeJSON("output-flat.json", objects)

	// Stable sort: records that share a key must keep their input order inside
	// each group, which plain sort.Slice does not guarantee.
	sort.SliceStable(objects, func(i, j int) bool { return objects[i].key() < objects[j].key() })

	groupedObjs := make(map[string][]JSONObj)
	for _, obj := range objects {
		k := obj.key()
		groupedObjs[k] = append(groupedObjs[k], obj)
	}
	writeJSON("output-grouped.json", groupedObjs)
}

// writeJSON marshals v as indented JSON and writes it to the file name,
// creating or truncating it. It panics on any marshal, create, write, or
// close error.
func writeJSON(name string, v interface{}) {
	b, err := json.MarshalIndent(v, "", " ")
	must(err)

	f, err := os.Create(name)
	must(err)
	if _, err := f.Write(b); err != nil {
		f.Close() // best effort; the write error is the one worth reporting
		panic(err)
	}
	must(f.Close())
}
// must panics with err when err is non-nil and does nothing otherwise.
func must(err error) {
	if err == nil {
		return
	}
	panic(err)
}
import csv
import json
from typing import Any
from itertools import groupby

Row = dict[str, str]
JSONObj = dict[str, Any]


def to_obj(row: Row) -> JSONObj:
    """Convert one CSV row into the nested object written to the JSON files."""
    return {
        "operation": row["operation"],
        "topic": {
            "label": row["label"],
            "query": row["query"],
        },
    }


def key_func(row: Row) -> tuple[str, str]:
    """Return the (operation, label) pair used to sort and group rows."""
    return (row["operation"], row["label"])


with open("input.csv", newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    rows: list[Row] = list(reader)

print(rows)
# [
#     {"operation": "op1", "label": "label1", "query": "query1"},
#     {"operation": "op1", "label": "label1", "query": "query2"},
#     {"operation": "op1", "label": "label2", "query": "query3"},
#     {"operation": "op2", "label": "label3", "query": "query4"},
#     {"operation": "op2", "label": "label4", "query": "query5"},
#     {"operation": "op2", "label": "label3", "query": "query6"},
# ]

# Flat output: one object per row, in input order.
data_flat: list[JSONObj] = [to_obj(row) for row in rows]

with open("output-flat.json", "w", encoding="utf-8") as f:
    json.dump(data_flat, f, indent=4)

# Grouped output. groupby() only merges *adjacent* items with equal keys, so
# the rows are sorted by the same key first; list.sort() is stable, so rows
# inside a group keep their input order.
rows.sort(key=key_func)

data_grouped: JSONObj = {}
for key, grouped_rows in groupby(rows, key=key_func):
    data_grouped["@@".join(key)] = [to_obj(row) for row in grouped_rows]

with open("output-grouped.json", "w", encoding="utf-8") as f:
    json.dump(data_grouped, f, indent=4)
[
{
"operation": "op1",
"topic": {
"label": "label1",
"query": "query1"
}
},
{
"operation": "op1",
"topic": {
"label": "label1",
"query": "query2"
}
},
{
"operation": "op1",
"topic": {
"label": "label2",
"query": "query3"
}
},
{
"operation": "op2",
"topic": {
"label": "label3",
"query": "query4"
}
},
{
"operation": "op2",
"topic": {
"label": "label4",
"query": "query5"
}
},
{
"operation": "op2",
"topic": {
"label": "label3",
"query": "query6"
}
}
]
{
"op1@@label1": [
{
"operation": "op1",
"topic": {
"label": "label1",
"query": "query1"
}
},
{
"operation": "op1",
"topic": {
"label": "label1",
"query": "query2"
}
}
],
"op1@@label2": [
{
"operation": "op1",
"topic": {
"label": "label2",
"query": "query3"
}
}
],
"op2@@label3": [
{
"operation": "op2",
"topic": {
"label": "label3",
"query": "query4"
}
},
{
"operation": "op2",
"topic": {
"label": "label3",
"query": "query6"
}
}
],
"op2@@label4": [
{
"operation": "op2",
"topic": {
"label": "label4",
"query": "query5"
}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment