Skip to content

Instantly share code, notes, and snippets.

@peterhellberg
Last active February 19, 2024 08:25
Show Gist options
  • Save peterhellberg/ceed862a83c29a80b5bbd4631c24fb27 to your computer and use it in GitHub Desktop.
Save peterhellberg/ceed862a83c29a80b5bbd4631c24fb27 to your computer and use it in GitHub Desktop.
Conversion of the "PRO Prisundersökning" from XLSX to CSV, JSON,NDJSON
package main
import (
"encoding/csv"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"os"
"strconv"
"strings"
"time"
"github.com/xuri/excelize/v2"
)
const (
defaultRawurl = "https://pro.se/download/18.8d3d3b4187083b364113f70/1679655215923/PRO%20prisunders%C3%B6kning%202022%20Butikslista%20hela%20landet_20230324.xlsx"
defaultFormat = "json"
)
func main() {
if err := run(os.Args, os.Stdout); err != nil {
fmt.Fprintf(os.Stderr, "%s\n", err)
os.Exit(1)
}
}
func run(args []string, stdout io.Writer) error {
rawurl, format, err := parseArgs(args)
if err != nil {
return err
}
client := &http.Client{Timeout: 10 * time.Second}
f, err := getRemoteFile(rawurl, client)
if err != nil {
return err
}
rows, err := f.Rows("Alla")
if err != nil {
return err
}
var records [][]string
for rows.Next() {
row, err := rows.Columns()
if err != nil {
return err
}
records = append(records, row)
}
enc := json.NewEncoder(stdout)
switch format {
case "csv":
csv.NewWriter(stdout).WriteAll(records)
case "json":
enc.SetIndent("", " ")
enc.Encode(convertToButiker(records))
case "ndjson":
for _, butik := range convertToButiker(records) {
enc.Encode(butik)
}
default:
return fmt.Errorf("unknown format")
}
return nil
}
type butik struct {
Butiksnamn string `json:"butiksnamn"`
Distrikt string `json:"distrikt"`
Typ string `json:"typ"`
AntalVaror int `json:"antal_varor"`
Total float64 `json:"total"`
Varor []vara `json:"varor"`
}
func convertToButiker(records [][]string) []butik {
var butiker []butik
if len(records) == 0 {
return nil
}
for _, row := range records[1:] {
if len(row) < 5 {
continue
}
antalVaror, err := strconv.Atoi(row[3])
if err != nil {
continue
}
total, err := strconv.ParseFloat(row[4], 64)
if err != nil {
continue
}
varor, err := parseVaror(records[0][5:], row[5:])
if err != nil {
continue
}
butiker = append(butiker, butik{
Butiksnamn: row[0],
Distrikt: row[1],
Typ: row[2],
AntalVaror: antalVaror,
Total: total,
Varor: varor,
})
}
return butiker
}
type vara struct {
Namn string `json:"namn"`
Pris float64 `json:"pris"`
}
func parseVaror(headings, columns []string) ([]vara, error) {
var varor []vara
for i, v := range columns {
_, namn, found := strings.Cut(headings[i], ". ")
if !found {
return nil, fmt.Errorf("unexpected name")
}
pris, err := strconv.ParseFloat(v, 64)
if err != nil {
return nil, err
}
varor = append(varor, vara{
Namn: namn,
Pris: pris,
})
}
return varor, nil
}
func getRemoteFile(rawurl string, client *http.Client) (*excelize.File, error) {
resp, err := client.Get(rawurl)
if err != nil {
return nil, err
}
defer resp.Body.Close()
return excelize.OpenReader(resp.Body)
}
func parseArgs(args []string) (rawurl, format string, err error) {
flags := flag.NewFlagSet(args[0], flag.ExitOnError)
flags.StringVar(&rawurl, "rawurl", defaultRawurl, "The URL to the PRO Prisundersökning .xlsx to be parsed")
flags.StringVar(&format, "format", defaultFormat, "The format to use for the output (json/ndjson/csv)")
if err := flags.Parse(args[1:]); err != nil {
return "", "", err
}
return rawurl, format, nil
}
@peterhellberg
Copy link
Author

Data from the 2023 report

	defaultRawurl = "https://pro.se/download/18.4d17fc3b18d2a6eee111b331/1706085711958/Butikslista_hela%20landet_2023.xlsx"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment