zacharysyoung/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Split rows, align columns

I went for a solution that doesn't presuppose any kind of sorting: it just looks for a value and remembers in which column (on any row) it appeared.
Starting with this input:
a,b,a
c,c,b
d,e,e
f,f,g
g,g,h
h,h,i

It generates a map of values to positions:
{
    "a": [0, 2],
    "b": [1, 2],
    "c": [0, 1],
    "d": [0],
    "e": [1, 2],
    "f": [0, 1],
    "g": [2, 0, 1],
    "h": [2, 0, 1],
    "i": [2],
}
From the map, it iterates over the values and writes each value into its own row, in the found positions (columns):
a,,a
,b,b
c,c,
d,,
,e,e
f,f,
g,g,g
h,h,h
,,i

Since it doesn't presuppose sorting, this:
b,a,a
d,d,d
c,e,e

becomes:
{
    "b": [0],
    "a": [1, 2],
    "d": [0, 1, 2],
    "c": [0],
    "e": [1, 2],
}
becomes:
b,,
,a,a
d,d,d
c,,
,e,e

This (formatted for legibility):
| ABCDE12345_001  | FGHIJ6789_002   | ABCDE12345_001 |
| KLMNO5432_003   | KLMNO5432_003   | FGHIJ6789_002  |
| PQRST24680_123  | UVWXY13579_555  | UVWXY13579_555 |
| ZABCD876530_009 | ZABCD876530_009 | AABBCCDDEE_987 |
| AABBCCDDEE_987  | AABBCCDDEE_987  | LMNOP98765_999 |
| LMNOP98765_999  | ZYXWV54321_777  | ZYXWV54321_777 |

becomes:
| ABCDE12345_001  |                 | ABCDE12345_001 |
|                 | FGHIJ6789_002   | FGHIJ6789_002  |
| KLMNO5432_003   | KLMNO5432_003   |                |
| PQRST24680_123  |                 |                |
|                 | UVWXY13579_555  | UVWXY13579_555 |
| ZABCD876530_009 | ZABCD876530_009 |                |
| AABBCCDDEE_987  | AABBCCDDEE_987  | AABBCCDDEE_987 |
| LMNOP98765_999  |                 | LMNOP98765_999 |
|                 | ZYXWV54321_777  | ZYXWV54321_777 |

I've include the program in Python, and Go (because I really like writing Go).

  
## main.go
package main

import (
	"encoding/csv"
	"fmt"
	"io"
	"os"
)

func main() {
	const nCols = 3

	values := make([]string, 0)
	valuePositions := make(map[string][]int)

	f, err := os.Open("input3.csv")
	must(err)
	r := csv.NewReader(f)
	for {
		record, err := r.Read()
		if doneReading(err) {
			break
		}
		for i, val := range record[:nCols] {
			if _, ok := valuePositions[val]; !ok {
				values = append(values, val)
			}
			valuePositions[val] = append(valuePositions[val], i)
		}
	}
	must(f.Close())

	fmt.Println(values)
	fmt.Println(valuePositions)

	f, err = os.Create("output.csv")
	must(err)
	w := csv.NewWriter(f)
	for _, val := range values {
		row := make([]string, nCols)
		for _, x := range valuePositions[val] {
			row[x] = val
		}
		w.Write(row)
	}
	w.Flush()
	must(w.Error())
	must(f.Close())

}

func doneReading(err error) bool {
	if err != nil {
		if err == io.EOF {
			return true
		}
		panic(err)
	}
	return false
}

func must(err error) {
	if err != nil {
		panic(err)
	}
}

## main.py
import csv
from collections import defaultdict

value_positions = defaultdict(list)

with open("input4.csv", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    for row in reader:
        for i, val in enumerate(row):
            value_positions[val].append(i)

N_COLS = 3

with open("output.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    for val, positions in value_positions.items():
        row = [""] * N_COLS
        for x in positions:
            row[x] = val
        writer.writerow(row)
	package main

	import (
	"encoding/csv"
	"fmt"
	"io"
	"os"
	)

	func main() {
	const nCols = 3

	values := make([]string, 0)
	valuePositions := make(map[string][]int)

	f, err := os.Open("input3.csv")
	must(err)
	r := csv.NewReader(f)
	for {
	record, err := r.Read()
	if doneReading(err) {
	break
	}
	for i, val := range record[:nCols] {
	if _, ok := valuePositions[val]; !ok {
	values = append(values, val)
	}
	valuePositions[val] = append(valuePositions[val], i)
	}
	}
	must(f.Close())

	fmt.Println(values)
	fmt.Println(valuePositions)

	f, err = os.Create("output.csv")
	must(err)
	w := csv.NewWriter(f)
	for _, val := range values {
	row := make([]string, nCols)
	for _, x := range valuePositions[val] {
	row[x] = val
	}
	w.Write(row)
	}
	w.Flush()
	must(w.Error())
	must(f.Close())

	}

	func doneReading(err error) bool {
	if err != nil {
	if err == io.EOF {
	return true
	}
	panic(err)
	}
	return false
	}

	func must(err error) {
	if err != nil {
	panic(err)
	}
	}
	import csv
	from collections import defaultdict

	value_positions = defaultdict(list)

	with open("input4.csv", newline="", encoding="utf-8") as f:
	reader = csv.reader(f)
	for row in reader:
	for i, val in enumerate(row):
	value_positions[val].append(i)

	N_COLS = 3

	with open("output.csv", "w", newline="", encoding="utf-8") as f:
	writer = csv.writer(f)
	for val, positions in value_positions.items():
	row = [""] * N_COLS
	for x in positions:
	row[x] = val
	writer.writerow(row)