Skip to content

Instantly share code, notes, and snippets.

@s-l-teichmann
Last active July 12, 2018 10:26
Show Gist options
  • Save s-l-teichmann/6b42adc11cf04fd15b074b98046c927b to your computer and use it in GitHub Desktop.
Save s-l-teichmann/6b42adc11cf04fd15b074b98046c927b to your computer and use it in GitHub Desktop.
Splitting files
// convert -- a little file splitter
// (c) 2018 by Sascha L. Teichmann
// This is Free Software covered by the terms of the MIT license.
//
package main
import (
"bufio"
"flag"
"fmt"
"io"
"log"
"os"
"sort"
"strconv"
"strings"
)
// multi is one requested output position of a sink: the offset of the wanted
// byte in the data stream plus the byte value once it has been read.
type multi struct {
// index is the zero-based offset of the wanted byte (split converts the
// 1-based offsets of the header before storing them here).
index int64
// b is the byte recorded by consume; it is only meaningful when seen is true.
b byte
// seen is set by consume once the byte at index has been read.
seen bool
}
// sink is one named output file together with the list of input positions
// whose bytes are to be written to it.
type sink struct {
// name is the path passed to os.Create when the file is first needed.
name string
// indices holds the positions that have not been written yet, in the order
// they were listed; consume removes entries from the front as it writes them.
indices []multi
// file is the created output file; it stays nil until consume opens it.
file *os.File
// out is a buffered writer on top of file; it stays nil until consume opens it.
out *bufio.Writer
}
// consume records that the input byte at offset index has the value b and
// writes every byte that has thereby become writable to the sink's file.
//
// The output file is created on the first call. Every pending entry whose
// offset equals index is marked as seen; afterwards the pending list is
// drained from the front for as long as its head has been seen, which keeps
// the output in the order the offsets were listed. It returns the error from
// creating the file or from writing to the buffered writer, if any.
func (s *sink) consume(index int64, b byte) error {
	// Create the output file lazily, on first use.
	if s.out == nil {
		f, err := os.Create(s.name)
		if err != nil {
			return err
		}
		s.file, s.out = f, bufio.NewWriter(f)
	}

	// The same offset may be listed more than once, so mark every match.
	for i := range s.indices {
		if pending := &s.indices[i]; pending.index == index {
			pending.b, pending.seen = b, true
		}
	}

	// Emit the leading run of entries that are already known and whose
	// offset is not beyond the current one; stop at the first gap.
	for len(s.indices) > 0 {
		head := s.indices[0]
		if !head.seen || head.index > index {
			break
		}
		if err := s.out.WriteByte(head.b); err != nil {
			return err
		}
		s.indices = s.indices[1:]
	}
	return nil
}
// close flushes the buffered output and closes the underlying file.
//
// It is a no-op returning nil when the file was never opened. The file is
// closed even if flushing fails, so the descriptor is not leaked. A flush
// error takes precedence over a close error because it means buffered data
// did not reach the file.
func (s *sink) close() error {
	if s.file == nil {
		return nil
	}
	flushErr := s.out.Flush()
	closeErr := s.file.Close()
	if flushErr != nil {
		return flushErr
	}
	return closeErr
}
// split reads a splitting program followed by raw data from r and distributes
// selected data bytes to output files.
//
// The input starts with a textual header. Each non-empty header line has the
// form
//
//	name,i1,i2,...
//
// where name is the output file and every i is a 1-based offset into the data
// that follows the header. A name may appear on several lines; its offsets
// are appended. The header ends at the first line starting with '#', and the
// data begins directly after that line.
//
// Data is read byte by byte, only as far as the highest requested offset.
// split returns the first error from reading, parsing, creating or writing;
// running out of input before the header terminator or before the last
// requested offset is reported as the reader's error (typically io.EOF).
// Output files that were opened are closed before returning, and a close
// error is returned if no earlier error occurred.
func split(r io.Reader) (err error) {
	buf := bufio.NewReader(r)

	// entry is one step of the program: deliver the byte at index to sink.
	type entry struct {
		index int64
		sink  *sink
	}

	var program []entry
	sinks := map[string]*sink{}

	for {
		line, readErr := buf.ReadString('\n')
		if readErr != nil {
			return readErr
		}
		// Ignore empty lines.
		if line = strings.TrimSpace(line); len(line) == 0 {
			continue
		}
		// A line starting with '#' terminates the header.
		if strings.HasPrefix(line, "#") {
			break
		}
		parts := strings.Split(line, ",")
		name := parts[0]
		snk := sinks[name]
		if snk == nil { // Allow more than one line per output file.
			snk = &sink{name: name}
			sinks[name] = snk
		}
		for _, field := range parts[1:] {
			idx, parseErr := strconv.ParseInt(strings.TrimSpace(field), 10, 64)
			if parseErr != nil {
				return parseErr
			}
			if idx < 1 {
				return fmt.Errorf("invalid index %d", idx)
			}
			idx-- // Header offsets are 1-based; work zero-based internally.
			snk.indices = append(snk.indices, multi{index: idx})
			program = append(program, entry{index: idx, sink: snk})
		}
	}

	// Sort the program so it can be executed in one sequential pass.
	sort.Slice(program, func(i, j int) bool {
		return program[i].index < program[j].index
	})

	// Close all opened output files on the way out and report the first
	// close error unless an earlier error is already being returned.
	defer func() {
		for _, snk := range sinks {
			if closeErr := snk.close(); closeErr != nil && err == nil {
				err = closeErr
			}
		}
	}()

	for index := int64(0); len(program) > 0; index++ {
		b, readErr := buf.ReadByte()
		if readErr != nil {
			return readErr
		}
		// More than one program step may want the byte at this offset.
		for len(program) > 0 && index == program[0].index {
			// A failure to create or write an output file must abort the
			// run instead of being dropped silently.
			if consumeErr := program[0].sink.consume(index, b); consumeErr != nil {
				return consumeErr
			}
			program = program[1:]
		}
	}
	return nil
}
// process opens the file named fname for reading and hands it to split.
// It returns the error from opening the file, or otherwise whatever split
// returns. The input file is closed when process returns.
func process(fname string) error {
	in, openErr := os.Open(fname)
	if openErr != nil {
		return openErr
	}
	defer in.Close()

	return split(in)
}
// main processes every command line argument as an input file, in order.
// Each file name is logged before it is processed; the first failure is
// logged and terminates the program via log.Fatalf.
func main() {
	flag.Parse()

	for _, fname := range flag.Args() {
		log.Printf("file: %s\n", fname)

		err := process(fname)
		if err == nil {
			continue
		}
		log.Fatalf("error: %v\n", err)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment