Skip to content

Instantly share code, notes, and snippets.

@M-Porter
Last active March 1, 2024 22:48
Show Gist options
  • Save M-Porter/97d088c2594725f6af3bf3a9d175cf3b to your computer and use it in GitHub Desktop.
Save M-Porter/97d088c2594725f6af3bf3a9d175cf3b to your computer and use it in GitHub Desktop.
Like split, but slower!
package main
import (
"bufio"
"bytes"
"flag"
"fmt"
"io"
"os"
"path/filepath"
)
// outFile describes the naming scheme for the generated chunk files: every
// chunk is called <name>_<sequence><ext>.
type outFile struct {
	// name is the output path with its extension removed; ext is that
	// extension (including the leading dot) or "" when there is none.
	name, ext string
	// sequence is the number that the next generated filename will carry.
	sequence int
}

// nextFilename returns the filename for the current sequence number and then
// advances the sequence, so consecutive calls yield _0, _1, _2, ...
func (o *outFile) nextFilename() string {
	filename := fmt.Sprintf("%s_%d%s", o.name, o.sequence, o.ext)
	o.sequence++
	return filename
}
// parseOutFileString splits the output path into the part before the file
// extension and the extension itself (as reported by filepath.Ext), returning
// an outFile whose sequence starts at zero.
func parseOutFileString(out string) outFile {
	var parsed outFile
	parsed.ext = filepath.Ext(out)
	// Everything in front of the extension becomes the base name.
	parsed.name = out[:len(out)-len(parsed.ext)]
	return parsed
}
// main splits the input file into sequentially numbered chunk files.
//
// Flags:
//
//	-l    number of lines per output file (required, must be > 0); the
//	      header line is not counted
//	-s    number of leading input lines to discard
//	-h    treat the first non-skipped line as a header and repeat it at the
//	      top of every chunk
//	-in   path of the file to split
//	-out  output path template; chunks are written as <name>_<n><ext>
//
// Any validation or I/O error is printed and the process exits with status 1.
func main() {
	var (
		lines   int
		headers bool
		skip    int
		inFp    string
		outFp   string
	)
	flag.IntVar(&lines, "l", 0, "Number of lines per file, not including the header line.")
	flag.IntVar(&skip, "s", 0, "Number of lines to skip at the beginning of the files.")
	flag.BoolVar(&headers, "h", false, "First line is a header, subsequently include it in output files.")
	flag.StringVar(&inFp, "in", "", "Input file")
	flag.StringVar(&outFp, "out", "", "Output file")
	flag.Parse()

	// fail reports err and terminates. os.Exit skips deferred calls, which is
	// acceptable here because the OS reclaims the open handles on exit.
	fail := func(err error) {
		fmt.Println(err)
		os.Exit(1)
	}

	// Negative values are rejected as well: the chunk counter would never
	// equal a negative limit, so no chunk would ever be rolled over.
	if lines <= 0 {
		fmt.Println("-l flag value must be greater than 0")
		os.Exit(1)
	}
	if inFp == "" || outFp == "" {
		fmt.Println("Input and output files expected")
		os.Exit(1)
	}

	inFile, err := os.Open(inFp)
	if err != nil {
		fail(err)
	}
	defer inFile.Close()

	names := parseOutFileString(outFp)
	currentLine := -1
	currentChunkSize := 0
	var headerLine []byte
	var of *os.File // chunk currently being written; nil between chunks

	// write appends b to the open chunk and aborts on a failed write.
	write := func(b []byte) {
		if _, err := of.Write(b); err != nil {
			fail(err)
		}
	}
	// finishChunk closes the open chunk exactly once and resets the state so
	// that the next data line starts a new file.
	finishChunk := func() {
		if err := of.Close(); err != nil {
			fail(err)
		}
		fmt.Println("done")
		of = nil
		currentChunkSize = 0
	}

	reader := bufio.NewReader(inFile)
	for {
		line, readErr := reader.ReadBytes('\n')
		if readErr != nil && readErr != io.EOF {
			fail(readErr)
		}

		// ReadBytes hands back whatever it read before hitting EOF together
		// with io.EOF, so a final line without a trailing newline still has
		// to be processed before leaving the loop.
		if len(line) > 0 {
			currentLine++
			switch {
			case currentLine < skip:
				// Leading line the user asked to drop.
			case headers && headerLine == nil:
				headerLine = bytes.Clone(line)
			default:
				if of == nil {
					nf := names.nextFilename()
					fmt.Printf("Writing chunk to %s...", nf)
					if of, err = os.Create(nf); err != nil {
						fail(err)
					}
				}
				if currentChunkSize == 0 && headerLine != nil {
					write(headerLine)
				}
				write(line)
				currentChunkSize++
				if currentChunkSize == lines {
					finishChunk()
				}
			}
		}

		if readErr == io.EOF {
			break
		}
	}

	// Only a partially filled chunk is still open here; when the input ended
	// exactly on a chunk boundary (or was empty) there is nothing to close.
	if of != nil {
		finishChunk()
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment