Skip to content

Instantly share code, notes, and snippets.

@asmedrano
Created March 11, 2014 03:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save asmedrano/9478830 to your computer and use it in GitHub Desktop.
Save asmedrano/9478830 to your computer and use it in GitHub Desktop.
Split a file into several files of a given byte size, trying not to split a row across two files.
package main
import (
"fmt"
"os"
"bytes"
"errors"
"sync"
"path/filepath"
"strconv"
)
// EOF is a package-local sentinel error whose message is "EOF".
//
// NOTE(review): errors.New returns a distinct error value on every call, so
// this is not the same value as the standard library's io.EOF. An ==
// comparison between this variable and an error returned by (*os.File).Read
// will presumably never match — confirm, and prefer io.EOF for detecting the
// end of a file.
var EOF = errors.New("EOF")
// main splits an input file into numbered chunk files of roughly <bytesize>
// bytes each, cutting only at line boundaries so that no row is split across
// two output files.
//
// Usage: ./splitfile <file> <outdir> <bytesize>
//
// The chunks are written to <outdir>/0.out, <outdir>/1.out, ... and, when
// concatenated in order, reproduce the input byte for byte. The process exits
// with status 1 on invalid arguments, if the input cannot be read, or if any
// chunk cannot be written.
func main() {
	if len(os.Args) < 4 {
		fmt.Fprintln(os.Stderr, "USAGE: ./splitfile <file> <outdir> <bytesize>")
		os.Exit(1)
	}

	// A zero or negative size can never make progress, so reject it up front.
	byteSize, err := strconv.ParseInt(os.Args[3], 0, 64)
	if err != nil || byteSize <= 0 {
		fmt.Fprintln(os.Stderr, "Invalid <bytesize>")
		os.Exit(1)
	}

	targDir, err := filepath.Abs(os.Args[2])
	if err != nil {
		fmt.Fprintln(os.Stderr, "Invalid <outdir>")
		os.Exit(1)
	}

	// All chunks are held in memory until they are written, so reading the
	// whole file at once costs nothing extra and avoids seek/offset bookkeeping.
	data, err := os.ReadFile(os.Args[1])
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	chunks := splitOnLines(data, byteSize)
	if failed := writeChunks(targDir, chunks); failed > 0 {
		fmt.Fprintf(os.Stderr, "%d of %d chunks could not be written\n", failed, len(chunks))
		os.Exit(1)
	}
	fmt.Println("Done.")
}

// splitOnLines partitions data into consecutive chunks of at most chunkSize
// bytes, ending every chunk except possibly the last right after a '\n'.
//
// A row longer than chunkSize is kept whole, so the chunk containing it may
// exceed chunkSize. The final chunk holds whatever remains, including a last
// row that has no trailing newline. A chunkSize below 1 is treated as 1.
//
// The returned chunks are sub-slices of data (no copying); concatenating them
// in order yields data exactly. Empty input yields no chunks.
func splitOnLines(data []byte, chunkSize int64) [][]byte {
	if chunkSize < 1 {
		chunkSize = 1
	}
	var chunks [][]byte
	for len(data) > 0 {
		if int64(len(data)) <= chunkSize {
			chunks = append(chunks, data)
			break
		}
		// Cut just after the last newline that fits in the window.
		cut := bytes.LastIndexByte(data[:chunkSize], '\n') + 1
		if cut == 0 {
			// The current row is longer than the window: extend the chunk to
			// the end of that row instead of cutting it in half.
			next := bytes.IndexByte(data[chunkSize:], '\n')
			if next < 0 {
				chunks = append(chunks, data)
				break
			}
			cut = int(chunkSize) + next + 1
		}
		chunks = append(chunks, data[:cut])
		data = data[cut:]
	}
	return chunks
}

// writeChunks writes chunks[i] to "<dir>/<i>.out" concurrently, truncating any
// existing file, and returns the number of chunks that could not be written.
// Each failure is reported on stderr.
func writeChunks(dir string, chunks [][]byte) int {
	// Cap the number of simultaneously open files so that a large input split
	// into many small chunks cannot exhaust the file-descriptor limit.
	const maxWriters = 16

	var wg sync.WaitGroup
	errs := make([]error, len(chunks)) // one slot per goroutine, so no locking is needed
	slots := make(chan struct{}, maxWriters)
	for i, chunk := range chunks {
		wg.Add(1)
		slots <- struct{}{}
		go func(i int, chunk []byte) {
			// Deferred first so the WaitGroup is released on every path.
			defer wg.Done()
			defer func() { <-slots }()
			name := filepath.Join(dir, strconv.Itoa(i)+".out")
			errs[i] = os.WriteFile(name, chunk, 0644)
		}(i, chunk)
	}
	wg.Wait()

	failed := 0
	for _, err := range errs {
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			failed++
		}
	}
	return failed
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment