Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Go script to efficiently truncate the last line of a file. Use it when that 750GB NDJSON dump aborts in the middle of a write.
// Command remove_last_line efficiently removes the last line of a file by truncating it in place, keeping the newline that ends the preceding line.
package main
import (
"bytes"
"flag"
"fmt"
"os"
)
// Command-line options. Both pointers are assigned by main (via the flag
// package) before run is called, and run dereferences them.
var (
// dryRun reports whether the file must be left unmodified: when set, run
// opens the file read-only and only prints the size it would truncate to.
dryRun *bool
// blockSize is the number of bytes read per step while run scans the file
// backwards for a newline. main rejects values <= 0.
blockSize *int
)
// main parses the command line, validates it and removes the last line of the
// single file named as the positional argument.
//
// Flags:
//
//	-dry-run       only print the size the file would be truncated to
//	-block-size N  number of bytes read per step (must be > 0, default 32768)
//
// The process exits with status 2 on invalid usage and with status 1 when
// processing the file fails. All diagnostics are written to standard error.
func main() {
	dryRun = flag.Bool("dry-run", false, "Don't change file, only print changes")
	blockSize = flag.Int("block-size", 32768, "Read block size")
	flag.Parse()

	// Diagnostics go through fmt rather than the builtin println, which the
	// language spec reserves for bootstrapping and does not guarantee to keep.
	if *blockSize <= 0 {
		fmt.Fprintln(os.Stderr, "Invalid block size")
		os.Exit(2)
	}
	if flag.NArg() != 1 {
		fmt.Fprintln(os.Stderr, "Expected one argument")
		os.Exit(2)
	}
	if err := run(flag.Arg(0)); err != nil {
		fmt.Fprintln(os.Stderr, "Failed:", err)
		os.Exit(1)
	}
}
func run(filePath string) error {
// File open mode
var mode int
if *dryRun {
mode = os.O_RDONLY
} else {
mode = os.O_RDWR
}
// Open file
f, err := os.OpenFile(filePath, mode, 0)
if err != nil {
return err
}
defer func() {
if err := f.Close(); err != nil {
println("Failed to close file", err)
}
}()
// Seek to just before end
pos, err := f.Seek(0, 2)
if err != nil {
return err
}
if pos == 0 {
fmt.Println("No lines found")
return nil
}
pos -= 1
// Backwards-search for newline
for {
// Read block
if pos == 0 {
fmt.Println("No lines found")
return nil
}
if int64(*blockSize) > pos {
*blockSize = int(pos) - 1
}
pos -= int64(*blockSize)
buf := make([]byte, *blockSize)
if _, err := f.ReadAt(buf, pos); err != nil {
return err
}
// Find newline
idx := bytes.LastIndexByte(buf, '\n')
if idx >= 0 {
// Found newline
newSize := pos + int64(idx) + 1
fmt.Println("Truncating to size", newSize)
if !*dryRun {
return f.Truncate(newSize)
} else {
return nil
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment