Go script to efficiently truncate the last line of a file. Use it when that 750GB NDJSON dump aborts in the middle of a write.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// remove_last_line efficiently removes the last line of a file in place, keeping the newline that terminates the preceding line.
package main | |
import ( | |
"bytes" | |
"flag" | |
"fmt" | |
"os" | |
) | |
// Command-line flags. They are registered at package initialisation so the
// pointers are never nil; main calls flag.Parse to fill in the user's values.
var (
	// dryRun makes run open the file read-only and only report the size it
	// would truncate to.
	dryRun = flag.Bool("dry-run", false, "Don't change file, only print changes")

	// blockSize is the number of bytes read per step of the backwards scan.
	blockSize = flag.Int("block-size", 32768, "Read block size")
)

// main parses and validates the command line, then runs the truncation on the
// single file argument. It exits with status 2 on a usage error and with
// status 1 when run returns an error.
func main() {
	flag.Parse()
	if *blockSize <= 0 {
		fmt.Fprintln(os.Stderr, "Invalid block size")
		os.Exit(2)
	}
	args := flag.Args()
	if len(args) != 1 {
		fmt.Fprintln(os.Stderr, "Expected one argument")
		os.Exit(2)
	}
	if err := run(args[0]); err != nil {
		fmt.Fprintln(os.Stderr, "Failed:", err.Error())
		os.Exit(1)
	}
}

// run removes the last line of the file at filePath by truncating the file
// just after the last newline that precedes the file's final byte.
//
// The final byte itself is never inspected, so a file that ends in a newline
// loses its last complete line, while a file that ends mid-line loses only the
// partial line. When no such newline exists (empty file, or a single line),
// run prints "No lines found" and leaves the file untouched.
//
// With the dry-run flag set the file is opened read-only and the new size is
// printed but not applied. run returns any error from opening, seeking,
// reading or truncating the file; a failure to close the file is reported on
// stderr only.
func run(filePath string) error {
	// Only ask for write access when the file will actually be modified.
	mode := os.O_RDWR
	if *dryRun {
		mode = os.O_RDONLY
	}

	f, err := os.OpenFile(filePath, mode, 0)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := f.Close(); cerr != nil {
			fmt.Fprintln(os.Stderr, "Failed to close file", cerr)
		}
	}()

	// Seeking to the end yields the file size without reading any data.
	size, err := f.Seek(0, 2) // whence 2 is io.SeekEnd
	if err != nil {
		return err
	}

	newSize, found, err := lastLineStart(f, size, *blockSize)
	if err != nil {
		return err
	}
	if !found {
		fmt.Println("No lines found")
		return nil
	}

	fmt.Println("Truncating to size", newSize)
	if *dryRun {
		return nil
	}
	return f.Truncate(newSize)
}

// lastLineStart scans the first size-1 bytes of f backwards, blockSize bytes
// at a time, and returns the offset just past the last newline found there.
// The boolean result is false when that range contains no newline.
func lastLineStart(f *os.File, size int64, blockSize int) (int64, bool, error) {
	if blockSize <= 0 {
		return 0, false, fmt.Errorf("invalid block size %d", blockSize)
	}

	// One buffer is reused for every block instead of allocating per step.
	buf := make([]byte, blockSize)

	// end is the exclusive upper bound of the region still to be scanned.
	// Starting at size-1 skips the final byte of the file.
	end := size - 1
	for end > 0 {
		// Clamp the read to what is left so the scan reaches offset 0 and
		// always makes progress.
		n := int64(len(buf))
		if n > end {
			n = end
		}
		start := end - n
		chunk := buf[:n]
		if _, err := f.ReadAt(chunk, start); err != nil {
			return 0, false, err
		}
		if idx := bytes.LastIndexByte(chunk, '\n'); idx >= 0 {
			return start + int64(idx) + 1, true, nil
		}
		end = start
	}
	return 0, false, nil
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment