Skip to content

Instantly share code, notes, and snippets.

@itchyny
Created August 30, 2019 06:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save itchyny/2fd38189bf6886dbb0b3a00e592eae1a to your computer and use it in GitHub Desktop.
Save itchyny/2fd38189bf6886dbb0b3a00e592eae1a to your computer and use it in GitHub Desktop.
package main
import (
	"bufio"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"os"
	"strconv"
)
// step is the number of lines between consecutive byte offsets recorded in
// the ".pos" index file: scanPos records an offset after every step-th
// newline, and run divides the requested line index by step to pick an entry.
const step = 100000
// main parses the command line ("<file> <line>"), where <line> is a 1-based
// line number, and prints that line of the file to standard output.
//
// Any failure is reported on standard error and the process exits with
// status 1.
func main() {
	if len(os.Args) != 3 {
		fmt.Fprintf(os.Stderr, "specify file and line\n")
		os.Exit(1)
	}
	line, err := strconv.ParseUint(os.Args[2], 10, 64)
	if err != nil {
		fmt.Fprintf(os.Stderr, "%s\n", err)
		os.Exit(1)
	}
	if line == 0 {
		// Lines are numbered from 1. Without this guard, line-1 below would
		// wrap around to the largest uint64 instead of failing cleanly.
		fmt.Fprintf(os.Stderr, "line number must be 1 or greater\n")
		os.Exit(1)
	}
	// run works with a zero-based line index.
	if err := run(os.Args[1], line-1); err != nil {
		fmt.Fprintf(os.Stderr, "%s\n", err)
		os.Exit(1)
	}
}
// run prints the line with zero-based index line of the file name to
// standard output, followed by a newline.
//
// The byte offset of the nearest preceding multiple-of-step line is looked up
// in the index file name+".pos". If that index does not exist yet it is built
// with scanPos and persisted with savePos. The target file is then read
// forward from that offset until the requested line is reached.
//
// It returns a "line too large" error when the file has fewer lines than
// requested, and propagates any I/O error encountered on the way.
func run(name string, line uint64) error {
	// Open the target first so that a missing or unreadable file is reported
	// before an index file is created for it.
	f, err := os.Open(name)
	if err != nil {
		return err
	}
	defer f.Close()

	pos, err := readPos(name, line/step)
	if err != nil {
		if !os.IsNotExist(err) {
			return err
		}
		// No index yet: build it, persist it, and answer from memory.
		positions := scanPos(name)
		if err := savePos(name, positions); err != nil {
			return err
		}
		if line/step >= uint64(len(positions)) {
			return errors.New("line too large")
		}
		pos = positions[line/step]
	}
	if _, err := f.Seek(int64(pos), io.SeekStart); err != nil {
		return err
	}

	// A buffered reader keeps line boundaries intact even when a line spans
	// more than one underlying read.
	r := bufio.NewReaderSize(f, 1024*1024)
	if err := skipLines(r, line%step); err != nil {
		return err
	}
	text, err := r.ReadBytes('\n')
	if err != nil && !errors.Is(err, io.EOF) {
		return err
	}
	if len(text) == 0 {
		// End of file exactly where the requested line would begin.
		return errors.New("line too large")
	}
	// Drop the terminator; a final line without one is printed as-is.
	if text[len(text)-1] == '\n' {
		text = text[:len(text)-1]
	}
	fmt.Printf("%s\n", text)
	return nil
}

// skipLines consumes n newline-terminated lines from r. It returns a
// "line too large" error if the input ends before n newlines were seen.
func skipLines(r *bufio.Reader, n uint64) error {
	for n > 0 {
		_, err := r.ReadSlice('\n')
		switch {
		case err == nil:
			n--
		case errors.Is(err, bufio.ErrBufferFull):
			// The current line is longer than the buffer; the next call
			// continues with the remainder of the same line.
		case errors.Is(err, io.EOF):
			return errors.New("line too large")
		default:
			return err
		}
	}
	return nil
}
// scanPos reads the file name from start to end and returns the byte offsets
// at which every step-th line begins. The first entry is always 0 (the start
// of the file); one more entry is appended for the byte following each
// step-th newline.
//
// Failures are not reported: if the file cannot be opened the result is just
// the initial entry, and a read error (including end of file) ends the scan
// and returns whatever was collected so far.
func scanPos(name string) []uint64 {
	offsets := []uint64{0}

	file, openErr := os.Open(name)
	if openErr != nil {
		return offsets
	}
	defer file.Close()

	var (
		newlines uint64 // newlines seen so far in the whole file
		base     uint64 // file offset of buf[0] for the current chunk
	)
	buf := make([]byte, 1024*1024)
	for {
		n, readErr := file.Read(buf)
		for i, b := range buf[:n] {
			if b != '\n' {
				continue
			}
			newlines++
			if newlines%step == 0 {
				// The next line starts right after this newline.
				offsets = append(offsets, base+uint64(i)+1)
			}
		}
		base += uint64(n)
		if readErr != nil {
			return offsets
		}
	}
}
// readPos returns the n-th entry (zero-based) of the index file name+".pos".
// Entries are consecutive 8-byte little-endian unsigned integers, so entry n
// is read from byte offset n*8.
//
// The error from opening or seeking the index file is returned unchanged
// (callers can test it with os.IsNotExist). If the file ends exactly at the
// requested entry, a "line too large" error is returned instead.
func readPos(name string, n uint64) (uint64, error) {
	index, err := os.Open(name + ".pos")
	if err != nil {
		return 0, err
	}
	defer index.Close()

	if _, err := index.Seek(int64(n*8), io.SeekStart); err != nil {
		return 0, err
	}

	var offset uint64
	switch err := binary.Read(index, binary.LittleEndian, &offset); err {
	case nil:
		return offset, nil
	case io.EOF:
		// Nothing stored at this position: the index has no such entry.
		return 0, errors.New("line too large")
	default:
		return 0, err
	}
}
// savePos stores positions in the index file name+".pos" as consecutive
// 8-byte little-endian unsigned integers, in slice order.
//
// The file is created exclusively: if it already exists, the open error is
// returned and the existing file is left untouched. If writing or closing the
// new file fails, the partial file is removed so that a truncated index is
// never mistaken for a complete one on a later run.
func savePos(name string, positions []uint64) (err error) {
	posFile := name + ".pos"
	f, err := os.OpenFile(posFile, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0644)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean the data never reached the disk, so it is
		// reported unless an earlier error already takes precedence.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
		if err != nil {
			// Best-effort cleanup; the original error is the one to report.
			_ = os.Remove(posFile)
		}
	}()

	// Encode everything up front and hand it to the file in a single write.
	buf := make([]byte, 8*len(positions))
	for i, pos := range positions {
		binary.LittleEndian.PutUint64(buf[8*i:], pos)
	}
	_, err = f.Write(buf)
	return err
}
@itchyny
Copy link
Author

itchyny commented Aug 30, 2019

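  • Creates a pos file that stores the byte offset of every 100,000th line (one entry per step).
  • Seeks within the pos file and reads the offset of the nearest preceding step.
  • Seeks the target file to that offset and scans forward to the requested line.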

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment