Skip to content

Instantly share code, notes, and snippets.

@bom-d-van
Created September 26, 2017 01:43
Show Gist options
  • Save bom-d-van/c73dabc51129175c9cf7561f2cfac6a8 to your computer and use it in GitHub Desktop.
Save bom-d-van/c73dabc51129175c9cf7561f2cfac6a8 to your computer and use it in GitHub Desktop.
Faster File Reading In Go
// +build ignore
package main
import (
"fmt"
"os"
"runtime"
"syscall"
)
// main counts the newline characters in the file named by the first
// command-line argument. The file is split into one contiguous chunk per CPU,
// each chunk is counted in its own goroutine, and the summed newline count and
// number of bytes scanned are printed.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: program <file>")
		os.Exit(2)
	}
	filename := os.Args[1]
	file, err := os.Open(filename)
	if err != nil {
		panic(err)
	}
	// The workers only hold the raw descriptor returned by Fd(). Keeping the
	// *os.File referenced until main returns stops its finalizer from closing
	// that descriptor while they are still reading.
	defer file.Close()

	stat, err := file.Stat()
	if err != nil {
		panic(err)
	}
	size := int(stat.Size())

	workers := runtime.NumCPU()
	var (
		chunkSize   = size / workers
		count, read int
		countc      = make(chan int, workers)
		readc       = make(chan int, workers)
	)
	for i := 0; i < workers; i++ {
		offset := chunkSize * i
		csize := chunkSize
		if i == workers-1 {
			// The last worker also takes the remainder of the division.
			csize = size - offset
		}
		mf := newMmapFile(file.Fd(), offset, csize)
		go func() {
			c, r := mf.count()
			countc <- c
			readc <- r
		}()
	}
	// Every worker sends exactly one value on each channel.
	for i := 0; i < workers; i++ {
		count += <-countc
		read += <-readc
	}
	fmt.Printf("total: %d\nread: %d\n", count, read)
}
// mmapFile describes one contiguous byte range of an open file that is
// scanned through memory mappings.
type mmapFile struct {
	fd     uintptr // descriptor of the already opened file
	offset int     // file offset of the first byte of the range
	total  int     // number of bytes in the range
}

// newMmapFile returns an mmapFile covering total bytes of fd starting at offset.
func newMmapFile(fd uintptr, offset int, total int) *mmapFile {
	return &mmapFile{fd, offset, total}
}

// count returns the number of '\n' bytes in the range and the number of bytes
// scanned. The range is mapped in windows of at most 4 MiB; every window is
// unmapped once it has been scanned. It panics if mapping or unmapping fails.
func (m *mmapFile) count() (int, int) {
	const maxBuf = 4 * 1024 * 1024 // 4 MiB per mapping
	pageSize := syscall.Getpagesize()

	var count, totalRead int
	end := m.eof()
	for offset := m.offset; offset < end; {
		n := end - offset
		if n > maxBuf {
			n = maxBuf
		}
		// mmap only accepts page-aligned file offsets, so start the mapping
		// at the enclosing page boundary and skip the leading bytes that
		// belong to the previous range.
		skip := offset % pageSize
		data, err := syscall.Mmap(int(m.fd), int64(offset-skip), skip+n, syscall.PROT_READ, syscall.MAP_SHARED)
		if err != nil {
			panic(err)
		}
		for _, b := range data[skip:] {
			if b == '\n' {
				count++
			}
		}
		totalRead += n
		if err := syscall.Munmap(data); err != nil {
			panic(err)
		}
		offset += n
	}
	return count, totalRead
}

// eof returns the file offset one past the last byte of the range.
func (m *mmapFile) eof() int {
	return m.offset + m.total
}
// min returns the smaller of a and b; when they are equal it returns a.
func min(a, b int) int {
	smallest := a
	if b < smallest {
		smallest = b
	}
	return smallest
}
// +build ignore
package main
import (
"fmt"
"os"
"runtime"
)
// main counts the newline characters in the file named by the first
// command-line argument. The file is split into one contiguous chunk per CPU,
// each chunk is counted in its own goroutine, and the summed newline count and
// number of bytes scanned are printed.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: program <file>")
		os.Exit(2)
	}
	filename := os.Args[1]
	file, err := os.Open(filename)
	if err != nil {
		panic(err)
	}
	// Keep the file open (and its descriptor valid) until all workers are done.
	defer file.Close()

	stat, err := file.Stat()
	if err != nil {
		panic(err)
	}
	size := int(stat.Size())

	workers := runtime.NumCPU()
	var (
		chunkSize   = size / workers
		count, read int
		countc      = make(chan int, workers)
		readc       = make(chan int, workers)
	)
	for i := 0; i < workers; i++ {
		offset := chunkSize * i
		csize := chunkSize
		if i == workers-1 {
			// The last worker also takes the remainder of the division.
			csize = size - offset
		}
		mf := newMmapFile(file.Fd(), offset, csize)
		go func() {
			c, r := mf.count()
			countc <- c
			readc <- r
		}()
	}
	// Every worker sends exactly one value on each channel.
	for i := 0; i < workers; i++ {
		count += <-countc
		read += <-readc
	}
	fmt.Printf("total: %d\nread: %d\n", count, read)
}
// mmapFile describes one contiguous byte range of the input file. Despite the
// name, this variant scans the range with positioned reads rather than memory
// mappings.
type mmapFile struct {
	fd     uintptr // descriptor passed by the caller; count does not use it
	offset int     // file offset of the first byte of the range
	total  int     // number of bytes in the range
}

// newMmapFile returns an mmapFile covering total bytes starting at offset.
func newMmapFile(fd uintptr, offset int, total int) *mmapFile {
	return &mmapFile{fd, offset, total}
}

// count returns the number of '\n' bytes in the range and the number of bytes
// scanned. It opens its own handle on the file named by os.Args[1] and reads
// the range in windows of at most 4 MiB, never reading past the end of the
// range. It panics if the file cannot be opened or a read fails or comes up
// short.
func (m *mmapFile) count() (int, int) {
	const maxBuf = 4 * 1024 * 1024 // 4 MiB per read

	end := m.eof()
	if m.offset >= end {
		// Empty range: nothing to open or read.
		return 0, 0
	}

	file, err := os.Open(os.Args[1])
	if err != nil {
		panic(err)
	}
	defer file.Close()

	bufLen := m.total
	if bufLen > maxBuf {
		bufLen = maxBuf
	}
	buf := make([]byte, bufLen)

	var count, totalRead int
	for offset := m.offset; offset < end; {
		n := end - offset
		if n > len(buf) {
			n = len(buf)
		}
		// Limit the read to this range's bytes so neighbouring ranges are not
		// counted twice. ReadAt reports an error whenever it returns fewer
		// bytes than requested, so a nil error means the window is full.
		window := buf[:n]
		if _, err := file.ReadAt(window, int64(offset)); err != nil {
			panic(err)
		}
		for _, b := range window {
			if b == '\n' {
				count++
			}
		}
		totalRead += n
		offset += n
	}
	return count, totalRead
}

// eof returns the file offset one past the last byte of the range.
func (m *mmapFile) eof() int {
	return m.offset + m.total
}
// min returns the smaller of a and b; when they are equal it returns a.
func min(a, b int) int {
	result := a
	if b < result {
		result = b
	}
	return result
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment