Skip to content

Instantly share code, notes, and snippets.

@MirkoDziadzka
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MirkoDziadzka/a941b46e0b66035f1129 to your computer and use it in GitHub Desktop.
Save MirkoDziadzka/a941b46e0b66035f1129 to your computer and use it in GitHub Desktop.
Python vs. Go performance ... reading a set of values from a file
import time


def readFile(filename):
    """Read newline-separated entries from *filename* into a dict used as a set.

    Returns a dict mapping each non-empty line to True (the dict-as-set
    variant used for the Python-vs-Go comparison).
    """
    res = {}
    # 'with' guarantees the file handle is closed; the original
    # open(filename).read() leaked the handle until GC.
    with open(filename) as f:
        for line in f.read().split("\n"):
            if line:
                res[line] = True
    return res


if __name__ == '__main__':
    start_time = time.time()
    res = readFile("hash.db")
    duration = time.time() - start_time
    # print() function instead of the Python-2-only print statement.
    print("reading %d entries in %f seconds" % (len(res), duration))
package main
import (
"fmt"
"os"
"time"
"bytes"
)
type set map[string]bool
func readFile(filename string) set {
file, err := os.Open(filename)
if err != nil {
panic(err)
}
stat, err := file.Stat()
if err != nil {
panic(err)
}
buffer := make([]byte, stat.Size())
_,err = file.Read(buffer)
if err != nil {
panic(err)
}
res := make(set)
for _,value := range bytes.Split(buffer, []byte("\n")) {
s := string(value)
if s != "" {
res[s] = true
}
}
return res
}
// main times how long it takes to load hash.db into a set and reports it.
func main() {
	started := time.Now()
	entries := readFile("hash.db")
	elapsed := time.Since(started)
	fmt.Printf("read %d entries in %s\n", len(entries), elapsed)
}
import time


def readFile(filename):
    """Read newline-separated entries from *filename* into a set.

    Returns a set containing each non-empty line of the file.
    """
    res = set()
    # 'with' guarantees the file handle is closed; the original
    # open(filename).read() leaked the handle until GC.
    with open(filename) as f:
        for line in f.read().split("\n"):
            if line:
                res.add(line)
    return res


if __name__ == '__main__':
    start_time = time.time()
    res = readFile("hash.db")
    duration = time.time() - start_time
    # print() function instead of the Python-2-only print statement.
    print("reading %d entries in %f seconds" % (len(res), duration))
import hashlib


def makeSet(size):
    """Return a set of *size* distinct hex SHA-1 digests.

    The digests are sha1("0"), sha1("1"), ... sha1(str(size - 1)).
    """
    res = set()
    for i in range(size):
        # str(i).encode(): hashlib requires bytes in Python 3
        # (the original passed a py2 str).
        value = hashlib.sha1(str(i).encode()).hexdigest()
        res.add(value)
    return res


def makeFile(filename, size):
    """Write *size* generated digests to *filename*, one per line."""
    data = makeSet(size)
    # 'with' guarantees the file is flushed and closed.
    with open(filename, "w") as f:
        f.write("\n".join(data) + '\n')


if __name__ == '__main__':
    makeFile("hash.db", 1000 * 1000)
@MirkoDziadzka
Copy link
Author

Ok — the hint on the mailing list was to give a size hint to `make`. cache-improved.go does this and is now on the same performance level as the Python implementation.

Go improved output: read 1000000 entries in 468.880726ms

package main

import (
    "fmt"
    "os"
    "time"
    "bytes"
)


type set map[string]struct{}

func readFile(filename string) set {
    file, err := os.Open(filename)
    if err != nil {
        panic(err)
    }
    stat, err := file.Stat()
    if err != nil {
        panic(err)
    }
    buffer := make([]byte, stat.Size())
    _,err = file.Read(buffer)
    if err != nil {
        panic(err)
    }

    splitBuffer := bytes.Split(buffer, []byte("\n"))
    res := make(set, len(splitBuffer))

    for _,value  := range splitBuffer {
        s := string(value)
        if s != "" {
            res[s] = struct{}{}
        }
    }
    return res
}

// main times how long it takes to load hash.db into a set and reports it.
func main() {
	started := time.Now()
	entries := readFile("hash.db")
	elapsed := time.Since(started)
	fmt.Printf("read %d entries in %s\n", len(entries), elapsed)
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment