Last active
September 25, 2015 00:21
-
-
Save kbourgoin/cf02dfea40d8679c2798 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"fmt" | |
"log" | |
"net/url" | |
"os" | |
"strings" | |
"time" | |
) | |
// main times how long it takes to scan ./test.log, pull the URL field out of
// every line, and parse that URL together with its query string. It prints the
// number of lines whose URL parsed successfully, followed by the elapsed time.
func main() {
	start := time.Now()

	file, err := os.Open("./test.log")
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	count := 0
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		if parseLogLine(scanner.Text()) {
			count++
		}
	}
	// Scan returns false on failure as well as at EOF (for example when a line
	// exceeds the scanner's buffer). Without this check such a failure would
	// show up only as a silently truncated count.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}

	fmt.Println(count)
	fmt.Println(time.Since(start))
}

// parseLogLine extracts the fifth " || "-separated field of line, drops its
// first four bytes, and parses the remainder as a request URI, including its
// query string. It reports whether the line yielded a parseable URI.
//
// Lines with fewer than five fields, or whose fifth field is shorter than four
// bytes, are reported as unparseable rather than causing an index panic.
func parseLogLine(line string) bool {
	const (
		sep       = " || "
		urlField  = 4 // zero-based index of the field holding the URL
		prefixLen = 4 // bytes stripped before the URL; presumably a fixed label such as "url=" (confirm against the log format)
	)

	// Splitting stops right after the URL field so that trailing fields are
	// not separated needlessly; fields[urlField] is unaffected by the limit.
	fields := strings.SplitN(line, sep, urlField+2)
	if len(fields) <= urlField || len(fields[urlField]) < prefixLen {
		return false
	}

	u, err := url.ParseRequestURI(fields[urlField][prefixLen:])
	if err != nil {
		return false
	}

	// Query parsing is part of the workload being timed; its result is not
	// needed here.
	_ = u.Query()
	return true
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Python 3 | |
-------- | |
CPython: 12.9s | |
PyPy: 33.0s | |
Python 2.7 | |
---------- | |
CPython: 8.2s | |
PyPy: 3.3s | |
Go | |
-- | |
Go: 2.2s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function

import datetime as dt

try:  # Python 2
    from urlparse import parse_qsl, urlparse
except ImportError:  # Python 3
    # Import the functions themselves: binding the ``urllib.parse`` module to
    # the name ``urlparse`` would make ``urlparse(...)`` a call on a module.
    from urllib.parse import parse_qsl, urlparse


def count_query_params(lines, max_lines=100000):
    """Parse the URL field of each log line and count its query parameters.

    Each line is split on " || "; the fifth field, minus its first four
    characters, is parsed as a URL and its query string is decoded.

    Args:
        lines: Iterable of log-line strings (for example an open file).
        max_lines: Maximum number of lines to consume from ``lines``.

    Returns:
        A ``(lines_seen, param_count)`` tuple: how many lines were consumed
        and the total number of query parameters found in them. Lines with
        fewer than five fields, or whose URL cannot be parsed, count as seen
        but contribute no parameters.
    """
    seen = 0
    params = 0
    for line in lines:
        if seen >= max_lines:
            break
        seen += 1
        parts = line.split(" || ", 9)
        if len(parts) < 5:
            # Malformed line: no URL field to index into.
            continue
        try:
            url = urlparse(parts[4][4:])
        except ValueError:
            # urlparse rejects some malformed URLs; skip them instead of
            # aborting the whole run.
            continue
        params += len(parse_qsl(url.query))
    return seen, params


def main(fname='./test.log'):
    """Time URL parsing over a log file.

    Prints the elapsed time followed by the number of lines processed.

    Args:
        fname: Path of the log file to read.
    """
    start = dt.datetime.now()
    # The context manager closes the file even if parsing raises.
    with open(fname) as log_file:
        lines_seen, _ = count_query_params(log_file)
    print(dt.datetime.now() - start)
    print(lines_seen)


if __name__ == '__main__':
    main()
Thanks for the feedback! The file itself was only 100k lines, what's leftover is just cruft from testing.
I'll look into limiting allocations, and see if I can get this moving a bit faster.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A few comments:
strings.Split allocates a new slice of strings. In Python this is a normal thing that you don't think about, but in Go we often try to avoid it; avoiding this intermediate allocation should speed things up a lot.
Consider moving the timing code into main_test.go and using testing.B and benchmarks.
For more on performance tuning in Go, I did a talk about this stuff, but the official blog also covers it quite well.