Skip to content

Instantly share code, notes, and snippets.

@iffy
Last active September 21, 2018 19:33
Show Gist options
  • Save iffy/ede3fb5b637b34566ecd36c447995c75 to your computer and use it in GitHub Desktop.
Save iffy/ede3fb5b637b34566ecd36c447995c75 to your computer and use it in GitHub Desktop.
Comparing line-reading in Python/Nim/Node
hugefile.txt
myreader
builtinreader
hugebin.txt
hugestr.txt

This tests different line-reading implementations in Node, Python and Nim.

Conclusions

  • Nobody agrees on what a "line" is :) Only wc, Python, and the custom Nim reader (myreader) report the same counts
  • Python is fastest, Node next, Nim last

Output from running make:

---hugebin.txt---
wc -l hugebin.txt
 1444473 hugebin.txt
time node reader.js hugebin.txt
hugebin.txt 1844188
        2.18 real         2.13 user         0.11 sys
time ./myreader hugebin.txt
hugebin.txt 1444473
        5.13 real         5.09 user         0.03 sys
time ./builtinreader hugebin.txt
hugebin.txt 279
        0.00 real         0.00 user         0.00 sys
time python reader.py hugebin.txt
hugebin.txt 1444473
        0.50 real         0.42 user         0.05 sys

---hugestr.txt---
wc -l hugestr.txt
 2078017 hugestr.txt
time node reader.js hugestr.txt
hugestr.txt 3095841
        0.65 real         0.63 user         0.08 sys
time ./myreader hugestr.txt
hugestr.txt 2078017
        5.11 real         5.06 user         0.03 sys
time ./builtinreader hugestr.txt
hugestr.txt 3085918
        5.01 real         4.96 user         0.03 sys
time python reader.py hugestr.txt
hugestr.txt 2078017
        0.64 real         0.56 user         0.05 sys

As a table:

| program | inputfile | time (s, real) | line count |
|---|---|---|---|
| wc | hugebin.txt | | 1444473 |
| reader.js | hugebin.txt | 2.18 | 1844188 |
| Nim (custom proc) | hugebin.txt | 5.13 | 1444473 |
| Nim (builtin proc) | hugebin.txt | 0 (failed) | 279 |
| reader.py | hugebin.txt | 0.5 | 1444473 |
| wc | hugestr.txt | | 2078017 |
| reader.js | hugestr.txt | 0.65 | 3095841 |
| Nim (custom proc) | hugestr.txt | 5.11 | 2078017 |
| Nim (builtin proc) | hugestr.txt | 5.01 | 3085918 |
| reader.py | hugestr.txt | 0.64 | 2078017 |
import streams
import os
iterator readFile*(stream: Stream): int =
  ## Yields the length of every line that `readLine` produces from `stream`.
  ##
  ## Iteration stops as soon as `readLine` returns false. The line buffer is
  ## reused between iterations; only its length is handed to the caller.
  var line = ""
  while stream.readLine(line):
    yield line.len
iterator readFile*(s: string): int =
  ## Convenience overload: treats `s` as the content to scan (not as a path),
  ## wraps it in a string stream and yields each line length found in it.
  let source = newStringStream(s)
  for lineLen in readFile(source):
    yield lineLen
# Script entry point: count the lines of the file named by the first
# command-line argument and print "<filename> <count>".
var lines = 0
let filename = paramStr(1)
let input = newFileStream(filename)
# newFileStream returns nil instead of raising when the file cannot be
# opened; bail out with a message rather than dereferencing nil below.
if input.isNil:
  quit("cannot open " & filename)
for x in readFile(input):
  lines += 1
input.close()
echo filename, " ", lines
#!/usr/bin/env python
import io
import sys
import random
import string
# Upper bound (inclusive) on the number of payload bytes per generated line,
# not counting the trailing newline.
max_line_size = 200


def makeLine(binary=False):
    """Return one random line, without its terminator, as a bytes-like object.

    binary -- truthy: every byte is drawn uniformly from 0..255.
              falsy:  every byte is drawn from ``string.printable`` (which
                      itself contains whitespace such as CR, LF, VT and FF).

    The length is picked uniformly from 0..max_line_size.
    """
    length = random.randint(0, max_line_size)
    if binary:
        return bytearray(random.getrandbits(8) for _ in range(length))
    # Encode explicitly: the file is opened in binary mode, and Python 3
    # refuses to write text (str) to a binary handle.
    text = ''.join(random.choice(string.printable) for _ in range(length))
    return text.encode('ascii')


def make_huge_file(filename, size, binary):
    """Write random newline-terminated lines to `filename` until it holds at
    least `size` bytes.

    A new line is started whenever the current offset is below `size`, so the
    result may overshoot by at most one full line. An existing file is
    overwritten.
    """
    with io.open(filename, 'wb') as fh:
        while fh.tell() < size:
            fh.write(makeLine(binary))
            fh.write(b'\n')


if __name__ == '__main__':
    # Usage: makeahugefile.py FILENAME MEGABYTES BINARY(0|1)
    filename = sys.argv[1]
    megs = int(sys.argv[2])
    binary = int(sys.argv[3])
    size = megs * (2**20)
    make_huge_file(filename, size, binary)
# Neither `all` nor `test` produces a file of that name, so both are phony;
# without this a stray file called `test` would stop the comparison from running.
.PHONY: all test

# First real target, hence the default goal: build everything, then run each
# reader against both generated files and time it.
test: all
	@printf "\n---hugebin.txt---\n"
	wc -l hugebin.txt
	time node reader.js hugebin.txt
	time ./myreader hugebin.txt
	time ./builtinreader hugebin.txt
	time python reader.py hugebin.txt
	@printf "\n---hugestr.txt---\n"
	wc -l hugestr.txt
	time node reader.js hugestr.txt
	time ./myreader hugestr.txt
	time ./builtinreader hugestr.txt
	time python reader.py hugestr.txt

all: hugebin.txt hugestr.txt myreader builtinreader

# Input files: 100 MiB each; the last argument selects binary (1) or
# printable-text (0) content.
hugebin.txt:
	python makeahugefile.py $@ 100 1

hugestr.txt:
	python makeahugefile.py $@ 100 0

# Nim readers, compiled in release mode.
myreader: myreader.nim
	nim c --out:$@ -d:release myreader.nim

builtinreader: builtinreader.nim
	nim c --out:$@ -d:release builtinreader.nim
import streams
import os
proc readLine(s: Stream, line: var TaintedString, delimiter = '\n'): bool =
  ## Reads characters from `s` into `line` up to, but not including, the next
  ## `delimiter`.
  ##
  ## Returns true when a line was read: either a delimiter was found, or the
  ## stream ended after at least one character (a final line without a
  ## trailing delimiter). Returns false, with `line` empty, once nothing is
  ## left. `line` is cleared on entry, so the same buffer can be reused.
  line.string.setLen(0)
  while true:
    let c = readChar(s)
    if c == delimiter:
      return true
    # readChar reports end of stream as '\0', which is also a legal data byte,
    # so atEnd() is consulted only for NULs. The sentinel itself must not be
    # appended to the line.
    # NOTE(review): a genuine NUL that is the very last byte of the stream is
    # indistinguishable from the sentinel here and is dropped.
    if c == '\0' and s.atEnd():
      return line.string.len > 0
    line.string.add(c)
iterator readFile*(stream: Stream): int =
  ## Yields the length of each line obtained from `stream` via `readLine`,
  ## stopping at the first call that returns false.
  ##
  ## One buffer is reused for every line; callers only ever see its length.
  var line = ""
  while stream.readLine(line):
    yield line.len
iterator readFile*(s: string): int =
  ## String overload: `s` is the text to scan, not a file name. It is wrapped
  ## in a string stream and every line length found in it is yielded.
  let source = newStringStream(s)
  for lineLen in readFile(source):
    yield lineLen
# Script entry point: count the lines of the file given as the first
# command-line argument and print "<filename> <count>".
var lines = 0
let filename = paramStr(1)
let input = newFileStream(filename)
# newFileStream yields nil rather than raising when the file cannot be
# opened; stop with a message instead of crashing on a nil stream.
if input.isNil:
  quit("cannot open " & filename)
for x in readFile(input):
  lines += 1
input.close()
echo filename, " ", lines
const readline = require("readline");
const fs = require("fs");
async function readLogFile(path, cb) {
let NUM = 0;
return new Promise((resolve, reject) => {
const linereader = readline.createInterface({
input: fs.createReadStream(path),
})
linereader.on('line', (line) => {
cb(line.length);
NUM++;
})
linereader.on('close', () => {
resolve(NUM);
})
})
}
// Script entry point: print "<filename> <lineCount>" for the file named by
// the first command-line argument.
const filename = process.argv[2];
readLogFile(filename, () => {
  // Per-line callback intentionally does nothing; only the count is reported.
})
  .then((lineCount) => {
    console.log(filename, lineCount);
  })
  .catch((err) => {
    // Report on stderr and exit non-zero so a failed run is not mistaken
    // for a successful measurement by the calling Makefile.
    console.error('Error', err);
    process.exitCode = 1;
  });
import sys
import io
def cb(x):
    """No-op per-line callback; receives the byte length of each line."""
    pass


def count_lines(path, callback=cb):
    """Return the number of lines in the file at `path`.

    The file is opened in binary mode and iterated line by line; `callback`
    is invoked once per line with the length in bytes of that line, including
    its terminator when present.
    """
    linecount = 0
    with io.open(path, 'rb') as fh:
        for line in fh:
            linecount += 1
            callback(len(line))
    return linecount


if __name__ == '__main__':
    filename = sys.argv[1]
    # %-formatting prints "<filename> <count>" identically on Python 2 and 3,
    # unlike the Python-2-only print statement.
    print("%s %d" % (filename, count_lines(filename)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment