Skip to content

Instantly share code, notes, and snippets.

@rayman22201
Forked from iffy/.gitignore
Last active September 21, 2018 19:45
Show Gist options
  • Save rayman22201/6b26e1a14b5e4f04776103de976cd8d7 to your computer and use it in GitHub Desktop.
Save rayman22201/6b26e1a14b5e4f04776103de976cd8d7 to your computer and use it in GitHub Desktop.
Comparing line-reading in Python/Nim/Node
hugefile.txt
myreader
fgetsreader
builtinreader
hugebin.txt
hugestr.txt

Updated output, including the results of using File (and fgets) directly. Note that my results will be slightly different because they were produced on a different machine; your mileage may vary.

---hugebin.txt---
wc -l hugebin.txt
1444162 hugebin.txt
time nodejs reader.js hugebin.txt
hugebin.txt 1845392
4.37user 0.16system 0:04.36elapsed 103%CPU (0avgtext+0avgdata 64796maxresident)k
0inputs+0outputs (0major+11499minor)pagefaults 0swaps
time ./myreader hugebin.txt
hugebin.txt 1444162
5.84user 0.02system 0:05.87elapsed 99%CPU (0avgtext+0avgdata 1432maxresident)k
0inputs+0outputs (0major+94minor)pagefaults 0swaps
time ./fgetsreader hugebin.txt
hugebin.txt 1444162
0.25user 0.03system 0:00.29elapsed 99%CPU (0avgtext+0avgdata 1432maxresident)k
0inputs+0outputs (0major+96minor)pagefaults 0swaps
time ./builtinreader hugebin.txt
hugebin.txt 71
0.00user 0.00system 0:00.00elapsed ?%CPU (0avgtext+0avgdata 1416maxresident)k
0inputs+0outputs (0major+95minor)pagefaults 0swaps
time python reader.py hugebin.txt
hugebin.txt 1444162
0.84user 0.07system 0:00.92elapsed 99%CPU (0avgtext+0avgdata 6792maxresident)k
0inputs+0outputs (0major+848minor)pagefaults 0swaps

---hugestr.txt---
wc -l hugestr.txt
2078997 hugestr.txt
time nodejs reader.js hugestr.txt
hugestr.txt 3095724
1.11user 0.10system 0:01.00elapsed 120%CPU (0avgtext+0avgdata 46828maxresident)k
0inputs+0outputs (0major+7034minor)pagefaults 0swaps
time ./myreader hugestr.txt
hugestr.txt 2078997
5.52user 0.02system 0:05.55elapsed 99%CPU (0avgtext+0avgdata 1452maxresident)k
0inputs+0outputs (0major+96minor)pagefaults 0swaps
time ./fgetsreader hugestr.txt
hugestr.txt 2078997
0.43user 0.02system 0:00.45elapsed 99%CPU (0avgtext+0avgdata 1408maxresident)k
0inputs+0outputs (0major+95minor)pagefaults 0swaps
time ./builtinreader hugestr.txt
hugestr.txt 3085795
4.52user 0.06system 0:04.58elapsed 99%CPU (0avgtext+0avgdata 1408maxresident)k
0inputs+0outputs (0major+92minor)pagefaults 0swaps
time python reader.py hugestr.txt
hugestr.txt 2078997
1.68user 0.06system 0:01.75elapsed 99%CPU (0avgtext+0avgdata 6788maxresident)k
0inputs+0outputs (0major+847minor)pagefaults 0swaps

As a table:

program inputfile time line count
wc hugebin.txt 1444162
reader.js hugebin.txt 4.37 1845392
Nim (custom proc) hugebin.txt 5.84 1444162
Nim (builtin proc) hugebin.txt 0 (failed) 71
Nim (File/fgets direct) hugebin.txt 0.25 1444162
reader.py hugebin.txt 0.84 1444162
wc hugestr.txt 2078997
reader.js hugestr.txt 1.11 3095724
Nim (custom proc) hugestr.txt 5.52 2078997
Nim (builtin proc) hugestr.txt 4.52 3085795
Nim (File/fgets direct) hugestr.txt 0.43 2078997
reader.py hugestr.txt 1.68 2078997
import streams
import os
iterator readFile*(stream: Stream): int =
  ## Yields the length of every line that `readLine` reads from `stream`.
  ##
  ## Iteration stops as soon as `readLine` returns false.
  var line = ""  # buffer handed to readLine on every iteration
  while stream.readLine(line):
    yield line.len
iterator readFile*(s: string): int =
  ## Treats `s` as in-memory text: wraps it in a string stream and yields
  ## whatever the stream-based `readFile` iterator yields for it.
  let source = newStringStream(s)
  for lineLen in readFile(source):
    yield lineLen
# Script entry point: count the lines of the file named by the first
# command-line argument and print "<filename> <count>".
if paramCount() < 1:
  quit("usage: " & getAppFilename() & " <file>")

var lines = 0
let filename = paramStr(1)

# newFileStream returns nil when the file cannot be opened; fail with a
# message instead of dereferencing nil inside the iterator.
let input = newFileStream(filename)
if input.isNil:
  quit("cannot open " & filename)

for x in readFile(input):
  lines += 1
input.close()

echo filename, " ", lines
import os
iterator readFile*(f: File): int =
  ## Yields the length of each line read from the already-open file `f`.
  ##
  ## The caller keeps ownership of `f`; this iterator does not close it.
  var line = ""  # buffer handed to readLine on every iteration
  while f.readLine(line):
    yield line.len
iterator readFile*(s: string): int =
  ## Opens the file at path `s`, yields what the `File`-based `readFile`
  ## iterator yields for it, and closes the file in a `finally` block.
  let handle = open(s)
  try:
    for lineLen in readFile(handle):
      yield lineLen
  finally:
    handle.close()
# Script entry point: tally the values yielded for the file named by the
# first command-line argument, then print "<filename> <count>".
let filename = paramStr(1)
var lines = 0
for lineLen in readFile(filename):
  inc(lines)
echo filename, " ", lines
#!/usr/bin/env python
import io
import sys
import random
import string
# Usage: makeahugefile.py FILENAME MEGABYTES BINARY
#   FILENAME  - file to create (overwritten if it exists)
#   MEGABYTES - minimum size of the output, in MiB
#   BINARY    - non-zero for random bytes, 0 for random printable characters
filename = sys.argv[1]
megs = int(sys.argv[2])
binary = int(sys.argv[3])
size = megs * (2**20)
max_line_size = 200

if binary:
    def makeLine():
        """Return between 0 and max_line_size random bytes."""
        return bytearray(random.getrandbits(8) for _ in range(random.randint(0, max_line_size)))
else:
    def makeLine():
        """Return between 0 and max_line_size random printable characters, as bytes.

        string.printable includes whitespace such as '\\n' and '\\r', so one
        generated chunk can itself contain line breaks.
        """
        text = ''.join(random.choice(string.printable) for _ in range(random.randint(0, max_line_size)))
        # The file is opened in binary mode, so hand it bytes on Python 2 and 3 alike.
        return text.encode('ascii')

with io.open(filename, 'wb') as fh:
    # Keep appending newline-terminated chunks until the target size is reached.
    while fh.tell() < size:
        fh.write(makeLine())
        fh.write(b'\n')
# Builds the generated input files and the three Nim readers, then times every
# reader against both inputs.
#
# Neither `all` nor `test` produces a file of that name, so both are phony.
.PHONY: all test

# `test` is the first rule in the file, so a bare `make` runs the benchmark.
test: all
	@printf "\n---hugebin.txt---\n"
	wc -l hugebin.txt
	time nodejs reader.js hugebin.txt
	time ./myreader hugebin.txt
	time ./fgetsreader hugebin.txt
	time ./builtinreader hugebin.txt
	time python reader.py hugebin.txt
	@printf "\n---hugestr.txt---\n"
	wc -l hugestr.txt
	time nodejs reader.js hugestr.txt
	time ./myreader hugestr.txt
	time ./fgetsreader hugestr.txt
	time ./builtinreader hugestr.txt
	time python reader.py hugestr.txt

all: hugebin.txt hugestr.txt myreader fgetsreader builtinreader

# Input files: 100 MiB each; the last argument selects binary (1) or text (0).
hugebin.txt:
	python makeahugefile.py $@ 100 1

hugestr.txt:
	python makeahugefile.py $@ 100 0

# Nim readers, compiled in release mode.
myreader: myreader.nim
	nim c --out:$@ -d:release myreader.nim

fgetsreader: fgetsreader.nim
	nim c --out:$@ -d:release fgetsreader.nim

builtinreader: builtinreader.nim
	nim c --out:$@ -d:release builtinreader.nim
import streams
import os
proc readLine(s: Stream, line: var TaintedString, delimiter = '\n'): bool =
  ## Reads characters from `s` into `line` up to, but not including, the next
  ## `delimiter`.
  ##
  ## `line` is cleared first. Returns true when a line was read: either a
  ## delimiter was found, or the stream ended after at least one character
  ## (a final line with no trailing delimiter). Returns false, with `line`
  ## empty, once the stream is exhausted.
  ##
  ## Only `delimiter` ends a line; '\r' and '\0' are ordinary data.
  line.string.setLen(0)
  var c: char
  while true:
    # readData reports how many bytes it delivered, so end-of-stream can be
    # told apart from a genuine NUL byte without a separate atEnd() call.
    if readData(s, addr(c), 1) != 1:
      return line.string.len > 0
    if c == delimiter:
      return true
    line.string.add(c)
iterator readFile*(stream: Stream): int =
  ## Yields the length of every line obtained by calling `readLine` on
  ## `stream`, using whichever `readLine` overload is in scope.
  ##
  ## Iteration stops as soon as `readLine` returns false.
  var line = ""  # buffer handed to readLine on every iteration
  while stream.readLine(line):
    yield line.len
iterator readFile*(s: string): int =
  ## Treats `s` as in-memory text: wraps it in a string stream and yields
  ## whatever the stream-based `readFile` iterator yields for it.
  let source = newStringStream(s)
  for lineLen in readFile(source):
    yield lineLen
# Script entry point: count the lines of the file named by the first
# command-line argument and print "<filename> <count>".
if paramCount() < 1:
  quit("usage: " & getAppFilename() & " <file>")

var lines = 0
let filename = paramStr(1)

# newFileStream returns nil when the file cannot be opened; fail with a
# message instead of dereferencing nil inside the iterator.
let input = newFileStream(filename)
if input.isNil:
  quit("cannot open " & filename)

for x in readFile(input):
  lines += 1
input.close()

echo filename, " ", lines
const readline = require("readline");
const fs = require("fs");
/**
 * Reads a file line by line, reports each line's length, and counts the lines.
 *
 * Lines are split by Node's `readline` module, which also treats a lone `\r`
 * as a line break, so the count can be higher than `wc -l` reports.
 *
 * @param {string} path - File to read.
 * @param {(length: number) => void} cb - Called once per line with `line.length`.
 * @returns {Promise<number>} Resolves with the number of lines once the input
 *   is exhausted; rejects if the file cannot be opened or read.
 */
function readLogFile(path, cb) {
  return new Promise((resolve, reject) => {
    let lineCount = 0;
    const input = fs.createReadStream(path);
    const linereader = readline.createInterface({
      input,
      // Always treat "\r\n" as one line break, regardless of chunk timing.
      crlfDelay: Infinity,
    });

    const fail = (err) => {
      // Reject before closing: close() emits 'close', whose handler resolves.
      reject(err);
      linereader.close();
      input.destroy();
    };
    // Without an 'error' listener a missing or unreadable file would surface
    // as an uncaught exception instead of a rejected promise. Listen on both
    // objects because some Node versions re-emit stream errors on the interface.
    input.on('error', fail);
    linereader.on('error', fail);

    linereader.on('line', (line) => {
      cb(line.length);
      lineCount += 1;
    });
    linereader.on('close', () => {
      resolve(lineCount);
    });
  });
}
// Script entry point: count the lines of the file named on the command line
// and print "<filename> <count>".
const filename = process.argv[2];

// The per-line callback is deliberately a no-op; it receives each line's length.
readLogFile(filename, (lineLength) => {})
  .then((lineCount) => {
    console.log(filename, lineCount);
  })
  .catch((err) => {
    // Report on stderr and exit non-zero so callers (e.g. make) see the failure.
    console.error('Error', err);
    process.exitCode = 1;
  });
import sys
import io
# Count the lines of the file named on the command line and print
# "<filename> <count>".
filename = sys.argv[1]


def cb(x):
    """Per-line hook; receives the line's length in bytes and does nothing."""
    pass


linecount = 0
# Binary mode: no decoding, and iteration splits on b'\n' only.
with io.open(filename, 'rb') as fh:
    for line in fh:
        linecount += 1
        cb(len(line))
# A single formatted string prints identically under Python 2 and Python 3.
print("%s %d" % (filename, linecount))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment