Time to split a 1-million-line string (read from a Heroku log file) into lines, by language:
- Elixir 166s
- Clojure 8s
- Ruby 1s
- Python 0.5s
- JavaScript - buffer size error
defmodule LogStats.Logger do
  @moduledoc """
  Small helper for timing a piece of work and reporting the duration on stdout.
  """

  @doc """
  Runs `function`, prints how long it took, and returns its result.

  Writes `message` followed by `...` before the call, then the elapsed time in
  milliseconds (a float rounded to zero decimal places) followed by ` ms`.
  `function` is handed to `:timer.tc/1`, so it must take no arguments.
  """
  def elapsed(message, function) do
    IO.write("#{message}...")

    {micros, value} = :timer.tc(function)
    micros |> to_millis() |> report()

    value
  end

  # :timer.tc/1 reports microseconds; convert to whole milliseconds (as a float).
  defp to_millis(micros), do: Float.round(micros / 1000)

  # Finishes the line started by the label in elapsed/2.
  defp report(millis), do: IO.puts("#{millis} ms")
end
defmodule LogStats do
  @moduledoc """
  Entry point that reads a log file into memory and splits it into lines,
  printing how long each step takes.
  """

  alias LogStats.Logger

  # TODO: refactor to using Stream and Flow, see: https://www.youtube.com/watch?v=XPlXNUXmcgE

  @doc """
  Reads the file named by the single element of `args` and splits it on `"\\n"`.

  `args` must be a one-element list holding the file path; any other shape
  fails the match. Returns the list of lines.
  """
  def main(args) do
    [file_path] = args

    contents = Logger.elapsed("Reading file", fn -> File.read!(file_path) end)

    # Last expression, so the list of lines is what main/1 returns.
    Logger.elapsed("Splitting lines", fn -> String.split(contents, "\n") end)
  end
end
Clojure: I needed to add :jvm-opts ["-Xmx4G"]
to project.clj to avoid a java.lang.OutOfMemoryError (Java heap space) error.
;; Slurp the file named by the first command-line argument, then time how long
;; splitting it into lines takes. `time` prints the elapsed time; the split
;; result is bound but never used, and the `let` has no body.
(let [path     (first *command-line-args*)
      contents (slurp path)
      _lines   (time (clojure.string/split-lines contents))])
# Runs the given block, prints how long it took, and returns the block's value.
#
# config  - Not used by this method; kept so existing callers keep working.
# message - String label printed (followed by "...") before the block runs.
#
# Prints " <n> ms" after the block finishes, where <n> is the elapsed time
# rounded to whole milliseconds. An exception raised by the block propagates
# and no elapsed time is printed.
#
# Returns whatever the block returns.
def elapsed(config, message)
  # Use the monotonic clock: unlike Time.now it is not affected by system
  # clock adjustments, so the measured duration cannot jump or go negative.
  start_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  print(message + '...')
  # The label has no trailing newline; flush so it is written out before the
  # (possibly slow) block starts rather than sitting in the output buffer.
  $stdout.flush
  result = yield
  elapsed = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_at) * 1000.0).round
  puts(" #{elapsed} ms")
  result
end
# Path of the log file to process, taken from the first command-line argument.
file_path = ARGV[0]

# NOTE(review): `config` is not defined anywhere in the visible source;
# confirm it is set up before this point.
#
# File.read is used instead of IO.read: IO.read treats a path that begins
# with "|" as a command to run, which is unsafe for a path taken from ARGV.
log_file_data = elapsed(config, "Reading log file #{file_path}") do
  File.read(file_path)
end

# Split the in-memory contents on newlines, timing the split.
log_file_lines = elapsed(config, "Splitting lines") do
  log_file_data.split("\n")
end
import time
import sys
def elapsed(message, fn):
    """Call ``fn`` with no arguments, print how long it took, and return its result.

    Writes ``message`` followed by ``"... "`` to stdout before the call, then
    the elapsed time rounded to whole milliseconds followed by ``ms``.

    Args:
        message: Label describing the work; must be a ``str`` because it is
            concatenated with ``"... "``.
        fn: Zero-argument callable to time.

    Returns:
        Whatever ``fn()`` returns. An exception raised by ``fn`` propagates and
        no elapsed time is printed.
    """
    sys.stdout.write(message + "... ")
    # The label has no trailing newline, so flush explicitly; otherwise it can
    # stay in the buffer until the timing is printed after the work is done.
    sys.stdout.flush()
    # perf_counter() is monotonic, so the measurement is unaffected by
    # adjustments to the system clock (unlike time.time()).
    start_time = time.perf_counter()
    result = fn()
    elapsed_ms = int(round((time.perf_counter() - start_time) * 1000.0))
    print("%dms" % elapsed_ms)
    return result
def _read_text(path):
    """Return the full contents of the text file at ``path``, closing it afterwards."""
    with open(path, "r") as handle:
        return handle.read()


# Path of the log file to process, taken from the first command-line argument.
file_path = sys.argv[1]
# Read the whole file into memory, then split it on newlines, timing each step.
data = elapsed("Reading file " + file_path, lambda: _read_text(file_path))
lines = elapsed("Splitting lines", lambda: data.split("\n"))
const fs = require('fs')
/**
 * Runs `fn` with no arguments, prints how long it took, and returns its result.
 *
 * Writes `message` followed by "... " to stdout before the call, then the
 * elapsed time rounded to whole milliseconds followed by "ms".
 *
 * @param {string} message Label describing the work being timed.
 * @param {Function} fn Zero-argument function to run and time.
 * @returns {*} Whatever `fn()` returns. If `fn` throws, the error propagates
 *   and no elapsed time is printed.
 */
function elapsed(message, fn) {
  // process.hrtime() is a monotonic high-resolution timer, so the measurement
  // is not affected by changes to the system clock (unlike `new Date()`).
  const startTime = process.hrtime()
  process.stdout.write(`${message}... `)
  const result = fn()
  // Passing the earlier reading returns the difference as [seconds, nanoseconds].
  const [seconds, nanoseconds] = process.hrtime(startTime)
  const elapsed = Math.round(seconds * 1e3 + nanoseconds / 1e6)
  console.log(`${elapsed}ms`)
  return result
}
// Log file path comes from the first user-supplied command-line argument.
const filePath = process.argv[2]

// Load the whole file into memory as an ASCII-decoded string, timing the read.
const readWholeFile = () => fs.readFileSync(filePath, {encoding: 'ascii'})
const data = elapsed(`Reading file ${filePath}`, readWholeFile)
console.log(typeof data)

// Split on newlines, timing the split, then report how many pieces resulted.
const splitIntoLines = () => data.split("\n")
const lines = elapsed('Splitting lines', splitIntoLines)
console.log(`Number of lines: ${lines.length}`)