Skip to content

Instantly share code, notes, and snippets.

@jl2
Last active September 21, 2018 16:29
Show Gist options
  • Save jl2/6e42e3c91c0ec87a1a0537c7b2827d96 to your computer and use it in GitHub Desktop.
Save jl2/6e42e3c91c0ec87a1a0537c7b2827d96 to your computer and use it in GitHub Desktop.
;; The Computer Language Benchmarks Game
;; https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
;;;
;;; resubmitted by Wade Humeniuk (Fix Stream Problem)
;;; resubmitted by Jon Smith (Remove silly assertion causing it to break on 16000 size)
;;; re-resubmitted by Jon Smith (with a silly hack to make it parallel).
;;; Original contributed by Yannick Gingras
;;;
;;; To compile
;;; sbcl --load mandelbrot.lisp --no-userinit --eval "(declaim (optimize (speed 3) (safety 0) (debug 0)))" --eval "(save-lisp-and-die \"mandelbrot\" :executable t :purify t :toplevel (lambda () (main) (quit)))"
;;; To run
;;; ./mandelbrot %A [thread-count]   (%A is the image size; thread-count defaults to 4)
(defun render (size stream)
  "Render the SIZE x SIZE Mandelbrot bitmap on a single thread and write the
packed pixel bytes to STREAM.

Pixels are packed eight to a byte, most significant bit first.  A bit is 1
when the point did not escape within 50 iterations.  Every row starts on a
byte boundary: when SIZE is not a multiple of 8 the last byte of the row is
padded with zero bits, which matches the (* size (ceiling size 8)) buffer
allocation.  Only the pixel data is written; the caller emits the header."
  (declare (type fixnum size) (type stream stream)
           (optimize (speed 3) (safety 0) (debug 0)))
  (let* ((code 0)                       ; bits collected for the current byte
         (bit 0)                        ; number of bits currently in CODE
         (zr 0.0d0)
         (zi 0.0d0)
         (tr 0.0d0)
         (delta (/ 2d0 size))           ; step between neighbouring pixels
         (base-real -1.5d0)
         (base-imag -1.0d0)
         (buffer (make-array (* size (ceiling size 8))
                             :element-type '(unsigned-byte 8)))
         (index 0))                     ; next free byte in BUFFER
    (declare (type (unsigned-byte 8) code)
             (type double-float zr zi tr base-real base-imag delta)
             (type fixnum index bit))
    (dotimes (y size)
      (setf base-imag (- 1.0d0 (* delta y)))
      (dotimes (x size)
        (setf base-real (+ -1.5d0 (* delta x))
              zr base-real
              zi base-imag)
        ;; DOTIMES returns T through RETURN when the point escapes and NIL
        ;; when all 50 iterations complete, so escaped points append a 0 bit
        ;; and the remaining points append a 1 bit.
        (setf code
              (if (dotimes (n 50)
                    (when (< 4.0d0 (+ (* zr zr) (* zi zi)))
                      (return t))
                    (setf tr (+ (* zr zr) (- (* zi zi)) base-real)
                          zi (+ (* 2.0d0 zr zi) base-imag)
                          zr tr))
                  (ash code 1)
                  (logior (ash code 1) #x01)))
        (when (= (incf bit) 8)
          (setf (aref buffer index) code
                bit 0 code 0)
          (incf index)))
      ;; End of row: flush a partially filled byte, left-aligned and padded
      ;; with zero bits, so the next row starts on a byte boundary.  This is
      ;; a no-op when SIZE is a multiple of 8.
      (when (plusp bit)
        (setf (aref buffer index)
              (logand #xff (ash code (the (integer 1 7) (- 8 bit))))
              bit 0 code 0)
        (incf index)))
    (write-sequence buffer stream)))
(defun par-render (size stream thread-count)
  "Render the SIZE x SIZE Mandelbrot bitmap using THREAD-COUNT threads and
write the packed pixel bytes to STREAM.

The rows are divided into contiguous bands of (ceiling SIZE THREAD-COUNT)
rows, one band per thread.  Bands are clamped to SIZE, so a thread count that
does not divide SIZE evenly never renders rows outside the image.  Each
band's offset into the shared buffer is its first row times the number of
bytes per row, and every row is padded to a byte boundary, so threads write
to disjoint bytes.  A THREAD-COUNT below 1 is treated as 1.  Only the pixel
data is written; the caller emits the header."
  (declare (type fixnum size thread-count) (type stream stream)
           (optimize (speed 3) (safety 0) (debug 0)))
  (let* ((workers (max 1 thread-count))
         (row-bytes (ceiling size 8))
         (buffer (make-array (* size row-bytes)
                             :element-type '(unsigned-byte 8)))
         (band-rows (ceiling size workers)))
    (declare (type fixnum workers row-bytes band-rows))
    (labels ((render-sub (y-start y-end index)
               ;; Render rows Y-START (inclusive) to Y-END (exclusive) into
               ;; BUFFER, starting at byte INDEX.
               (declare (type fixnum y-start y-end index))
               (let ((code 0)           ; bits collected for the current byte
                     (bit 0)            ; number of bits currently in CODE
                     (zr 0.0d0) (zi 0.0d0) (tr 0.0d0)
                     (delta (/ 2d0 size))
                     (base-real -1.5d0) (base-imag -1.0d0))
                 (declare (type (unsigned-byte 8) code)
                          (type double-float zr zi tr base-real base-imag delta)
                          (type fixnum bit))
                 ;; >= rather than = so that an empty band terminates at once.
                 (do ((y y-start (1+ y)))
                     ((>= y y-end))
                   (declare (type fixnum y))
                   (setf base-imag (- 1.0d0 (* delta y)))
                   (dotimes (x size)
                     (setf base-real (+ -1.5d0 (* delta x))
                           zr base-real
                           zi base-imag)
                     ;; Escaped points append a 0 bit, all others a 1 bit.
                     (setf code
                           (if (dotimes (n 50)
                                 (when (< 4.0d0 (+ (* zr zr) (* zi zi)))
                                   (return t))
                                 (setf tr (+ (* zr zr) (- (* zi zi)) base-real)
                                       zi (+ (* 2.0d0 zr zi) base-imag)
                                       zr tr))
                               (ash code 1)
                               (logior (ash code 1) #x01)))
                     (when (= (incf bit) 8)
                       (setf (aref buffer index) code
                             bit 0
                             code 0)
                       (incf index)))
                   ;; End of row: flush a partially filled byte, padded with
                   ;; zero bits, so each row occupies exactly ROW-BYTES bytes.
                   (when (plusp bit)
                     (setf (aref buffer index)
                           (logand #xff
                                   (ash code (the (integer 1 7) (- 8 bit))))
                           bit 0
                           code 0)
                     (incf index))))))
      (declare (inline render-sub))
      (let (threads)
        (dotimes (i workers)
          ;; Fresh bindings per iteration so each closure sees its own band.
          (let* ((start (min size (* i band-rows)))
                 (end (min size (+ start band-rows)))
                 (idx (* start row-bytes)))
            (declare (type fixnum start end idx))
            (push (sb-thread:make-thread
                   (lambda () (render-sub start end idx)))
                  threads)))
        ;; Wait for every band before the buffer is written out.
        (dolist (thread threads)
          (sb-thread:join-thread thread)))
      (write-sequence buffer stream))))
(defun main ()
  "Program entry point.

Reads the image size from the first command-line argument and an optional
thread count (default 4) from the second, writes the P4 header to a fully
buffered stream opened on the standard output file descriptor, and then
renders the image: with PAR-RENDER when SBCL was built with thread support,
with RENDER otherwise."
  (declare (optimize (speed 0) (safety 3)))
  (let* ((argv sb-ext:*posix-argv*)
         (size-arg (second argv))
         (threads-arg (third argv))
         (size (parse-integer size-arg))
         (workers (if threads-arg
                      (parse-integer threads-arg)
                      4))
         (stdout-fd (sb-sys:fd-stream-fd sb-sys:*stdout*)))
    ;; The :default element type lets the same stream take the textual
    ;; header written by FORMAT and the byte vector written by the renderer.
    (with-open-stream (out (sb-sys:make-fd-stream stdout-fd
                                                  :input nil
                                                  :output t
                                                  :buffering :full
                                                  :element-type :default))
      (format out "P4~%~d ~d~%" size size)
      #+sb-thread (par-render size out workers)
      #-sb-thread (render size out)
      (force-output out))))
-- The Computer Language Benchmarks Game
-- https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
-- contributed by Mike Pall
-- modified by Rob Kendrick to be parallel
-- modified by Isaac Gouy
-- called with the following arguments on the command line;
-- 1: size of mandelbrot to generate
-- 2: number of children to spawn (defaults to 4; 6 reportedly works well on 4-way)
-- If this is a child, then there will be additional parameters;
-- 3: start row
-- 4: end row
--
-- Children buffer up their output and emit it to stdout when
-- finished, to avoid stalling due to a full pipe.
-- Command line: <size> [children] for the parent process; a child process
-- additionally receives its first and last row (both inclusive).
local width = tonumber(arg and arg[1]) or 100
local children = tonumber(arg and arg[2]) or 4
local srow = tonumber(arg and arg[3])
local erow = tonumber(arg and arg[4])
-- At least one whole child is required; with fewer, only the header would
-- ever be written.
children = math.max(1, math.floor(children))
local height, wscale = width, 2/width
local m, limit2 = 50, 4.0
local write, char = io.write, string.char

if not srow then
  -- We are the parent process: emit the header, then spawn the children.
  local workunit = math.floor(width / (children + 1))
  local handles = { }
  write("P4\n", width, " ", height, "\n")
  local last = children - 1
  for i = 0, last do
    -- Child 0 starts at row 0 and the last child always runs to the final
    -- row. Both rules apply at once when there is a single child, which
    -- therefore renders the whole image. (`and/or` is safe here: 0 is
    -- truthy in Lua.)
    local cs = (i == 0) and 0 or (workunit * i + 1)
    local ce = (i == last) and (height - 1) or (workunit * (i + 1))
    -- NOTE(review): the interpreter and script paths are interpolated
    -- unquoted, so paths containing spaces or shell metacharacters break
    -- the command line.
    handles[i + 1] = assert(io.popen(("%s %s %d %d %d %d"):format(
      arg[-1], arg[0], width, children, cs, ce)))
  end
  -- Collect the answers in row order, emit them, and release each pipe.
  for i = 1, #handles do
    local pipe = handles[i]
    write(pipe:read "*a")
    pipe:close()
  end
else
  -- We are a child process: render rows srow..erow and emit them in one
  -- write at the end.
  local obuff = { }
  for y = srow, erow do
    local Ci = 2*y / height - 1
    for xb = 0, width - 1, 8 do
      -- Pack up to eight pixels into one byte, leftmost pixel in the most
      -- significant bit.
      local bits = 0
      local xbb = xb + 7
      local xlast = xbb < width and xbb or width - 1
      for x = xb, xlast do
        bits = bits + bits
        local Zr, Zi, Zrq, Ziq = 0.0, 0.0, 0.0, 0.0
        local Cr = x * wscale - 1.5
        for _ = 1, m do
          local Zri = Zr*Zi
          Zr = Zrq - Ziq + Cr
          Zi = Zri + Zri + Ci
          Zrq = Zr*Zr
          Ziq = Zi*Zi
          if Zrq + Ziq > limit2 then
            -- Escaped: record a 1 here; the byte is inverted below.
            bits = bits + 1
            break
          end
        end
      end
      if xbb >= width then
        -- Pad a short final byte with 1 bits, which become 0 once inverted.
        for _ = width, xbb do bits = bits + bits + 1 end
      end
      -- Invert so that points that never escaped are the set bits.
      obuff[#obuff + 1] = char(255 - bits)
    end
  end
  write(table.concat(obuff))
end

LuaJIT and SBCL Benchmark

Overview

This is a quick benchmark comparing LuaJIT and SBCL using the Mandelbrot set programs from the Computer Language Benchmarks Game, which is hosted on Debian's Salsa site.

Software Versions

For this comparison I built both compilers from source using the tip of their Git repos as of this past Tuesday night. For SBCL I'm using commit 92951c6f4 and for LuaJIT I'm using b025b01c5b.

I'm not familiar with the LuaJIT build process, so I used the default build procedure, and ran "make" and then "sudo make install".

For SBCL I built with the following features turned on: (:sb-show-assem :immobile-space :compact-instance-header :sb-thread :sb-futex :sb-xref-for-internals). I compiled and installed with "make.sh --dynamic-space-size=8192" and then "sudo sh install.sh"

$> sbcl --version
SBCL 1.4.11.92-92951c6f4
$>
$> luajit -v
LuaJIT 2.0.5 -- Copyright (C) 2005-2017 Mike Pall. http://luajit.org/
$>

Programs

For the LuaJIT program I'm using the fastest Lua code from the benchmark site with no changes.

The fastest Lisp code uses SBCL's assembly intrinsics, so I used the second fastest program. I modified it to take a thread count argument and also tweaked the build flags to create an executable instead of a core.

Test Procedure

I created two shell scripts (run_lua.sh and run_lisp.sh) to run the benchmark. Images of size 10000 and 20000 were generated using 4 and 8 workers (threads for SBCL, child processes for LuaJIT), and each run was repeated 8 times.

# Runs Size Threads
8 10000 4
8 20000 4
8 10000 8
8 20000 8

LuaJIT Results

Size 10000, 4 Threads

User System % CPU Total
14.13s 0.16s 283% 5.034
14.06s 0.12s 308% 4.591
14.11s 0.13s 291% 4.883
14.13s 0.14s 281% 5.073
14.14s 0.12s 310% 4.596
13.96s 0.12s 279% 5.031
14.10s 0.13s 282% 5.043
14.09s 0.12s 280% 5.065

Size 10000, 8 Threads

User System % CPU Total
14.83s 0.11s 378% 3.944
14.62s 0.13s 357% 4.122
14.67s 0.14s 361% 4.097
14.60s 0.13s 355% 4.141
14.57s 0.10s 357% 4.111
14.70s 0.15s 365% 4.064
14.71s 0.16s 366% 4.058
14.82s 0.11s 376% 3.966

Size 20000, 4 Threads

User System % CPU Total
59.50s 0.45s 283% 21.166
59.53s 0.43s 285% 20.992
59.93s 0.43s 281% 21.478
59.35s 0.42s 290% 20.543
59.21s 0.41s 283% 20.997
59.28s 0.44s 303% 19.695
59.50s 0.45s 291% 20.545
59.62s 0.42s 295% 20.329

Size 20000, 8 Threads

User System % CPU Total
61.61s 0.54s 376% 16.514
61.83s 0.46s 376% 16.537
61.82s 0.47s 375% 16.584
61.52s 0.47s 377% 16.433
61.76s 0.44s 373% 16.635
61.66s 0.48s 371% 16.722
61.52s 0.43s 370% 16.738
61.86s 0.47s 371% 16.763

SBCL Results

Size 10000, 4 Threads

User System % CPU Total
13.32s 0.03s 278% 4.801
13.34s 0.02s 276% 4.833
13.30s 0.03s 275% 4.831
13.47s 0.02s 275% 4.903
13.31s 0.03s 276% 4.832
13.32s 0.01s 275% 4.831
13.31s 0.02s 276% 4.827
13.32s 0.02s 275% 4.833

Size 10000, 8 Threads

User System % CPU Total
14.73s 0.03s 358% 4.123
14.72s 0.04s 357% 4.133
14.72s 0.03s 354% 4.158
14.74s 0.04s 357% 4.129
14.77s 0.03s 354% 4.170
14.67s 0.02s 353% 4.163
14.72s 0.03s 356% 4.136
14.70s 0.05s 354% 4.163

Size 20000, 4 Threads

User System % CPU Total
53.14s 0.07s 276% 19.271
54.11s 0.09s 274% 19.780
53.10s 0.08s 276% 19.266
53.11s 0.10s 275% 19.335
53.00s 0.06s 274% 19.364
53.11s 0.09s 275% 19.291
54.57s 0.07s 272% 20.066
53.13s 0.08s 275% 19.283

Size 20000, 8 Threads

User System % CPU Total
58.92s 0.07s 367% 16.040
58.67s 0.08s 354% 16.557
58.68s 0.07s 355% 16.513
58.78s 0.07s 356% 16.506
58.53s 0.08s 349% 16.765
58.69s 0.09s 352% 16.668
58.59s 0.08s 352% 16.636
58.72s 0.09s 355% 16.530
#!/usr/bin/env zsh
# Times the SBCL-built ./mandelbrot executable.
# Each size/thread-count combination is announced once and then run 8 times;
# the image is written to <size>_<threads>.pnm and overwritten on every run.
for size in 10000 20000; do
  for threads in 4 8; do
    echo "Lisp, $threads threads, $size"
    for run in {1..8}; do
      time ./mandelbrot $size $threads > ${size}_${threads}.pnm
    done
  done
done
#!/usr/bin/env zsh
# Times mandelbrot.lua under luajit.
# Each size/worker-count combination is announced once and then run 8 times;
# the image is written to <size>_<threads>.pnm and overwritten on every run.
for size in 10000 20000; do
  for threads in 4 8; do
    echo "Lua, $threads threads, $size"
    for run in {1..8}; do
      time luajit mandelbrot.lua $size $threads > ${size}_${threads}.pnm
    done
  done
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment