Skip to content

Instantly share code, notes, and snippets.

@kmsquire
Created November 29, 2012 10:14
Show Gist options
  • Save kmsquire/4168004 to your computer and use it in GitHub Desktop.
Save kmsquire/4168004 to your computer and use it in GitHub Desktop.
timsort for julia
## modeled after sort tests in python's sortperf.py
##
## Note: as written, it requires DataFrames + https://github.com/HarlanH/DataFrames.jl/pull/83
##
## Kevin Squire
##
##
#
# julia> load("timsort")
#
# julia> load("sortperf")
#
# julia> df = main()
# Testing Int64...
# 15
# sort!
# 1
# mergesort!
# 1
# timsort!
# 1
# Testing Float64...
# 15
# sort!
# 1
# mergesort!
# 1
# timsort!
# 1
# Testing String[5]...
# 15
# sort!
# 1
# mergesort!
# 1
# timsort!
# 1
# Testing String[10]...
# 15
# sort!
# 1
# mergesort!
# 1
# timsort!
# 1
# DataFrame (12,11)
# test_type sort_fn log_size size *sort \sort /sort 3sort +sort =sort !sort
# [1,] Int64 "sort!" 15 32768 0.00136185 0.000911951 0.000935793 0.000915766 0.000916004 0.000937939 NA
# [2,] Int64 "mergesort!" 15 32768 0.00493693 0.00133395 0.00114083 0.00123811 0.00115705 0.00114894 NA
# [3,] Int64 "timsort!" 15 32768 0.006284 0.000840187 0.000105143 0.000399113 0.00022912 0.000110865 NA
# [4,] Float64 "sort!" 15 32768 0.0029211 0.000827074 0.000762939 0.000764132 0.00123119 0.00116014 NA
# [5,] Float64 "mergesort!" 15 32768 0.00792503 0.00377011 0.00169587 0.00190187 0.00170588 0.00176501 NA
# [6,] Float64 "timsort!" 15 32768 0.00711799 0.000241041 0.000189066 0.000549078 0.000316143 0.000194073 NA
# [7,] "String[5]" "sort!" 15 32768 0.177883 0.207631 0.101615 0.108721 0.101743 0.090728 NA
# [8,] "String[5]" "mergesort!" 15 32768 0.18063 0.203479 0.101664 0.107972 0.103052 0.0884719 NA
# [9,] "String[5]" "timsort!" 15 32768 0.169823 0.0156841 0.00670505 0.00736094 0.00702691 0.00162983 NA
# [10,] "String[10]" "sort!" 15 32768 0.178571 0.203515 0.101603 0.106281 0.101085 0.088846 NA
# [11,] "String[10]" "mergesort!" 15 32768 0.178402 0.202911 0.101342 0.106357 0.101818 0.0880799 NA
# [12,] "String[10]" "timsort!" 15 32768 0.169426 0.0156932 0.00672221 0.00719094 0.00708008 0.00165296 NA
load("DataFrames")
using DataFrames
randstr_fn(str_len::Int) = n -> [randstring(str_len) for i = 1:n]
randint_fn(m::Int) = n -> randi(m,n)
mergesort!{T}(a::AbstractVector{T}) = Base._jl_mergesort(a, 1, length(a), Array(T,length(a)))
sortperf(tsort!::Function, n::Int) = sortperf(tsort!, n, true, rand)
sortperf(tsort!::Function, n::Int, randfn::Function) = sortperf(tsort!, n, true, randfn)
sortperf(tsort!::Function, n::Int, qsort_path::Bool) = sortperf(tsort!, n, qsort_path, rand)
function sortperf(tsort!::Function, n::Int, qsort_path::Bool, randfn::Function)
gc()
srand(1)
times = Dict{String, Float64}()
data = randfn(n)
gc()
times["*sort"] = @elapsed tsort!(data)
@assert issorted(data)
reverse!(data)
gc()
times["\\sort"] = @elapsed tsort!(data)
@assert issorted(data)
gc()
times["/sort"] = @elapsed tsort!(data)
@assert issorted(data)
for i = 1:3
n1 = randi(n)
n2 = randi(n)
data[n1], data[n2] = data[n2], data[n1]
end
gc()
times["3sort"] = @elapsed tsort!(data)
@assert issorted(data)
data[end-9:end] = randfn(10)
gc()
times["+sort"] = @elapsed tsort!(data)
@assert issorted(data)
data = data[randi(4, n)]
gc()
times["~sort"] = @elapsed tsort!(data)
@assert issorted(data)
data = data[ones(Int, n)]
gc()
times["=sort"] = @elapsed tsort!(data)
@assert issorted(data)
if qsort_path
data = sort!(randfn(div(n,2)))
data = vcat(reverse(data), data)
gc()
times["!sort"] = @elapsed tsort!(data)
@assert issorted(data)
end
times
end
main() = main(15:15)
main(log2range::Range1) = main(log2range, 1, false)
main(log2range::Range1, replicates::Int) = main(log2range, replicates, false)
function main(log2range::Range1, replicates::Int, do_qsort!::Bool)
times = Dict[]
for (tt_name, test_type) in Any[(Int, randint_fn(10)), (Float64, rand), ("String[5]", randstr_fn(5)),
("String[10]", randstr_fn(10))]
println("Testing $tt_name...")
for logsize in log2range
println(" $logsize")
size = 2^logsize
for s! in [sort!, mergesort!, timsort!]
println(" $(s!)")
print(" ")
# replicate
for i = 1:replicates
print(i, " "); flush(stdout_stream)
rec = {"test_type"=>tt_name, "sort_fn"=>string(s!), "log_size"=>logsize, "size"=>size}
merge!(rec, sortperf(s!, size, false, test_type))
push(times, rec)
end
println()
end
end
end
# This will fail if patch
# https://github.com/HarlanH/DataFrames.jl/pull/83
# is not installed
df = DataFrame(times, ["test_type", "sort_fn", "log_size", "size", "*sort",
"\\sort", "/sort", "3sort", "~sort", "+sort", "=sort", "!sort"])
end
# timsort for julia
# Kevin Squire
#
# This is an implementation of timsort based on the algorithm description at
#
# http://svn.python.org/projects/python/trunk/Objects/listsort.txt
# http://en.wikipedia.org/wiki/Timsort
#
typealias Run Range1{Int}
const MIN_GALLOP = 7
type MergeState
runs::Array{Run}
min_gallop::Int
end
MergeState() = MergeState(Run[], MIN_GALLOP)
islte(x,y) = !isless(y,x)
for (loc, cmpfn) in [(:last, :islte),
(:first, :isless)]
local gallop = symbol("gallop_$(loc)")
local rgallop = symbol("rgallop_$(loc)")
@eval begin
# Galloping binary search starting at left
function ($gallop)(a::AbstractVector, x, lo::Int, hi::Int)
i = lo
inc = 1
lo = lo-1
hi = hi+1
while i < hi && ($cmpfn)(a[i],x)
lo = i
i += inc
inc <<= 1
end
hi = min(i+1, hi)
# Binary search
while lo < hi-1
i = (lo+hi)>>>1
if ($cmpfn)(a[i], x)
lo = i
else
hi = i
end
end
hi
end
end
@eval begin
# Galloping binary search starting at right
function ($rgallop)(a::AbstractVector, x, lo::Int, hi::Int)
i = hi
dec = 1
lo = lo-1
hi = hi+1
while i > lo && !($cmpfn)(a[i],x)
hi = i
i -= dec
dec <<= 1
end
lo = max(lo, i-1)
# Binary search
while lo < hi-1
i = (lo+hi)>>>1
if ($cmpfn)(a[i], x)
lo = i
else
hi = i
end
end
hi
end
end
end
# Determine a good minimum run size for efficient merging
# For details, see "Computing minrun" in
# http://svn.python.org/projects/python/trunk/Objects/listsort.txt
function merge_compute_minrun(N::Int)
r = 0
while N > 63
r |= (N & 1)
N >>= 1
end
N + r
end
# Count the next run length
# Returns the length, and 'true' if ascending
function count_run(v::AbstractVector, lo::Int)
if lo == length(v)
return (1, true)
end
if !isless(v[lo+1], v[lo])
for i = lo+2:length(v)
if isless(v[i], v[i-1])
return(i-lo, true)
end
end
return (length(v)-lo+1, true)
else
for i = lo+2:length(v)
if !isless(v[i], v[i-1])
return(i-lo, false)
end
end
return (length(v)-lo+1, false)
end
end
# Merge consecutive runs
# For A,B,C = last three lengths, merge_collapse!()
# maintains 2 invariants:
#
# A > B + C
# B > C
#
# If any of these are violated, a merge occurs to
# correct it
merge_collapse!(v::AbstractVector, state::MergeState) = merge_collapse!(v, state, false)
function merge_collapse!(v::AbstractVector, state::MergeState, force::Bool)
if length(state.runs) < 2
return v
end
while length(state.runs) > 2
(a,b,c) = state.runs[end-2:end]
if length(a) > length(b)+length(c) && length(b) > length(c) && !force
break # invariants are satisfied, leave loop
end
if length(a) < length(c)
merge(v,a,b,state)
pop(state.runs)
pop(state.runs)
pop(state.runs)
push(state.runs, first(a):last(b))
push(state.runs, c)
else
merge(v,b,c,state)
pop(state.runs)
pop(state.runs)
push(state.runs, first(b):last(c))
end
end
if length(state.runs) == 2
(a,b) = state.runs[end-1:end]
if length(a) <= length(b) || force
merge(v,a,b,state)
pop(state.runs)
pop(state.runs)
push(state.runs, first(a):last(b))
end
end
v
end
# Merge runs a and b in vector v
function merge(v::AbstractVector, a::Run, b::Run, state::MergeState)
# First elements in a <= b[1] are already in place
a = gallop_last(v, v[first(b)], first(a), last(a)) : last(a)
# Last elements in b >= a[end] are already in place
b = first(b) : rgallop_first(v, v[last(a)], first(b), last(b))-1
if length(a) == 0 || length(b) == 0
return
end
# Choose merge_lo or merge_hi based on the amount
# of temporary memory needed (smaller of a and b)
if length(a) < length(b)
merge_lo(v, a, b, state)
else
merge_hi(v, a, b, state)
end
end
function merge_lo(v::AbstractVector, a::Run, b::Run, state::MergeState)
# Copy a
v_a = v[a]
# Pointer into (sub)arrays
p = first(a)
from_a = 1
from_b = first(b)
mode = :normal
while true
if mode == :normal
# Compare and copy element by element
count_a = count_b = 0
while from_a <= length(a) && from_b <= last(b)
if v[from_b] < v_a[from_a]
v[p] = v[from_b]
from_b += 1
count_a = 0
count_b += 1
else
v[p] = v_a[from_a]
from_a += 1
count_a += 1
count_b = 0
end
p += 1
# Switch to galloping mode if a string of elements
# has come from the same set
if count_b >= state.min_gallop || count_a >= state.min_gallop
mode = :galloping
break
end
end
# Finalize if we've exited the loop normally
if mode == :normal
mode = :finalize
end
end
if mode == :galloping
# Use binary search to find range to copy
while from_a <= length(a) && from_b <= last(b)
# Copy the next run from b
b_run = from_b : gallop_first(v, v_a[from_a], from_b, last(b)) - 1
p_end = p + length(b_run) - 1
v[p:p_end] = v[b_run]
p = p_end + 1
from_b = last(b_run) + 1
# ... then copy the first element from a
v[p] = v_a[from_a]
p += 1
from_a += 1
if from_a > length(a) || from_b > last(b) break end
# Copy the next run from a
a_run = from_a : gallop_last(v_a, v[from_b], from_a, length(a)) - 1
p_end = p + length(a_run) - 1
v[p:p_end] = v_a[a_run]
p = p_end + 1
from_a = last(a_run) + 1
# ... then copy the first element from b
v[p] = v[from_b]
p += 1
from_b += 1
# Return to normal mode if we haven't galloped...
if length(a_run) < MIN_GALLOP && length(b_run) < MIN_GALLOP
mode = :normal
break
end
# Reduce min_gallop if this gallop was successful
state.min_gallop -= 1
end
if mode == :galloping
mode = :finalize
end
state.min_gallop = max(state.min_gallop,0) + 2 # penalty for leaving gallop mode
end
if mode == :finalize
# copy end of a
p_end = p + (length(a) - from_a)
v[p:p_end] = v_a[from_a:end]
break
end
end
end
function merge_hi(v::AbstractVector, a::Run, b::Run, state::MergeState)
# Copy b
v_b = v[b]
# Pointer into (sub)arrays
p = last(b)
from_a = last(a)
from_b = length(b)
mode = :normal
while true
if mode == :normal
# Compare and copy element by element
count_a = count_b = 0
while from_a >= first(a) && from_b >= 1
if v_b[from_b] > v[from_a]
v[p] = v_b[from_b]
from_b -= 1
count_a = 0
count_b += 1
else
v[p] = v[from_a]
from_a -= 1
count_a += 1
count_b = 0
end
p -= 1
# Switch to galloping mode if a string of elements
# has come from the same set
if count_b >= state.min_gallop || count_a >= state.min_gallop
mode = :galloping
break
end
end
# Finalize if we've exited the loop normally
if mode == :normal
mode = :finalize
end
end
if mode == :galloping
# Use binary search to find range to copy
while from_a >= first(a) && from_b >= 1
# Copy the next run from b
b_run = rgallop_first(v_b, v[from_a], 1, from_b) : from_b
p_start = p - length(b_run) + 1
v[p_start:p] = v_b[b_run]
p = p_start - 1
from_b = first(b_run) - 1
# ... then copy the first element from a
v[p] = v[from_a]
p -= 1
from_a -= 1
if from_a < first(a) || from_b < 1 break end
# Copy the next run from a
a_run = rgallop_last(v, v_b[from_b], first(a), from_a) : from_a
p_start = p - length(a_run) + 1
v[p_start:p] = v[a_run]
p = p_start - 1
from_a = first(a_run) - 1
# ... then copy the first element from b
v[p] = v_b[from_b]
p -= 1
from_b -= 1
# Return to normal mode if we haven't galloped...
if length(a_run) < MIN_GALLOP && length(b_run) < MIN_GALLOP
mode = :normal
break
end
# Reduce min_gallop if this gallop was successful
state.min_gallop -= 1
end
if mode == :galloping
mode = :finalize
end
state.min_gallop = max(state.min_gallop, 0) + 2 # penalty for leaving gallop mode
end
if mode == :finalize
# copy start of b
p_start = p - from_b + 1
v[p_start:p] = v_b[1:from_b]
break
end
end
end
function timsort!(v::AbstractVector)
# Initialization
minrun = merge_compute_minrun(length(v))
state = MergeState()
p = 1
while p <= length(v)
(count, ascending) = count_run(v, p)
if count < minrun
# Make a run of length minrun
count = min(minrun, length(v)-p+1)
run_range = p:p+count-1
Base._jl_insertionsort(v, p, p+count-1)
else
# We found a run!
run_range = p:p+count-1
if !ascending
#reverse!(sub(v, run_range))
# Why is this faster?
for i = 0:div(count,2)-1
v[p+i], v[p+count-i-1] = v[p+count-i-1], v[p+i]
end
end
end
# Push this run onto the queue and merge if needed
push(state.runs, run_range)
p = p+count
merge_collapse!(v, state);
end
# Force merge at the end
if length(state.runs) > 1
merge_collapse!(v, state, true)
end
v
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment