kmsquire/OrderedDict_timings.txt

## dict_tests.jl
n = 100000
strs = [randstring(10) for i = 1:n];
nums = rand(Int, n);

# regular map

function dict_insertion_test{T<:Associative}(d::T)
    #empty!(d)
    for i = 1:n
        d[strs[i]] = nums[i]
    end
    d
end

function dict_deletion_test{T<:Associative}(d::T, iters::Int)
    #dict_insertion_test(d)
    for i in rand(1:n, iters)
        delete!(d, strs[i], 0)
    end
    d
end

function dict_ins_del_test{T<:Associative}(d::T, iters::Int)
    #dict_insertion_test(d)
    for i in rand(1:n, iters)
        randbool()? delete!(d, strs[i], 0) : (d[strs[i]] = nums[i])
    end
    d
end


function test_ins(T::Type, iter::Int)
    d = T{String,Int}()
    t = 0.0
    for i = 1:iter
      empty!(d)
      gc()
      t += @elapsed dict_insertion_test(d)
   end
   t
end

function test_ins_del(T::Type, iter::Int)
    d = T{String,Int}()
    t = 0.0
    for i = 1:iter
      empty!(d)
      dict_insertion_test(d)
      gc()
      t += @elapsed dict_ins_del_test(d, 100000)
   end
   t
end

function test_del(T::Type, iter::Int)
    d = T{String,Int}()
    t = 0.0
    for i = 1:iter
      empty!(d)
      dict_insertion_test(d)
      gc()
      t += @elapsed dict_deletion_test(d, 100000)
   end
   t
end

function run_all()
    for test in [test_ins, test_del, test_ins_del]
        println(test)
        println("="^length(string(test)))
        for T in [Dict, OrderedDict]
            print("$T: ")
            times = Float64[test(T, 5) for i = 1:5]
            println("$times, median=$(median(times))")
        end
        println()
    end
end

## OrderedDict_timings.txt
Here are 3 implemenations of an `OrderedDict` for julia.  The first
creates a `DictItem` class, which is stored as the value in a wrapped
Dict.  The second only stores a vector of keys, and the third
reimplements the standard julia hash-based dictionary inline, and adds
just enough information to maintain key-value order.

I had other, similar versions along the way, with minor tweaks that
didn't change the timings much.  This includes a version where
DictItem was immutable.


One version I did not implement was a doubly-linked-list version,
which should be much faster at deletion than the first two, but which
is more limited in the operations that can be done with it (e.g., not
shown here is code which allows the dictionary to be sorted or treated
as a dequeue of (key, value) pairs).  See
https://github.com/kmsquire/OrderedCollections.jl for a version which
includes this code.


`dict_tests.jl` is timing code that tests the insertion of 100,000
items, deletion of 100,000 items (with possible duplicates), and a
random mix of insertion and deletions.  The weak point of the first
two versions is clearly deletion.

============================================================================================
OrderedDictv1.jl
============================================================================================

julia> require("dict_tests.jl")

julia> require("OrderedDictv1.jl")

julia> run_all()
test_ins
========
Dict{K,V}: [0.339021, 0.293294, 0.292359, 0.293016, 0.291899], median=0.293015884
OrderedDict{K,V}: [0.558897, 0.432915, 0.43435, 0.43405, 0.435232], median=0.43434986900000006

test_del
========
Dict{K,V}: [0.584101, 0.565818, 0.565554, 0.564916, 0.565354], median=0.5655537869999999
OrderedDict{K,V}: [0.797462, 0.785816, 0.781939, 0.783949, 0.784663], median=0.784663365

test_ins_del
============
Dict{K,V}: [0.61392, 0.612826, 0.611372, 0.612516, 0.613474], median=0.612826085
OrderedDict{K,V}: [0.829546, 0.827917, 0.828826, 0.828606, 0.831101], median=0.8288263260000001


============================================================================================
OrderedDictv3.jl
============================================================================================
julia> require("dict_tests.jl")

julia> require("OrderedDictv3.jl")

julia> run_all()
test_ins
========
Dict{K,V}: [0.323519, 0.284433, 0.282385, 0.281725, 0.281555], median=0.282384714
OrderedDict{K,V}: [0.489741, 0.380406, 0.37986, 0.380098, 0.37905], median=0.38009831499999996

test_del
========
Dict{K,V}: [0.594232, 0.573725, 0.574156, 0.57504, 0.574906], median=0.5749057240000001
OrderedDict{K,V}: [0.624253, 0.605593, 0.610013, 0.606928, 0.609852], median=0.609851745

test_ins_del
============
Dict{K,V}: [0.614838, 0.615072, 0.615307, 0.614056, 0.615215], median=0.61507241
OrderedDict{K,V}: [0.654655, 0.658411, 0.656661, 0.656376, 0.654963], median=0.6563759739999999


## OrderedDictv1.jl
import Base.isempty, Base.length, Base.sizeof
import Base.start, Base.next, Base.done
import Base.has, Base.get
import Base.assign, Base.ref, Base.delete!, Base.empty!
import Base.show, Base.similar
import Base.findnextnot, Base.findfirstnot

type DictItem{K,V}
    k::K
    v::V
    idx::Int
end

type OrderedDict{K,V} <: Associative{K,V}
    ht::Dict{K,DictItem{K,V}}
    v::Vector{DictItem{K,V}}
    v_slots::BitArray
    ndel::Int

    OrderedDict() = new(Dict{K,DictItem{K,V}}(), similar(Array(DictItem{K,V},1), 0), BitArray(), 0)
    function OrderedDict(ks,vs)
        d = new(Dict{K,DictItem{K,V}}(), similar(Array(DictItem{K,V},1), 0), BitArray(), 0)
        for (i, (k, v)) in enumerate(zip(ks, vs))
            item = DictItem{K,V}(k,v,i)
            d.ht[k] = item
            push!(d.v, item)
            push!(d.v_slots, true)
        end
        d
    end
end
OrderedDict() = OrderedDict{Any, Any}()
OrderedDict(ks, vs) = OrderedDict{Any,Any}(ks, vs)
OrderedDict{K,V}(ks::AbstractArray{K}, vs::AbstractArray{V}) = OrderedDict{K,V}(ks, vs)

# syntax entry points
OrderedDict{K,V}(ks::(K...), vs::(V...)) = OrderedDict{K  ,V  }(ks, vs)
OrderedDict{K  }(ks::(K...), vs::Tuple ) = OrderedDict{K  ,Any}(ks, vs)
OrderedDict{V  }(ks::Tuple , vs::(V...)) = OrderedDict{Any,V  }(ks, vs)

similar{K,V}(d::OrderedDict{K,V}) = OrderedDict{K,V}()

# TODO: serialize, deserialize

## collections ##

isempty(d::OrderedDict) = isempty(d.ht)
length(d::OrderedDict) = length(d.ht)      # d.v may have empty slots
has(d::OrderedDict, key) = has(d.ht, key)

## associative ##

get(d::OrderedDict, key, default) = has(d, key) ? d[key] : default

function empty!(d::OrderedDict)
    empty!(d.ht)
    empty!(d.v)
    empty!(d.v_slots)
    d.ndel = 0
end

## indexable ##

ref(d::OrderedDict, key) = d.ht[key].v

function assign{K,V}(d::OrderedDict{K,V}, v, key)
    # 3/4 deleted?
    if d.ndel >= ((3*length(d))>>2)
        _compact(d)
    end

    if has(d, key)
        d.ht[key].v = v
    else
        item = DictItem{K,V}(key, v, length(d.v)+1)
        d.ht[key] = item
        push!(d.v, item)
        push!(d.v_slots, true)
    end
    d
end


## iterable ##

skip_deleted(d::OrderedDict, i) = findnext(d.v_slots, i)

start(d::OrderedDict) = skip_deleted(d,1)
done(d::OrderedDict, i) = (i == 0)
next(d::OrderedDict, i) = ((item=d.v[i]; (item.k, item.v)), skip_deleted(d,i+1))


## utility ##

function _compact(d::OrderedDict)
    v = d.v
    slots = d.v_slots

    # start with first empty slot
    s_pos = findfirstnot(slots)
    if s_pos == 0
        return
    end
    v_pos = findnext(slots, s_pos)

    # fill down empty slots with consecutive filled slots
    while v_pos != 0
        item = v[s_pos] = v[v_pos]
        item.idx = s_pos

        s_pos += 1
        v_pos = findnext(slots, v_pos+1)
    end

    new_sz = length(d)
    resize!(d.v, new_sz)
    resize!(d.v_slots, new_sz)
    slots[1:end] = true
    d.ndel = 0

    nothing
end

## associative ##


function delete!(d::OrderedDict, key)
    item = d.ht[key]
    delete!(d.ht, key)
    # Is this right?
    ccall(:jl_arrayunset, Void, (Any, Uint), d.v, item.idx-1)
    d.v_slots[item.idx] = false
    d.ndel += 1

    item.v
end

function delete!(d::OrderedDict, key, default)
    if has(d, key)
        delete!(d, key)
    else
        default
    end
end

## OrderedDictv3.jl
# Construction
import Base.similar, Base.sizehint

# Serialization
import Base.serialize, Base.deserialize

# Iteration
import Base.start, Base.next, Base.done

# General Collections
import Base.isempty, Base.empty!, Base.length

# Indexable Collections
import Base.setindex!, Base.getindex

# Associative Collections
import Base.has, Base.get, Base.getkey, Base.delete!

# Dequeue-like
import Base.push!, Base.pop!, Base.unshift!, Base.shift!, Base.append!, Base.insert!

# Useful, unexported by Base
import Base.findnextnot, Base.findfirstnot   ## from bitarray.jl
import Base._tablesz, Base.hashindex         ## from dict.jl

# Sorting

import Base.sort!, Base.sort, Base.sortby!, Base.sortby, Base.sortperm
import Sort.DEFAULT_UNSTABLE, Sort.DEFAULT_STABLE,
       Sort.Ordering, Sort.Algorithm,
       Sort.Forward, Sort.Reverse,
       Sort.By, Sort.Lt, Sort.lt

# Exports

export OrderedDict,
    similar,
    sizehint,
    start,
    next,
    done,
    empty!,
    getindex,
    setindex!,
    push!,
    pop!,
    unshift!,
    shift!,
    append!,
    insert!,
    sort,
    sort!,
    sortby,
    sortby!,
    sortperm,
    indexof,
    getitem


######################
## OrderedDict type ##

type OrderedDict{K,V} <: Associative{K,V}
    slots::Array{Uint8,1}
    keys::Array{K,1}
    vals::Array{V,1}
    ord_idxs::Array{Int,1}
    ord_slots::BitArray
    ord::Array{Int,1}
    ndel::Int
    odel::Int
    count::Int
    deleter::Function

    function OrderedDict()
        n = 16
        new(zeros(Uint8,n), Array(K,n), Array(V,n), Array(Int,n), BitArray(), Array(Int,0), 0, 0, 0, identity)
    end
    function OrderedDict(ks, vs)
        n = length(ks)
        h = OrderedDict{K,V}()
        for i=1:n
            h[ks[i]] = vs[i]
        end
        return h
    end
end
OrderedDict() = OrderedDict{Any,Any}()

OrderedDict{K,V}(ks::AbstractArray{K}, vs::AbstractArray{V}) = OrderedDict{K,V}(ks,vs)
OrderedDict(ks, vs) = OrderedDict{Any,Any}(ks, vs)

# syntax entry points
OrderedDict{K,V}(ks::(K...), vs::(V...)) = OrderedDict{K  ,V  }(ks, vs)
OrderedDict{K  }(ks::(K...), vs::Tuple ) = OrderedDict{K  ,Any}(ks, vs)
OrderedDict{V  }(ks::Tuple , vs::(V...)) = OrderedDict{Any,V  }(ks, vs)

##########################
## Construction-related ##

similar{K,V}(d::OrderedDict{K,V}) = OrderedDict{K,V}()

function sizehint(d::OrderedDict, newsz)
    oldsz = length(d.slots)
    if newsz <= oldsz
        # todo: shrink
        # be careful: rehash() assumes everything fits. it was only designed
        # for growing.
        return d
    end
    # grow at least 25%
    newsz = max(newsz, (oldsz*5)>>2)
    rehash(d, newsz)
end

## TODO: some of these are simply copied from base/dict.jl,
##       and the type was changed from Dict -> OrderedDict
##
##       (Field names might also be slightly different, but
##       can be reverted.)
##
##       It would be nice if they were defined in terms
##       of an AbstractDict <: Associative

###################
## Serialization ##
# TODO: Remove these if AbstractDict versions become available

function serialize(s, t::OrderedDict)
    serialize_type(s, typeof(t))
    write(s, int32(length(t)))
    for (k,v) in t
        serialize(s, k)
        serialize(s, v)
    end
end

function deserialize{K,V}(s, T::Type{OrderedDict{K,V}})
    n = read(s, Int32)
    t = T(); sizehint(t, n)
    for i = 1:n
        k = deserialize(s)
        v = deserialize(s)
        t[k] = v
    end
    return t
end

########################################
## Dict/OrderedDict Utility functions ##

# TODO: Remove these if AbstractDict versions become available
isslotempty(h::OrderedDict, i::Int) = h.slots[i] == 0x0
isslotfilled(h::OrderedDict, i::Int) = h.slots[i] == 0x1
isslotmissing(h::OrderedDict, i::Int) = h.slots[i] == 0x2

# OrderedDict version of rehash
function rehash{K,V}(h::OrderedDict{K,V}, newsz)
    newsz = _tablesz(newsz)
    nel = h.count
    h.ndel = h.count = 0
    if nel == 0
        resize!(h.slots, newsz)
        fill!(h.slots, 0)
        resize!(h.keys, newsz)
        resize!(h.vals, newsz)
        resize!(h.ord_idxs, newsz)
        resize!(h.ord_slots, 0)
        resize!(h.ord, 0)
        return h
    end
    olds = h.slots
    oldk = h.keys
    oldv = h.vals
    oldi = h.ord_idxs
    sz = length(olds)
    h.slots = zeros(Uint8,newsz)
    h.keys = Array(K, newsz)
    h.vals = Array(V, newsz)
    h.ord_idxs = Array(Int, newsz)

    for i = 1:sz
        if olds[i] == 0x1
            k = oldk[i]
            index = hashindex(k, newsz)
            while h.slots[index] != 0
                index = (index & (newsz-1)) + 1
            end
            h.slots[index] = 0x1
            h.keys[index] = k
            h.vals[index] = oldv[i]

            idx = oldi[i]
            h.ord_idxs[index] = idx
            h.ord[idx] = index
            h.count += 1
        end
    end

    return h
end

# get the index where a key is stored, or -1 if not present
# TODO: remove if AbstractDict version becomes available
function ht_keyindex{K,V}(h::OrderedDict{K,V}, key)
    sz = length(h.keys)
    iter = 0
    maxprobe = max(16, sz>>6)
    index = hashindex(key, sz)
    orig = index
    keys = h.keys

    while true
        if isslotempty(h,index)
            break
        end
        if !isslotmissing(h,index) && isequal(key,keys[index])
            return index
        end

        index = (index & (sz-1)) + 1
        iter+=1
        if iter > maxprobe || index==orig
            break
        end
    end

    return -1
end


# Removes empty slots of order array in OrderedDict
function _compact(h::OrderedDict)
    # start with first empty slot
    npos = findfirstnot(h.ord_slots)
    if npos == 0 return; end
    opos = findnext(h.ord_slots, npos)

    # fill down empty slots with consecutive filled slots
    while opos != 0
        index = h.ord[npos] = h.ord[opos]
        h.ord_idxs[index] = npos

        npos += 1
        opos = findnext(h.ord_slots, opos+1)
    end

    resize!(h.ord, h.count)
    resize!(h.ord_slots, h.count)
    h.ord_slots[1:end] = true
    h.odel = 0

    nothing
end

###############
## Iteration ##

skip_deleted(t::OrderedDict, i::Int) = findnext(t.ord_slots, i)

start(t::OrderedDict) = skip_deleted(t,1)
done(t::OrderedDict, i) = (i == 0)
next(t::OrderedDict, i) = (idx = t.ord[i]; ((t.keys[idx],t.vals[idx]), skip_deleted(t,i+1)))


#########################
## General Collections ##

isempty(t::OrderedDict) = (t.count == 0)
length(t::OrderedDict) = t.count

function empty!{K,V}(h::OrderedDict{K,V})
    fill!(h.slots, 0x0)
    sz = length(h.slots)
    h.keys = Array(K, sz)
    h.vals = Array(V, sz)
    h.ord_idxs = Array(Int, sz)
    h.ord_slots = BitArray()
    h.ord = Array(Int, 0)
    h.ndel = 0
    h.odel = 0
    h.count = 0
    return h
end

###########################
## Indexable Collections ##

# As with ref, we want to allow assignment by index position as well, as key,
# but we ignore this when K<:Number

function setindex!{K,V}(h::OrderedDict{K,V}, kv::(Any,Any), index::Integer)
    (key,v) = kv
    ord_idx = indexof(h,key,0)
    if ord_idx == index
        return setindex!(h, v, key)
    end
    # TODO: this can be more efficient
    delete!(h, h.keys[h.ord[index]])
    insert!(h, index, kv)
end

setindex!{K<:Number,V}(h::OrderedDict{K,V}, v::(Any,Any), key::Integer) =
    invoke(setindex!, (OrderedDict{K,V}, Any, Any), h, v, key)
setindex!{K<:Number,V}(h::OrderedDict{K,V}, v, key::Integer) =
    invoke(setindex!, (OrderedDict{K,V}, Any, Any), h, v, key)


function setindex!{K,V}(h::OrderedDict{K,V}, v, key)
    key = convert(K,key)
    v   = convert(V,  v)

    sz = length(h.keys)

    if h.ndel >= ((3*sz)>>2) || h.count*3 > sz*2
        # > 3/4 deleted or > 2/3 full
        rehash(h, h.count > 64000 ? h.count*2 : h.count*4)
        sz = length(h.keys)  # rehash may resize the table at this point!
    end

    if h.odel >= ((3*length(h.ord))>>2)
        # > 3/4 of ord array deleted
        _compact(h)
    end

    iter = 0
    maxprobe = max(16, sz>>6)
    index = hashindex(key, sz)
    orig = index
    avail = -1  # an available slot
    #keys = h.keys; vals = h.vals; ord_idxs = h.ord_idxs; ord_slots = h.ord_slots; ord = h.ord

    while true
        if isslotempty(h,index)
            if avail > 0; index = avail; end
            push!(h.ord, index)
            push!(h.ord_slots, true)
            h.slots[index] = 0x1
            h.keys[index] = key
            h.vals[index] = v
            h.ord_idxs[index] = length(h.ord)
            h.count += 1
            return h
        end

        if isslotmissing(h,index)
            if avail<0
                # found an available slot, but need to keep scanning
                # in case "key" already exists in a later collided slot.
                avail = index
            end
        elseif isequal(key, h.keys[index])
            h.vals[index] = v
            return h
        end

        index = (index & (sz-1)) + 1
        iter+=1
        if iter > maxprobe || index==orig
            break
        end
    end

    if avail>0
        index = avail
        push!(h.ord, index)
        push!(h.ord_slots, true)
        h.slots[index] = 0x1
        h.keys[index] = key
        h.vals[index] = v
        h.ord_idxs[index] = length(h.ord)
        h.count += 1
        return h
    end

    rehash(h, h.count > 64000 ? sz*2 : sz*4)

    setindex!(h, v, key)
end

# We want to allow the user to access the (k,v) pairs by index
# However, first and foremost, this is a dictionary, so if the
# keys are numbers, assume any reference using an integer
# as a key is attempting a has lookup.

# TODO: This might be confusing behavior, so consider disabling
# and simply sequential access through getitem()

getindex{K,V}(h::OrderedDict{K,V}, ord_idx::Integer) = getitem(h, ord_idx)

function getindex{K<:Number,V}(h::OrderedDict{K,V}, key::Integer)
    index = ht_keyindex(h, key)
    return (index<0) ? throw(KeyError(key)) : h.vals[index]::V
end

function getindex{K,V}(h::OrderedDict{K,V}, key)
    index = ht_keyindex(h, key)
    return (index<0) ? throw(KeyError(key)) : h.vals[index]::V
end

function indexof{K,V}(h::OrderedDict{K,V}, key)
    index = ht_keyindex(h, key)
    return (index<0) ? throw(KeyError(key)) : (_compact(h); h.ord_idxs[index])
end

function indexof{K,V}(h::OrderedDict{K,V}, key, deflt)
    index = ht_keyindex(h, key)
    return (index<0) ? deflt : (_compact(h); h.ord_idxs[index])
end

#############################
## Associative Collections ##

has(h::OrderedDict, key) = (ht_keyindex(h, key) >= 0)

function get{K,V}(h::OrderedDict{K,V}, key, deflt)
    index = ht_keyindex(h, key)
    return (index<0) ? deflt : h.vals[index]::V
end

function getkey{K,V}(h::OrderedDict{K,V}, key, deflt)
    index = ht_keyindex(h, key)
    return (index<0) ? deflt : h.keys[index]::K
end

function getitem{K,V}(h::OrderedDict{K,V}, ord_idx)
    _compact(h)
    index = h.ord[ord_idx]
    return (h.keys[index], h.vals[index])::(K,V)
end

function _delete!(h::OrderedDict, index)
    val = h.vals[index]
    idx = h.ord_idxs[index]
    h.slots[index] = 0x2
    ccall(:jl_arrayunset, Void, (Any, Uint), h.keys, index-1)
    ccall(:jl_arrayunset, Void, (Any, Uint), h.vals, index-1)
    # h.ord_idxs[index] = 0  ## not really necessary
    # ord[idx] = 0           ## not really necessary
    h.ord_slots[idx] = false
    h.ndel += 1
    h.odel += 1
    h.count -= 1
    return val
end

function delete!{K,V}(h::OrderedDict{K,V}, key)
    index = ht_keyindex(h, key)
    index > 0 ? _delete!(h, index) : throw(KeyError(key))
end

function delete!{K,V}(h::OrderedDict{K,V}, ord_idx::Integer)
    key = h.keys[h.ord[ord_idx]]
    (key, delete!(h, key))::(K,V)
end

delete!{K<:Number,V}(h::OrderedDict{K,V}, key::Integer) = invoke(delete!, (OrderedDict{K,V}, Any), h, key)

function delete!(h::OrderedDict, key, default)
    index = ht_keyindex(h, key)
    index > 0 ? _delete!(h, index) : default
end
	n = 100000
	strs = [randstring(10) for i = 1:n];
	nums = rand(Int, n);

	# regular map

	function dict_insertion_test{T<:Associative}(d::T)
	#empty!(d)
	for i = 1:n
	d[strs[i]] = nums[i]
	end
	d
	end

	function dict_deletion_test{T<:Associative}(d::T, iters::Int)
	#dict_insertion_test(d)
	for i in rand(1:n, iters)
	delete!(d, strs[i], 0)
	end
	d
	end

	function dict_ins_del_test{T<:Associative}(d::T, iters::Int)
	#dict_insertion_test(d)
	for i in rand(1:n, iters)
	randbool()? delete!(d, strs[i], 0) : (d[strs[i]] = nums[i])
	end
	d
	end


	function test_ins(T::Type, iter::Int)
	d = T{String,Int}()
	t = 0.0
	for i = 1:iter
	empty!(d)
	gc()
	t += @elapsed dict_insertion_test(d)
	end
	t
	end

	function test_ins_del(T::Type, iter::Int)
	d = T{String,Int}()
	t = 0.0
	for i = 1:iter
	empty!(d)
	dict_insertion_test(d)
	gc()
	t += @elapsed dict_ins_del_test(d, 100000)
	end
	t
	end

	function test_del(T::Type, iter::Int)
	d = T{String,Int}()
	t = 0.0
	for i = 1:iter
	empty!(d)
	dict_insertion_test(d)
	gc()
	t += @elapsed dict_deletion_test(d, 100000)
	end
	t
	end

	function run_all()
	for test in [test_ins, test_del, test_ins_del]
	println(test)
	println("="^length(string(test)))
	for T in [Dict, OrderedDict]
	print("$T: ")
	times = Float64[test(T, 5) for i = 1:5]
	println("$times, median=$(median(times))")
	end
	println()
	end
	end
	Here are 3 implemenations of an `OrderedDict` for julia. The first
	creates a `DictItem` class, which is stored as the value in a wrapped
	Dict. The second only stores a vector of keys, and the third
	reimplements the standard julia hash-based dictionary inline, and adds
	just enough information to maintain key-value order.

	I had other, similar versions along the way, with minor tweaks that
	didn't change the timings much. This includes a version where
	DictItem was immutable.


	One version I did not implement was a doubly-linked-list version,
	which should be much faster at deletion than the first two, but which
	is more limited in the operations that can be done with it (e.g., not
	shown here is code which allows the dictionary to be sorted or treated
	as a dequeue of (key, value) pairs). See
	https://github.com/kmsquire/OrderedCollections.jl for a version which
	includes this code.


	`dict_tests.jl` is timing code that tests the insertion of 100,000
	items, deletion of 100,000 items (with possible duplicates), and a
	random mix of insertion and deletions. The weak point of the first
	two versions is clearly deletion.

	============================================================================================
	OrderedDictv1.jl
	============================================================================================

	julia> require("dict_tests.jl")

	julia> require("OrderedDictv1.jl")

	julia> run_all()
	test_ins
	========
	Dict{K,V}: [0.339021, 0.293294, 0.292359, 0.293016, 0.291899], median=0.293015884
	OrderedDict{K,V}: [0.558897, 0.432915, 0.43435, 0.43405, 0.435232], median=0.43434986900000006

	test_del
	========
	Dict{K,V}: [0.584101, 0.565818, 0.565554, 0.564916, 0.565354], median=0.5655537869999999
	OrderedDict{K,V}: [0.797462, 0.785816, 0.781939, 0.783949, 0.784663], median=0.784663365

	test_ins_del
	============
	Dict{K,V}: [0.61392, 0.612826, 0.611372, 0.612516, 0.613474], median=0.612826085
	OrderedDict{K,V}: [0.829546, 0.827917, 0.828826, 0.828606, 0.831101], median=0.8288263260000001


	============================================================================================
	OrderedDictv3.jl
	============================================================================================
	julia> require("dict_tests.jl")

	julia> require("OrderedDictv3.jl")

	julia> run_all()
	test_ins
	========
	Dict{K,V}: [0.323519, 0.284433, 0.282385, 0.281725, 0.281555], median=0.282384714
	OrderedDict{K,V}: [0.489741, 0.380406, 0.37986, 0.380098, 0.37905], median=0.38009831499999996

	test_del
	========
	Dict{K,V}: [0.594232, 0.573725, 0.574156, 0.57504, 0.574906], median=0.5749057240000001
	OrderedDict{K,V}: [0.624253, 0.605593, 0.610013, 0.606928, 0.609852], median=0.609851745

	test_ins_del
	============
	Dict{K,V}: [0.614838, 0.615072, 0.615307, 0.614056, 0.615215], median=0.61507241
	OrderedDict{K,V}: [0.654655, 0.658411, 0.656661, 0.656376, 0.654963], median=0.6563759739999999
	import Base.isempty, Base.length, Base.sizeof
	import Base.start, Base.next, Base.done
	import Base.has, Base.get
	import Base.assign, Base.ref, Base.delete!, Base.empty!
	import Base.show, Base.similar
	import Base.findnextnot, Base.findfirstnot

	type DictItem{K,V}
	k::K
	v::V
	idx::Int
	end

	type OrderedDict{K,V} <: Associative{K,V}
	ht::Dict{K,DictItem{K,V}}
	v::Vector{DictItem{K,V}}
	v_slots::BitArray
	ndel::Int

	OrderedDict() = new(Dict{K,DictItem{K,V}}(), similar(Array(DictItem{K,V},1), 0), BitArray(), 0)
	function OrderedDict(ks,vs)
	d = new(Dict{K,DictItem{K,V}}(), similar(Array(DictItem{K,V},1), 0), BitArray(), 0)
	for (i, (k, v)) in enumerate(zip(ks, vs))
	item = DictItem{K,V}(k,v,i)
	d.ht[k] = item
	push!(d.v, item)
	push!(d.v_slots, true)
	end
	d
	end
	end
	OrderedDict() = OrderedDict{Any, Any}()
	OrderedDict(ks, vs) = OrderedDict{Any,Any}(ks, vs)
	OrderedDict{K,V}(ks::AbstractArray{K}, vs::AbstractArray{V}) = OrderedDict{K,V}(ks, vs)

	# syntax entry points
	OrderedDict{K,V}(ks::(K...), vs::(V...)) = OrderedDict{K ,V }(ks, vs)
	OrderedDict{K }(ks::(K...), vs::Tuple ) = OrderedDict{K ,Any}(ks, vs)
	OrderedDict{V }(ks::Tuple , vs::(V...)) = OrderedDict{Any,V }(ks, vs)

	similar{K,V}(d::OrderedDict{K,V}) = OrderedDict{K,V}()

	# TODO: serialize, deserialize

	## collections ##

	isempty(d::OrderedDict) = isempty(d.ht)
	length(d::OrderedDict) = length(d.ht) # d.v may have empty slots
	has(d::OrderedDict, key) = has(d.ht, key)

	## associative ##

	get(d::OrderedDict, key, default) = has(d, key) ? d[key] : default

	function empty!(d::OrderedDict)
	empty!(d.ht)
	empty!(d.v)
	empty!(d.v_slots)
	d.ndel = 0
	end

	## indexable ##

	ref(d::OrderedDict, key) = d.ht[key].v

	function assign{K,V}(d::OrderedDict{K,V}, v, key)
	# 3/4 deleted?
	if d.ndel >= ((3*length(d))>>2)
	_compact(d)
	end

	if has(d, key)
	d.ht[key].v = v
	else
	item = DictItem{K,V}(key, v, length(d.v)+1)
	d.ht[key] = item
	push!(d.v, item)
	push!(d.v_slots, true)
	end
	d
	end


	## iterable ##

	skip_deleted(d::OrderedDict, i) = findnext(d.v_slots, i)

	start(d::OrderedDict) = skip_deleted(d,1)
	done(d::OrderedDict, i) = (i == 0)
	next(d::OrderedDict, i) = ((item=d.v[i]; (item.k, item.v)), skip_deleted(d,i+1))


	## utility ##

	function _compact(d::OrderedDict)
	v = d.v
	slots = d.v_slots

	# start with first empty slot
	s_pos = findfirstnot(slots)
	if s_pos == 0
	return
	end
	v_pos = findnext(slots, s_pos)

	# fill down empty slots with consecutive filled slots
	while v_pos != 0
	item = v[s_pos] = v[v_pos]
	item.idx = s_pos

	s_pos += 1
	v_pos = findnext(slots, v_pos+1)
	end

	new_sz = length(d)
	resize!(d.v, new_sz)
	resize!(d.v_slots, new_sz)
	slots[1:end] = true
	d.ndel = 0

	nothing
	end

	## associative ##


	function delete!(d::OrderedDict, key)
	item = d.ht[key]
	delete!(d.ht, key)
	# Is this right?
	ccall(:jl_arrayunset, Void, (Any, Uint), d.v, item.idx-1)
	d.v_slots[item.idx] = false
	d.ndel += 1

	item.v
	end

	function delete!(d::OrderedDict, key, default)
	if has(d, key)
	delete!(d, key)
	else
	default
	end
	end
	# Construction
	import Base.similar, Base.sizehint

	# Serialization
	import Base.serialize, Base.deserialize

	# Iteration
	import Base.start, Base.next, Base.done

	# General Collections
	import Base.isempty, Base.empty!, Base.length

	# Indexable Collections
	import Base.setindex!, Base.getindex

	# Associative Collections
	import Base.has, Base.get, Base.getkey, Base.delete!

	# Dequeue-like
	import Base.push!, Base.pop!, Base.unshift!, Base.shift!, Base.append!, Base.insert!

	# Useful, unexported by Base
	import Base.findnextnot, Base.findfirstnot ## from bitarray.jl
	import Base._tablesz, Base.hashindex ## from dict.jl

	# Sorting

	import Base.sort!, Base.sort, Base.sortby!, Base.sortby, Base.sortperm
	import Sort.DEFAULT_UNSTABLE, Sort.DEFAULT_STABLE,
	Sort.Ordering, Sort.Algorithm,
	Sort.Forward, Sort.Reverse,
	Sort.By, Sort.Lt, Sort.lt

	# Exports

	export OrderedDict,
	similar,
	sizehint,
	start,
	next,
	done,
	empty!,
	getindex,
	setindex!,
	push!,
	pop!,
	unshift!,
	shift!,
	append!,
	insert!,
	sort,
	sort!,
	sortby,
	sortby!,
	sortperm,
	indexof,
	getitem


	######################
	## OrderedDict type ##

	type OrderedDict{K,V} <: Associative{K,V}
	slots::Array{Uint8,1}
	keys::Array{K,1}
	vals::Array{V,1}
	ord_idxs::Array{Int,1}
	ord_slots::BitArray
	ord::Array{Int,1}
	ndel::Int
	odel::Int
	count::Int
	deleter::Function

	function OrderedDict()
	n = 16
	new(zeros(Uint8,n), Array(K,n), Array(V,n), Array(Int,n), BitArray(), Array(Int,0), 0, 0, 0, identity)
	end
	function OrderedDict(ks, vs)
	n = length(ks)
	h = OrderedDict{K,V}()
	for i=1:n
	h[ks[i]] = vs[i]
	end
	return h
	end
	end
	OrderedDict() = OrderedDict{Any,Any}()

	OrderedDict{K,V}(ks::AbstractArray{K}, vs::AbstractArray{V}) = OrderedDict{K,V}(ks,vs)
	OrderedDict(ks, vs) = OrderedDict{Any,Any}(ks, vs)

	# syntax entry points
	OrderedDict{K,V}(ks::(K...), vs::(V...)) = OrderedDict{K ,V }(ks, vs)
	OrderedDict{K }(ks::(K...), vs::Tuple ) = OrderedDict{K ,Any}(ks, vs)
	OrderedDict{V }(ks::Tuple , vs::(V...)) = OrderedDict{Any,V }(ks, vs)

	##########################
	## Construction-related ##

	similar{K,V}(d::OrderedDict{K,V}) = OrderedDict{K,V}()

	function sizehint(d::OrderedDict, newsz)
	oldsz = length(d.slots)
	if newsz <= oldsz
	# todo: shrink
	# be careful: rehash() assumes everything fits. it was only designed
	# for growing.
	return d
	end
	# grow at least 25%
	newsz = max(newsz, (oldsz*5)>>2)
	rehash(d, newsz)
	end

	## TODO: some of these are simply copied from base/dict.jl,
	## and the type was changed from Dict -> OrderedDict
	##
	## (Field names might also be slightly different, but
	## can be reverted.)
	##
	## It would be nice if they were defined in terms
	## of an AbstractDict <: Associative

	###################
	## Serialization ##
	# TODO: Remove these if AbstractDict versions become available

	function serialize(s, t::OrderedDict)
	serialize_type(s, typeof(t))
	write(s, int32(length(t)))
	for (k,v) in t
	serialize(s, k)
	serialize(s, v)
	end
	end

	function deserialize{K,V}(s, T::Type{OrderedDict{K,V}})
	n = read(s, Int32)
	t = T(); sizehint(t, n)
	for i = 1:n
	k = deserialize(s)
	v = deserialize(s)
	t[k] = v
	end
	return t
	end

	########################################
	## Dict/OrderedDict Utility functions ##

	# TODO: Remove these if AbstractDict versions become available
	isslotempty(h::OrderedDict, i::Int) = h.slots[i] == 0x0
	isslotfilled(h::OrderedDict, i::Int) = h.slots[i] == 0x1
	isslotmissing(h::OrderedDict, i::Int) = h.slots[i] == 0x2

	# OrderedDict version of rehash
	function rehash{K,V}(h::OrderedDict{K,V}, newsz)
	newsz = _tablesz(newsz)
	nel = h.count
	h.ndel = h.count = 0
	if nel == 0
	resize!(h.slots, newsz)
	fill!(h.slots, 0)
	resize!(h.keys, newsz)
	resize!(h.vals, newsz)
	resize!(h.ord_idxs, newsz)
	resize!(h.ord_slots, 0)
	resize!(h.ord, 0)
	return h
	end
	olds = h.slots
	oldk = h.keys
	oldv = h.vals
	oldi = h.ord_idxs
	sz = length(olds)
	h.slots = zeros(Uint8,newsz)
	h.keys = Array(K, newsz)
	h.vals = Array(V, newsz)
	h.ord_idxs = Array(Int, newsz)

	for i = 1:sz
	if olds[i] == 0x1
	k = oldk[i]
	index = hashindex(k, newsz)
	while h.slots[index] != 0
	index = (index & (newsz-1)) + 1
	end
	h.slots[index] = 0x1
	h.keys[index] = k
	h.vals[index] = oldv[i]

	idx = oldi[i]
	h.ord_idxs[index] = idx
	h.ord[idx] = index
	h.count += 1
	end
	end

	return h
	end

	# get the index where a key is stored, or -1 if not present
	# TODO: remove if AbstractDict version becomes available
	function ht_keyindex{K,V}(h::OrderedDict{K,V}, key)
	sz = length(h.keys)
	iter = 0
	maxprobe = max(16, sz>>6)
	index = hashindex(key, sz)
	orig = index
	keys = h.keys

	while true
	if isslotempty(h,index)
	break
	end
	if !isslotmissing(h,index) && isequal(key,keys[index])
	return index
	end

	index = (index & (sz-1)) + 1
	iter+=1
	if iter > maxprobe \|\| index==orig
	break
	end
	end

	return -1
	end


	# Removes empty slots of order array in OrderedDict
	function _compact(h::OrderedDict)
	# start with first empty slot
	npos = findfirstnot(h.ord_slots)
	if npos == 0 return; end
	opos = findnext(h.ord_slots, npos)

	# fill down empty slots with consecutive filled slots
	while opos != 0
	index = h.ord[npos] = h.ord[opos]
	h.ord_idxs[index] = npos

	npos += 1
	opos = findnext(h.ord_slots, opos+1)
	end

	resize!(h.ord, h.count)
	resize!(h.ord_slots, h.count)
	h.ord_slots[1:end] = true
	h.odel = 0

	nothing
	end

	###############
	## Iteration ##

	skip_deleted(t::OrderedDict, i::Int) = findnext(t.ord_slots, i)

	start(t::OrderedDict) = skip_deleted(t,1)
	done(t::OrderedDict, i) = (i == 0)
	next(t::OrderedDict, i) = (idx = t.ord[i]; ((t.keys[idx],t.vals[idx]), skip_deleted(t,i+1)))


	#########################
	## General Collections ##

	isempty(t::OrderedDict) = (t.count == 0)
	length(t::OrderedDict) = t.count

	function empty!{K,V}(h::OrderedDict{K,V})
	fill!(h.slots, 0x0)
	sz = length(h.slots)
	h.keys = Array(K, sz)
	h.vals = Array(V, sz)
	h.ord_idxs = Array(Int, sz)
	h.ord_slots = BitArray()
	h.ord = Array(Int, 0)
	h.ndel = 0
	h.odel = 0
	h.count = 0
	return h
	end

	###########################
	## Indexable Collections ##

	# As with ref, we want to allow assignment by index position as well, as key,
	# but we ignore this when K<:Number

	function setindex!{K,V}(h::OrderedDict{K,V}, kv::(Any,Any), index::Integer)
	(key,v) = kv
	ord_idx = indexof(h,key,0)
	if ord_idx == index
	return setindex!(h, v, key)
	end
	# TODO: this can be more efficient
	delete!(h, h.keys[h.ord[index]])
	insert!(h, index, kv)
	end

	setindex!{K<:Number,V}(h::OrderedDict{K,V}, v::(Any,Any), key::Integer) =
	invoke(setindex!, (OrderedDict{K,V}, Any, Any), h, v, key)
	setindex!{K<:Number,V}(h::OrderedDict{K,V}, v, key::Integer) =
	invoke(setindex!, (OrderedDict{K,V}, Any, Any), h, v, key)


	function setindex!{K,V}(h::OrderedDict{K,V}, v, key)
	key = convert(K,key)
	v = convert(V, v)

	sz = length(h.keys)

	if h.ndel >= ((3sz)>>2) \|\| h.count3 > sz*2
	# > 3/4 deleted or > 2/3 full
	rehash(h, h.count > 64000 ? h.count2 : h.count4)
	sz = length(h.keys) # rehash may resize the table at this point!
	end

	if h.odel >= ((3*length(h.ord))>>2)
	# > 3/4 of ord array deleted
	_compact(h)
	end

	iter = 0
	maxprobe = max(16, sz>>6)
	index = hashindex(key, sz)
	orig = index
	avail = -1 # an available slot
	#keys = h.keys; vals = h.vals; ord_idxs = h.ord_idxs; ord_slots = h.ord_slots; ord = h.ord

	while true
	if isslotempty(h,index)
	if avail > 0; index = avail; end
	push!(h.ord, index)
	push!(h.ord_slots, true)
	h.slots[index] = 0x1
	h.keys[index] = key
	h.vals[index] = v
	h.ord_idxs[index] = length(h.ord)
	h.count += 1
	return h
	end

	if isslotmissing(h,index)
	if avail<0
	# found an available slot, but need to keep scanning
	# in case "key" already exists in a later collided slot.
	avail = index
	end
	elseif isequal(key, h.keys[index])
	h.vals[index] = v
	return h
	end

	index = (index & (sz-1)) + 1
	iter+=1
	if iter > maxprobe \|\| index==orig
	break
	end
	end

	if avail>0
	index = avail
	push!(h.ord, index)
	push!(h.ord_slots, true)
	h.slots[index] = 0x1
	h.keys[index] = key
	h.vals[index] = v
	h.ord_idxs[index] = length(h.ord)
	h.count += 1
	return h
	end

	rehash(h, h.count > 64000 ? sz2 : sz4)

	setindex!(h, v, key)
	end

	# We want to allow the user to access the (k,v) pairs by index
	# However, first and foremost, this is a dictionary, so if the
	# keys are numbers, assume any reference using an integer
	# as a key is attempting a has lookup.

	# TODO: This might be confusing behavior, so consider disabling
	# and simply sequential access through getitem()

	getindex{K,V}(h::OrderedDict{K,V}, ord_idx::Integer) = getitem(h, ord_idx)

	function getindex{K<:Number,V}(h::OrderedDict{K,V}, key::Integer)
	index = ht_keyindex(h, key)
	return (index<0) ? throw(KeyError(key)) : h.vals[index]::V
	end

	function getindex{K,V}(h::OrderedDict{K,V}, key)
	index = ht_keyindex(h, key)
	return (index<0) ? throw(KeyError(key)) : h.vals[index]::V
	end

	function indexof{K,V}(h::OrderedDict{K,V}, key)
	index = ht_keyindex(h, key)
	return (index<0) ? throw(KeyError(key)) : (_compact(h); h.ord_idxs[index])
	end

	function indexof{K,V}(h::OrderedDict{K,V}, key, deflt)
	index = ht_keyindex(h, key)
	return (index<0) ? deflt : (_compact(h); h.ord_idxs[index])
	end

	#############################
	## Associative Collections ##

	has(h::OrderedDict, key) = (ht_keyindex(h, key) >= 0)

	function get{K,V}(h::OrderedDict{K,V}, key, deflt)
	index = ht_keyindex(h, key)
	return (index<0) ? deflt : h.vals[index]::V
	end

	function getkey{K,V}(h::OrderedDict{K,V}, key, deflt)
	index = ht_keyindex(h, key)
	return (index<0) ? deflt : h.keys[index]::K
	end

	function getitem{K,V}(h::OrderedDict{K,V}, ord_idx)
	_compact(h)
	index = h.ord[ord_idx]
	return (h.keys[index], h.vals[index])::(K,V)
	end

	function _delete!(h::OrderedDict, index)
	val = h.vals[index]
	idx = h.ord_idxs[index]
	h.slots[index] = 0x2
	ccall(:jl_arrayunset, Void, (Any, Uint), h.keys, index-1)
	ccall(:jl_arrayunset, Void, (Any, Uint), h.vals, index-1)
	# h.ord_idxs[index] = 0 ## not really necessary
	# ord[idx] = 0 ## not really necessary
	h.ord_slots[idx] = false
	h.ndel += 1
	h.odel += 1
	h.count -= 1
	return val
	end

	function delete!{K,V}(h::OrderedDict{K,V}, key)
	index = ht_keyindex(h, key)
	index > 0 ? _delete!(h, index) : throw(KeyError(key))
	end

	function delete!{K,V}(h::OrderedDict{K,V}, ord_idx::Integer)
	key = h.keys[h.ord[ord_idx]]
	(key, delete!(h, key))::(K,V)
	end

	delete!{K<:Number,V}(h::OrderedDict{K,V}, key::Integer) = invoke(delete!, (OrderedDict{K,V}, Any), h, key)

	function delete!(h::OrderedDict, key, default)
	index = ht_keyindex(h, key)
	index > 0 ? _delete!(h, index) : default
	end