simonster/gist:b1b4cc2ad0daa8e20a99

## gistfile1.jl
# Wrapper around a single `Bool`.
# This file defines `*(::CheapMulBool, ::Number)`, which uses the wrapped flag
# to select between the numeric operand and `zero` of it via `ifelse`.
immutable CheapMulBool
    x::Bool
end
# Wrapper holding a value `x` of arbitrary type `T`.
# The `getindex` method for `Negated` in this file reads raw bytes behind
# `pointer(x)` and yields a `CheapMulBool` that is true where the byte is zero.
# The benchmark in this file wraps the result of `rand(Bool, n)` in it.
immutable Negated{T}
     x::T
end
# Wrapper around a `Vector{Uint64}`.
# The `getindex` method for `BitWrapper` in this file reads bit `i` from the
# vector with `Base.unsafe_bitgetindex` and returns its negation as a
# `CheapMulBool`. The benchmark in this file passes `ybit.chunks` here.
immutable BitWrapper
     x::Vector{Uint64}
end
# Index a `BitWrapper`: fetch bit `i` from the wrapped chunk vector with
# `Base.unsafe_bitgetindex` and return its logical negation as a `CheapMulBool`.
function Base.getindex(x::BitWrapper, i)
    bit = Base.unsafe_bitgetindex(x.x, i)
    return CheapMulBool(!bit)
end
# Index a `Negated`: reinterpret the wrapped value's data pointer as bytes,
# load the `i`-th byte with `unsafe_load`, and return a `CheapMulBool` that is
# true exactly when that byte equals zero.
# NOTE(review): this assumes one byte per element of `x.x` (as for the Bool
# vector used in the benchmark) -- confirm before wrapping other types.
function Base.getindex(x::Negated, i)
    bytes = convert(Ptr{Uint8}, pointer(x.x))
    return CheapMulBool(unsafe_load(bytes, i) == 0)
end
# "Multiply" a `CheapMulBool` by a number: return `y` when the wrapped flag is
# true, otherwise `zero(y)`. Uses `ifelse` rather than an `if` statement.
# Only the `CheapMulBool * Number` argument order is defined here.
function *(x::CheapMulBool, y::Number)
    return ifelse(x.x, y, zero(y))
end

# Accumulate `y[k] * x[k]` over `k = 1:length(x)`, starting from
# `zero(eltype(x))`, and return the total.
#
# The index range comes from `x` alone and element access is `@inbounds`, so
# `y` must be indexable at every `k` in `1:length(x)`; nothing here checks that.
# `y[k]` is the left operand of `*`, which is the argument order the
# `CheapMulBool` method in this file is defined for.
function f(x, y)
    n = length(x)
    acc = zero(eltype(x))
    @simd for k = 1:n
        @inbounds acc += y[k] * x[k]
    end
    return acc
end

# Benchmark data: 10000000 normal samples and a random Bool mask of the same
# length, kept in three forms: Bool vector, packed bits, and a Float64 vector
# built from the negated mask.
x = randn(10000000)
ybool = rand(Bool, length(x))
ybit = bitpack(ybool)
yfloat = convert(Vector{Float64}, !ybool)

# Call each variant once before timing (presumably so that compilation is not
# included in the measurements below).
f(x, Negated(ybool))
f(x, BitWrapper(ybit.chunks))
f(x, yfloat)

# Time 50 passes of `f` per representation; the wrapper is rebuilt on each pass.
@time for rep = 1:50
    f(x, Negated(ybool))
end
@time for rep = 1:50
    f(x, BitWrapper(ybit.chunks))
end
@time for rep = 1:50
    f(x, yfloat)
end
	# NOTE(review): from this line on, the file repeats the type definitions,
	# methods, `f`, and benchmark that appear earlier in the file, tab-indented.
	# This looks like a paste/extraction artifact -- confirm before removing.
	#
	# Wrapper around a single `Bool`.
	# This file defines `*(::CheapMulBool, ::Number)`, which uses the wrapped flag
	# to select between the numeric operand and `zero` of it via `ifelse`.
	immutable CheapMulBool
	x::Bool
	end
	# Wrapper holding a value `x` of arbitrary type `T`.
	# The `getindex` method for `Negated` in this file reads raw bytes behind
	# `pointer(x)` and yields a `CheapMulBool` that is true where the byte is zero.
	immutable Negated{T}
	x::T
	end
	# Wrapper around a `Vector{Uint64}`.
	# The `getindex` method for `BitWrapper` in this file reads bit `i` from the
	# vector with `Base.unsafe_bitgetindex` and returns its negation as a
	# `CheapMulBool`. The benchmark in this file passes `ybit.chunks` here.
	immutable BitWrapper
	x::Vector{Uint64}
	end
	# Index a `BitWrapper`: fetch bit `i` from the wrapped chunk vector with
	# `Base.unsafe_bitgetindex` and return its logical negation as a `CheapMulBool`.
	function Base.getindex(x::BitWrapper, i)
	    bit = Base.unsafe_bitgetindex(x.x, i)
	    return CheapMulBool(!bit)
	end
	# Index a `Negated`: reinterpret the wrapped value's data pointer as bytes,
	# load the `i`-th byte with `unsafe_load`, and return a `CheapMulBool` that is
	# true exactly when that byte equals zero.
	# NOTE(review): this assumes one byte per element of `x.x` (as for the Bool
	# vector used in the benchmark) -- confirm before wrapping other types.
	function Base.getindex(x::Negated, i)
	    bytes = convert(Ptr{Uint8}, pointer(x.x))
	    return CheapMulBool(unsafe_load(bytes, i) == 0)
	end
	# "Multiply" a `CheapMulBool` by a number: return `y` when the wrapped flag is
	# true, otherwise `zero(y)`. Uses `ifelse` rather than an `if` statement.
	# Only the `CheapMulBool * Number` argument order is defined here.
	function *(x::CheapMulBool, y::Number)
	    return ifelse(x.x, y, zero(y))
	end

	# Accumulate `y[k] * x[k]` over `k = 1:length(x)`, starting from
	# `zero(eltype(x))`, and return the total.
	#
	# The index range comes from `x` alone and element access is `@inbounds`, so
	# `y` must be indexable at every `k` in `1:length(x)`; nothing here checks that.
	function f(x, y)
	    n = length(x)
	    acc = zero(eltype(x))
	    @simd for k = 1:n
	        @inbounds acc += y[k] * x[k]
	    end
	    return acc
	end

	# Benchmark data: 10000000 normal samples and a random Bool mask of the same
	# length, kept in three forms: Bool vector, packed bits, and a Float64 vector
	# built from the negated mask.
	x = randn(10000000)
	ybool = rand(Bool, length(x))
	ybit = bitpack(ybool)
	yfloat = convert(Vector{Float64}, !ybool)

	# Call each variant once before timing (presumably so that compilation is not
	# included in the measurements below).
	f(x, Negated(ybool))
	f(x, BitWrapper(ybit.chunks))
	f(x, yfloat)

	# Time 50 passes of `f` per representation; the wrapper is rebuilt on each pass.
	@time for rep = 1:50
	    f(x, Negated(ybool))
	end
	@time for rep = 1:50
	    f(x, BitWrapper(ybit.chunks))
	end
	@time for rep = 1:50
	    f(x, yfloat)
	end