Created
December 20, 2020 07:05
-
-
Save Roger-luo/0df73cabf4c91f9854657fdd2ed66767 to your computer and use it in GitHub Desktop.
Implement your own (full amplitude) top performance quantum circuit emulator in ONE day!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
macro _threads(ex) | |
return quote | |
if (Threads.nthreads() > 1) && (length(st) > 4096) | |
$(Expr(:macrocall, Expr(:(.), :Threads, QuoteNode(Symbol("@threads"))), __source__, ex)) | |
else | |
$ex | |
end | |
end |> esc | |
end | |
for N in [8, 16, 32, 64, 128] | |
T = Symbol(:Int, N) | |
UT = Symbol(:UInt, N) | |
@eval begin | |
log2i(x::$T) = | |
!signbit(x) ? ($(N - 1) - leading_zeros(x)) : | |
throw(ErrorException("nonnegative expected ($x)")) | |
log2i(x::$UT) = $(N - 1) - leading_zeros(x) | |
end | |
end | |
bmask(args...) = bmask(Int, args...) | |
bmask(::Type{T}) where {T<:Integer} = zero(T) | |
bmask(::Type{T}, positions::Int...) where {T<:Integer} = bmask(T, positions) | |
function bmask(::Type{T}, itr) where {T<:Integer} | |
isempty(itr) && return 0 | |
ret = zero(T) | |
for b in itr | |
ret += one(T) << (b - 1) | |
end | |
return ret | |
end | |
@inline function bmask(::Type{T}, range::UnitRange{Int})::T where {T<:Integer} | |
((one(T) << (range.stop - range.start + 1)) - one(T)) << (range.start - 1) | |
end | |
@inline lmove(b::Int, mask::Int, k::Int)::Int = (b & ~mask) << k + (b & mask) | |
@inline function _group_shift(masks::Vector{Int}, shift_len::Vector{Int}, k::Int, k_prv::Int) | |
# if current position in the contiguous region | |
# since these bits will be moved together with | |
# the first one, we don't need to generate a | |
# new mask | |
if k == k_prv + 1 | |
shift_len[end] += 1 | |
else | |
# we generate a bit mask where the 1st to k-th bits are 1 | |
push!(masks, bmask(0:k-1)) | |
push!(shift_len, 1) | |
end | |
end | |
@inline function group_shift(locations) | |
masks = Int[] | |
shift_len = Int[] | |
k_prv = -1 | |
for k in locations | |
_group_shift(masks, shift_len, k, k_prv) | |
k_prv = k | |
end | |
return masks, shift_len | |
end | |
@inline function complement_group_shift(n::Int, locations) | |
masks = Int[] | |
shift_len = Int[] | |
k_prv = -1 | |
for k in 1:n | |
k in locations && continue | |
_group_shift(masks, shift_len, k, k_prv) | |
k_prv = k | |
end | |
return masks, shift_len | |
end | |
struct BitSubspace | |
n::Int # number of bits in fullspace | |
sz_subspace::Int # size of the subspace | |
n_shifts::Int # number of shifts | |
masks::Vector{Int} # shift masks | |
shift_len::Vector{Int} # length of each shift | |
end | |
function Base.getindex(s::BitSubspace, i::Int) | |
index = i - 1 | |
@inbounds for k in 1:s.n_shifts | |
index = lmove(index, s.masks[k], s.shift_len[k]) | |
end | |
return index | |
end | |
Base.firstindex(s::BitSubspace) = 1 | |
Base.lastindex(s::BitSubspace) = s.sz_subspace | |
Base.length(s::BitSubspace) = s.sz_subspace | |
Base.eltype(::BitSubspace) = Int | |
function Base.iterate(s::BitSubspace, st::Int = 1) | |
st <= length(s) || return | |
return s[st], st + 1 | |
end | |
function bsubspace(n::Int, locs) | |
@assert issorted(locs) | |
masks, shift_len = group_shift(locs) | |
BitSubspace(n, 1 << (n - length(locs)), length(masks), masks, shift_len) | |
end | |
function bcomspace(n::Int, locs) | |
@assert issorted(locs) | |
masks, shift_len = complement_group_shift(n, locs) | |
BitSubspace(n, 1 << length(locs), length(masks), masks, shift_len) | |
end | |
log2dim1(x) = log2i(size(x, 1)) | |
function broutine!(st::AbstractVector, U::AbstractMatrix, locs::NTuple{N, Int}) where N | |
n = log2dim1(st) | |
subspace = bsubspace(n, locs) | |
comspace = bcomspace(n, locs) | |
indices = [idx + 1 for idx in comspace] | |
for k in subspace | |
idx = indices .+ k | |
st[idx] = U * st[idx] | |
end | |
return st | |
end | |
function Base.show(io::IO, ::MIME"text/plain", s::BitSubspace) | |
indent = get(io, :indent, 0) | |
println(io, " "^indent, s.sz_subspace, "-element BitSubspace:") | |
if s.sz_subspace < 5 | |
for k in 1:s.sz_subspace | |
print(io, " "^(indent+1), string(s[k]; base=2, pad=s.n)) | |
if k != s.sz_subspace | |
println(io) | |
end | |
end | |
else # never print more than 4 elements | |
println(io, " "^(indent+1), string(s[1]; base=2, pad=s.n)) | |
println(io, " "^(indent+1), string(s[2]; base=2, pad=s.n)) | |
println(io, " "^(indent+1), "⋮") | |
println(io, " "^(indent+1), string(s[end-1]; base=2, pad=s.n)) | |
print(io, " "^(indent+1), string(s[end]; base=2, pad=s.n)) | |
end | |
end | |
function subspace_mul!(st::AbstractVector{T}, comspace, U, subspace, offset=0) where T | |
# pretend this is deleted | |
# if size(U, 1) == 2 | |
# subspace_mul2x2!(st, comspace, U, subspace, offset) | |
# else | |
if size(U, 1) == 4 | |
subspace_mul4x4!(st, comspace, U, subspace, offset) | |
elseif size(U, 1) == 8 | |
subspace_mul8x8!(st, comspace, U, subspace, offset) | |
else | |
subspace_mul_generic!(st, comspace, U, subspace, offset) | |
end | |
return st | |
end | |
# pretend this is deleted | |
# function subspace_mul2x2!(st::AbstractVector{T}, comspace, U, subspace, offset=0) where T | |
# indices_1 = comspace[1] + 1 | |
# indices_2 = comspace[2] + 1 | |
# @inbounds for k in subspace | |
# idx_1 = indices_1 + k + offset | |
# idx_2 = indices_2 + k + offset | |
# T1 = U[1, 1] * st[idx_1] + U[1, 2] * st[idx_2] | |
# T2 = U[2, 1] * st[idx_1] + U[2, 2] * st[idx_2] | |
# st[idx_1] = T1 | |
# st[idx_2] = T2 | |
# end | |
# return st | |
# end | |
function subspace_mul4x4!(st::AbstractVector{T}, comspace, U, subspace, offset=0) where T | |
Base.Cartesian.@nextract 4 indices i -> comspace[i] + 1 | |
Base.Cartesian.@nextract 4 U i->begin | |
Base.Cartesian.@nextract 4 U_i j->U[i, j] | |
end | |
for k in subspace | |
Base.Cartesian.@nextract 4 idx i-> k + indices_i + offset | |
Base.Cartesian.@nexprs 4 i -> begin | |
y_i = zero(T) | |
Base.Cartesian.@nexprs 4 j -> begin | |
y_i += U_i_j * st[idx_j] | |
end | |
end | |
Base.Cartesian.@nexprs 4 i -> begin | |
st[idx_i] = y_i | |
end | |
end | |
return st | |
end | |
function subspace_mul8x8!(st::AbstractVector{T}, comspace, U, subspace, offset=0) where T | |
Base.Cartesian.@nextract 8 indices i -> comspace[i] + 1 | |
Base.Cartesian.@nextract 8 U i->begin | |
Base.Cartesian.@nextract 8 U_i j->U[i, j] | |
end | |
@inbounds for k in subspace | |
Base.Cartesian.@nextract 8 idx i-> k + indices_i + offset | |
Base.Cartesian.@nexprs 8 i -> begin | |
y_i = zero(T) | |
Base.Cartesian.@nexprs 8 j -> begin | |
y_i += U_i_j * st[idx_j] | |
end | |
end | |
Base.Cartesian.@nexprs 8 i -> begin | |
st[idx_i] = y_i | |
end | |
end | |
return st | |
end | |
function subspace_mul_generic!(st::AbstractVector{T}, indices, U, subspace, offset=0) where T | |
y = similar(st, (size(U, 1), )) | |
idx = similar(indices) | |
@inbounds for k in subspace | |
for i in 1:size(U, 1) | |
idx[i] = indices[i] + k + offset | |
end | |
for i in 1:size(U, 1) | |
y[i] = zero(T) | |
for j in 1:size(U, 2) | |
y[i] += U[i, j] * st[idx[j]] | |
end | |
end | |
for i in 1:size(U, 1) | |
st[idx[i]] = y[i] | |
end | |
end | |
return st | |
end | |
function broutine2x2!(st::AbstractVector{T}, U::AbstractMatrix, locs::Tuple{Int}) where T | |
U11 = U[1, 1]; U12 = U[1, 2]; | |
U21 = U[2, 1]; U22 = U[2, 2]; | |
step_1 = 1 << (first(locs) - 1) | |
step_2 = 1 << first(locs) | |
@inbounds if step_1 == 1 | |
for j in 0:step_2:size(st, 1)-step_1 | |
ST1 = U11 * st[j + 1] + U12 * st[j + 1 + step_1] | |
ST2 = U21 * st[j + 1] + U22 * st[j + 1 + step_1] | |
st[j + 1] = ST1 | |
st[j + 1 + step_1] = ST2 | |
end | |
elseif step_1 == 2 | |
for j in 0:step_2:size(st, 1)-step_1 | |
Base.Cartesian.@nexprs 2 i->begin | |
ST1 = U11 * st[j + i] + U12 * st[j + i + step_1] | |
ST2 = U21 * st[j + i] + U22 * st[j + i + step_1] | |
st[j + i] = ST1 | |
st[j + i + step_1] = ST2 | |
end | |
end | |
elseif step_1 == 4 | |
for j in 0:step_2:size(st, 1)-step_1 | |
Base.Cartesian.@nexprs 4 i->begin | |
ST1 = U11 * st[j + i] + U12 * st[j + i + step_1] | |
ST2 = U21 * st[j + i] + U22 * st[j + i + step_1] | |
st[j + i] = ST1 | |
st[j + i + step_1] = ST2 | |
end | |
end | |
elseif step_1 == 8 | |
for j in 0:step_2:size(st, 1)-step_1 | |
Base.Cartesian.@nexprs 8 i->begin | |
ST1 = U11 * st[j + i] + U12 * st[j + i + step_1] | |
ST2 = U21 * st[j + i] + U22 * st[j + i + step_1] | |
st[j + i] = ST1 | |
st[j + i + step_1] = ST2 | |
end | |
end | |
else | |
for j in 0:step_2:size(st, 1)-step_1 | |
for i in j:8:j+step_1-1 | |
Base.Cartesian.@nexprs 8 k->begin | |
ST1 = U11 * st[i + k] + U12 * st[i + step_1 + k] | |
ST2 = U21 * st[i + k] + U22 * st[i + step_1 + k] | |
st[i + k] = ST1 | |
st[i + step_1 + k] = ST2 | |
end | |
end | |
end | |
end | |
return st | |
end | |
function broutine!(st::AbstractVector, U::AbstractMatrix, locs::NTuple{N, Int}) where N | |
size(U, 1) == 2 && return broutine2x2!(st, U, locs) | |
n = log2dim1(st) | |
subspace = bsubspace(n, locs) | |
comspace = bcomspace(n, locs) | |
subspace_mul!(st, comspace, U, subspace) | |
return st | |
end | |
function threaded_broutine!(st::AbstractVector, U::AbstractMatrix, locs::NTuple{N, Int}) where N | |
n = log2dim1(st) | |
subspace = bsubspace(n, locs) | |
comspace = bcomspace(n, locs) | |
indices = [idx + 1 for idx in comspace] | |
@inbounds @views Threads.@threads for k in subspace | |
idx = indices .+ k | |
st[idx] = U * st[idx] | |
end | |
return st | |
end | |
ctrl_offset(locs, configs) = bmask(locs[i] for (i, u) in enumerate(configs) if u != 0) | |
ismatch(index::T, mask::T, target::T) where {T<:Integer} = (index & mask) == target | |
function broutine2x2!(st::AbstractVector, U::AbstractMatrix, locs::Tuple{Int}, ctrl_locs::NTuple{M, Int}, ctrl_configs::NTuple{M, Int}) where {N, M} | |
step_1 = 1 << (first(locs) - 1) | |
step_2 = 1 << first(locs) | |
ctrl_mask = bmask(ctrl_locs) | |
flag_mask = reduce(+, 1 << (ctrl_locs[i] - 1) for i in 1:length(ctrl_locs) if ctrl_configs[i] == 1) | |
U11 = U[1, 1]; U12 = U[1, 2]; | |
U21 = U[2, 1]; U22 = U[2, 2]; | |
@inbounds if step_1 == 1 | |
for j in 0:step_2:size(st, 1)-step_1 | |
if ismatch(j, ctrl_mask, flag_mask) | |
ST1 = U11 * st[j+1] + U12 * st[j + step_1 + 1] | |
ST2 = U21 * st[j+1] + U22 * st[j + step_1 + 1] | |
st[j + 1] = ST1 | |
st[j + step_1 + 1] = ST2 | |
end | |
end | |
elseif step_1 == 2 | |
for j in 0:step_2:size(st, 1)-step_1 | |
Base.Cartesian.@nexprs 2 i->begin | |
if ismatch(j+i-1, ctrl_mask, flag_mask) | |
ST1 = U11 * st[j+i] + U12 * st[j + step_1 + i] | |
ST2 = U21 * st[j+i] + U22 * st[j + step_1 + i] | |
st[j + i] = ST1 | |
st[j + step_1 + i] = ST2 | |
end | |
end | |
end | |
elseif step_1 == 4 | |
for j in 0:step_2:size(st, 1)-step_1 | |
Base.Cartesian.@nexprs 4 i->begin | |
if ismatch(j+i-1, ctrl_mask, flag_mask) | |
ST1 = U11 * st[j+i] + U12 * st[j + step_1 + i] | |
ST2 = U21 * st[j+i] + U22 * st[j + step_1 + i] | |
st[j + i] = ST1 | |
st[j + step_1 + i] = ST2 | |
end | |
end | |
end | |
elseif step_1 == 8 | |
for j in 0:step_2:size(st, 1)-step_1 | |
Base.Cartesian.@nexprs 8 i->begin | |
if ismatch(j+i-1, ctrl_mask, flag_mask) | |
ST1 = U11 * st[j+i] + U12 * st[j + step_1 + i] | |
ST2 = U21 * st[j+i] + U22 * st[j + step_1 + i] | |
st[j + i] = ST1 | |
st[j + step_1 + i] = ST2 | |
end | |
end | |
end | |
else | |
for j in 0:step_2:size(st, 1)-step_1 | |
for i in j:8:j+step_1-1 | |
Base.Cartesian.@nexprs 8 k->begin | |
if ismatch(i+k-1, ctrl_mask, flag_mask) | |
ST1 = U11 * st[i+k] + U12 * st[i + step_1 + k] | |
ST2 = U21 * st[i+k] + U22 * st[i + step_1 + k] | |
st[i + k] = ST1 | |
st[i + step_1 + k] = ST2 | |
end | |
end | |
end | |
end | |
end | |
return st | |
end | |
function broutine!(st::AbstractVector, U::AbstractMatrix, locs::NTuple{N, Int}, ctrl_locs::NTuple{M, Int}, ctrl_configs::NTuple{M, Int}) where {N, M} | |
size(U, 1) == 2 && return broutine2x2!(st, U, locs, ctrl_locs, ctrl_configs) | |
n = log2dim1(st) | |
subspace = bsubspace(n, sort([locs..., ctrl_locs...])) | |
comspace = bcomspace(n, locs) | |
offset = ctrl_offset(ctrl_locs, ctrl_configs) | |
subspace_mul!(st, comspace, U, subspace, offset) | |
return st | |
end | |
function broutine!(st::AbstractVector{T}, ::Val{:X}, locs::Tuple{Int}) where T | |
step_1 = 1 << (first(locs) - 1) | |
step_2 = 1 << first(locs) | |
@inbounds if step_1 == 1 | |
for j in 0:step_2:size(st, 1)-step_1 | |
tmp = st[j + 1] | |
st[j + 1] = st[j + 1 + step_1] | |
st[j + 1 + step_1] = tmp | |
end | |
elseif step_1 == 2 | |
for j in 0:step_2:size(st, 1)-step_1 | |
Base.Cartesian.@nexprs 2 i->begin | |
tmp = st[j + i] | |
st[j + i] = st[j + i + step_1] | |
st[j + i + step_1] = tmp | |
end | |
end | |
elseif step_1 == 4 | |
for j in 0:step_2:size(st, 1)-step_1 | |
Base.Cartesian.@nexprs 4 i->begin | |
tmp = st[j + i] | |
st[j + i] = st[j + i + step_1] | |
st[j + i + step_1] = tmp | |
end | |
end | |
elseif step_1 == 8 | |
for j in 0:step_2:size(st, 1)-step_1 | |
Base.Cartesian.@nexprs 8 i->begin | |
tmp = st[j + i] | |
st[j + i] = st[j + i + step_1] | |
st[j + i + step_1] = tmp | |
end | |
end | |
else | |
for j in 0:step_2:size(st, 1)-step_1 | |
for i in j:8:j+step_1-1 | |
Base.Cartesian.@nexprs 8 k->begin | |
tmp = st[i+k] | |
st[i+k] = st[i + step_1 + k] | |
st[i + step_1 + k] = tmp | |
end | |
end | |
end | |
end | |
return st | |
end | |
function broutine!(st::AbstractVector{T}, ::Val{:Rz}, locs::Tuple{Int}, theta::Real) where T | |
U11 = exp(-im * theta/2) | |
U22 = exp(im * theta/2) | |
step_1 = 1 << (first(locs) - 1) | |
step_2 = 1 << first(locs) | |
@inbounds if step_1 == 1 | |
for j in 0:step_2:size(st, 1)-step_1 | |
ST1 = U11 * st[j + 1] | |
ST2 = U22 * st[j + 1 + step_1] | |
st[j + 1] = ST1 | |
st[j + 1 + step_1] = ST2 | |
end | |
elseif step_1 == 2 | |
for j in 0:step_2:size(st, 1)-step_1 | |
Base.Cartesian.@nexprs 2 i->begin | |
ST1 = U11 * st[j + i] | |
ST2 = U22 * st[j + i + step_1] | |
st[j + i] = ST1 | |
st[j + i + step_1] = ST2 | |
end | |
end | |
elseif step_1 == 4 | |
for j in 0:step_2:size(st, 1)-step_1 | |
Base.Cartesian.@nexprs 4 i->begin | |
ST1 = U11 * st[j + i] | |
ST2 = U22 * st[j + i + step_1] | |
st[j + i] = ST1 | |
st[j + i + step_1] = ST2 | |
end | |
end | |
elseif step_1 == 8 | |
for j in 0:step_2:size(st, 1)-step_1 | |
Base.Cartesian.@nexprs 8 i->begin | |
ST1 = U11 * st[j + i] | |
ST2 = U22 * st[j + i + step_1] | |
st[j + i] = ST1 | |
st[j + i + step_1] = ST2 | |
end | |
end | |
else | |
for j in 0:step_2:size(st, 1)-step_1 | |
for i in j:8:j+step_1-1 | |
Base.Cartesian.@nexprs 8 k->begin | |
ST1 = U11 * st[i + k] | |
ST2 = U22 * st[i + step_1 + k] | |
st[i + k] = ST1 | |
st[i + step_1 + k] = ST2 | |
end | |
end | |
end | |
end | |
return st | |
end | |
using CUDA | |
function broutine!(st::CuVector{T}, U::AbstractMatrix, locs::Tuple{Int}) where T | |
U11 = U[1, 1]; U12 = U[1, 2]; | |
U21 = U[2, 1]; U22 = U[2, 2]; | |
step_1 = 1 << (first(locs) - 1) | |
step_2 = 1 << first(locs) | |
function kernel(st) | |
idx = (blockIdx().x - 1) * blockDim().x + threadIdx().x | |
j = step_2 * idx - step_2 | |
for i in j+1:j+step_1 | |
ST1 = U11 * st[i] + U12 * st[i + step_1] | |
ST2 = U21 * st[i] + U22 * st[i + step_1] | |
st[i] = ST1 | |
st[i + step_1] = ST2 | |
end | |
return | |
end | |
N = length(0:step_2:size(st, 1)-step_1) | |
nblocks = ceil(Int, N/256) | |
@cuda threads=256 blocks=nblocks kernel(st) | |
return st | |
end | |
function broutine!(st::CuVector{T}, U::AbstractMatrix, locs::Tuple{Int}, ctrl_locs::NTuple{M, Int}, ctrl_configs::NTuple{M, Int}) where {T, M} | |
step_1 = 1 << (first(locs) - 1) | |
step_2 = 1 << first(locs) | |
ctrl_mask = bmask(ctrl_locs) | |
flag_mask = reduce(+, 1 << (ctrl_locs[i] - 1) for i in 1:length(ctrl_locs) if ctrl_configs[i] == 1) | |
U11 = U[1, 1]; U12 = U[1, 2]; | |
U21 = U[2, 1]; U22 = U[2, 2]; | |
function kernel(st) | |
idx = (blockIdx().x - 1) * blockDim().x + threadIdx().x | |
j = step_2 * idx - step_2 | |
for i in j:j+step_1-1 | |
if ismatch(i, ctrl_mask, flag_mask) | |
ST1 = U11 * st[i+1] + U12 * st[i + step_1 + 1] | |
ST2 = U21 * st[i+1] + U22 * st[i + step_1 + 1] | |
st[i + 1] = ST1 | |
st[i + step_1 + 1] = ST2 | |
end | |
end | |
return | |
end | |
N = length(0:step_2:size(st, 1)-step_1) | |
nblocks = ceil(Int, N/256) | |
@cuda threads=256 blocks=nblocks kernel(st) | |
return st | |
end | |
using SymEngine | |
@vars α θ | |
st = Basic[1, α, 0, 0] | |
broutine!(st, [exp(-im * θ) 0; 0 exp(im * θ)], (1, )) | |
using BenchmarkTools | |
data = Dict( | |
"X" => [], | |
"T" => [], | |
"H" => [], | |
"CNOT" => [], | |
) | |
for n in 4:25 | |
st = rand(ComplexF64, 1<<n) | |
t = @benchmark broutine!(r, $([0 1;1 0]), (3, )) setup=(r=copy($st)) | |
push!(data["X"], minimum(t).time) | |
end | |
for n in 4:25 | |
st = rand(ComplexF64, 1<<n) | |
t = @benchmark broutine!(r, $([1 0;0 exp(im * π / 4)]), (3, )) setup=(r=copy($st)) | |
push!(data["T"], minimum(t).time) | |
end | |
for n in 4:25 | |
st = rand(ComplexF64, 1<<n) | |
t = @benchmark broutine!(r, $([1/sqrt(2) 1/sqrt(2); 1/sqrt(2) -1/sqrt(2)]), (3, )) setup=(r=copy($st)) | |
push!(data["H"], minimum(t).time) | |
end | |
for n in 4:25 | |
st = rand(ComplexF64, 1<<n) | |
t = @benchmark broutine!(r, $([0 1;1 0]), (2, ), (3, ), (1, )) setup=(r=copy($st)) | |
push!(data["CNOT"], minimum(t).time) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment