Skip to content

Instantly share code, notes, and snippets.

View Roger-luo's full-sized avatar
🍭
casting spells

Xiu-zhe (Roger) Luo Roger-luo

🍭
casting spells
View GitHub Profile
@Roger-luo
Roger-luo / gist:f2bfe56d882c06e9905fad8e4e1cf826
Created February 18, 2020 20:40
Performance regression of Tracker in Zygote: an MPS case
using TNFilters
using Flux
using Zygote
using BenchmarkTools
using TNFilters: bmm, bmm!, batched_tr
using Flux: params
using Zygote: AContext, Context, _pullback, cache, accum_param
# I have not found out why yet; the manually generated result gives roughly the same performance as Tracker (Tracker takes about 35μs)
@Roger-luo
Roger-luo / preallocation.jl
Created February 19, 2020 01:10
Does Cassette slow down the program?
using LinearAlgebra
using LinearAlgebra: promote_op, matprod
# multiply 2x2 matrices
"""
    matmul2x2(tA, tB, A, B)

Allocating front end for `matmul2x2!`.

A fresh output array is created with `similar(B, ...)`; it has size
`(2, 2, size(A, 3))` and element type `promote_op(matprod, T, S)`, where `T` and
`S` are the element types of `A` and `B`. The output, together with `tA`, `tB`,
`A` and `B`, is handed to `matmul2x2!`, whose result is returned.

`tA` and `tB` are forwarded untouched (presumably transpose flags — confirm
against `matmul2x2!`).
"""
function matmul2x2(tA, tB, A::AbstractArray{T, 3}, B::AbstractArray{S, 3}) where {T,S}
    out_eltype = promote_op(matprod, T, S)
    nbatch = size(A, 3)
    C = similar(B, out_eltype, 2, 2, nbatch)
    return matmul2x2!(C, tA, tB, A, B)
end
function matmul2x2!(C::AbstractArray{T1, 3}, tA, tB, A::AbstractArray{T2, 3}, B::AbstractArray{T3, 3}) where {T1,T2,T3}
if !(size(A) == size(B) == size(C) == (2,2, size(A, 3)))
@Roger-luo
Roger-luo / patch.jl
Created March 2, 2020 18:07
AutoPreallocation patch
using Zygote: @adjoint, _pullback, Context, cache
using AutoPreallocation
using Cassette
export expect, ∇expect, exact_expect, ∇exact_expect
using AutoPreallocation: RecordingCtx, ReplayCtx
# https://github.com/oxinabox/AutoPreallocation.jl/pull/9
# While recording, call `haskey` directly instead of overdubbing into its body.
@inline function Cassette.overdub(ctx::RecordingCtx, ::typeof(Base.haskey), collection, key)
    return haskey(collection, key)
end

# While replaying, apply the same direct `haskey` call.
@inline function Cassette.overdub(ctx::ReplayCtx, ::typeof(Base.haskey), collection, key)
    return haskey(collection, key)
end
using Flux
"""
    Linear{T, WT<:AbstractMatrix{T}, BT<:AbstractVector{T}}

Immutable pair of a matrix `W` and a vector `b` that share the element type `T`.
The concrete container types are kept as the parameters `WT` and `BT`.
"""
struct Linear{T, WT <: AbstractMatrix{T}, BT <: AbstractVector{T}}
    W::WT
    b::BT
end
# Describe how to take a `Linear` apart and put it back together.
#
# Returns `(children, rebuild)`, where `children` is the tuple `(m.W, m.b)`.
# Flux/Functors invoke the reconstructor with ONE argument (the mapped children
# collection, e.g. `re(map(f, children))`), so `rebuild` accepts that form. The
# original two-argument form `rebuild(W, b)` is kept for existing callers.
function Flux.functor(m::Linear)
    rebuild(children) = Linear(children...)
    rebuild(W, b) = Linear(W, b)
    return (m.W, m.b), rebuild
end
# Example instance built from a random 2×2 matrix and a random length-2 vector.
m = Linear(rand(2, 2), rand(2))
using CUDA
using ExponentialUtilities
using LinearAlgebra
using BenchmarkTools
using ExponentialUtilities: getV, getH, get_cache, _exp!
using LinearAlgebra: BlasReal, BlasComplex
using SparseArrays
using CUDA: CUBLAS
# Set CUDA's `allowscalar` switch to `false` (presumably this makes scalar
# indexing of GPU arrays an error — confirm against the CUDA.jl docs).
CUDA.allowscalar(false)
@Roger-luo
Roger-luo / hoist.jl
Created August 16, 2019 20:39
Alloc.jl in Cassette
module HoistMem
export hoist_alloc, Buffer
using Cassette, LinearAlgebra
using Cassette: @context, overdub
@context BuffCtx
mutable struct Buffer
using Dates
"""
    count_hours(m)

Return the number of working hours in the period that starts on the 14th of
month `m` of 2020 and ends on the 13th of the following month (both inclusive).
Each Monday–Friday counts as 8 hours; Saturdays and Sundays count as 0.

`m` is a month number in `1:12`. For `m == 12` the period runs into January 2021.
"""
function count_hours(m)
    first_day = Date(2020, m, 14)
    # Step one calendar month forward rather than building `Date(2020, m + 1, 13)`,
    # which throws for December (month 13 does not exist).
    last_day = first_day + Month(1) - Day(1)
    # `dayofweek` is 1 (Monday) through 7 (Sunday); no string comparison needed.
    workdays = count(day -> dayofweek(day) <= 5, first_day:Day(1):last_day)
    return workdays * 8
end
# Example: hours for the period starting 14 July 2020, scaled by 30
# (presumably an hourly rate — TODO confirm).
count_hours(7) * 30
using YaoCompiler
using YaoCompiler.Intrinsics
# Pass an OpenQASM 2.0 snippet to the `qasm` string macro; the snippet includes
# `qelib1.inc` and declares a one-qubit gate `post` whose body is `x q`.
qasm"""OPENQASM 2.0;
include "qelib1.inc";
gate post q {x q;}
"""
@device function circuit()
@Roger-luo
Roger-luo / tiny_yao.jl
Created December 20, 2020 07:05
Implement your own (full amplitude) top performance quantum circuit emulator in ONE day!
"""
    @_threads ex

Expand to code that runs `ex` under `Threads.@threads` when more than one thread
is available and `length(st) > 4096`, and runs `ex` unchanged otherwise.

The whole expansion is escaped, so `st` and `Threads` are looked up in the
caller's scope: a variable named `st` must be visible where the macro is used.
"""
macro _threads(ex)
    # `Threads.@threads ex`, built by hand so the call carries the caller's source location.
    threads_macro = Expr(:(.), :Threads, QuoteNode(Symbol("@threads")))
    threaded = Expr(:macrocall, threads_macro, __source__, ex)
    expansion = quote
        if (Threads.nthreads() > 1) && (length(st) > 4096)
            $threaded
        else
            $ex
        end
    end
    return esc(expansion)
end
using Random
using Yao
using Plots
using LinearAlgebra
using QuantumInformation
function run_circuit!(r::AbstractRegister, θs::Matrix, p::Real=0.0)
n = nqubits(r)
for j in 1:size(θs, 2) # each layer
for i in 1:size(θs, 1)÷2 # each wire