{{ message }}

Instantly share code, notes, and snippets.

# jwintersinger/optim_math.jl

Last active May 28, 2021
Code to optimize multi-allele tumor heterogeneity scores (MATH). Specifically, we modify an underlying VAF distribution by swapping points while maintaining a constant MAD and median, yielding a constant MATH.
 # Code to optimize multi-allele tumor heterogeneity scores (MATH). # Specifically, we modify an underlying VAF distribution by swapping points # while maintaining a constant MAD and median, yielding a constant MATH. using Distributions using Random using Statistics using Gadfly using Base64 import Cairo, Fontconfig function make_png(plt, width, height) io = IOBuffer() b64io = Base64EncodePipe(io) draw(SVG(b64io, width, height), plt) close(b64io) return "" end function plot_histogram(vals, xlabel; width=600px, height=400px) @assert !any(isnothing.(vals)) plt = plot( x = vec(vals), Geom.histogram(bincount=30), Guide.xlabel(xlabel), Guide.ylabel("Count"), Coord.cartesian(xmin=0, xmax=1.1), ) return make_png(plt, width, height) end function calc_mad(A) med = median(A) mad = median(abs.(A .- med)) return mad end function sample_vafs(N, T, weights, phi) @assert size(weights) == size(phi) K = length(weights) cats = rand(Categorical(weights), N) V = [rand(Binomial(T, phi[cats[idx]])) for idx in 1:N] vafs = V ./ T return vafs end function exchange_points(A, dist; iters=1000) A = copy(A) N = length(A) if N % 2 == 0 A = A[2:end] N -= 1 end med = median(A) mad = calc_mad(A) for iter in 1:iters idx = rand(1:N) V = A[idx] dev = abs(med - V) if V < med && dev < mad L = med - mad U = med elseif V < med && dev > mad L = 0 U = med - mad elseif V > med && dev < mad L = med U = med + mad elseif V > med && dev > mad L = med + mad U = 1 else continue end A[idx] = rand(dist) * (U - L) + L end return A end function test_beta() K = 10 N = 10000 X = [rand(Beta(0.5*K, 0.5*K), N)] K *= 8.2 push!(X, [ rand(Beta(0.25*K, 0.75*K), floor(Int, 0.45*N)); rand(Beta(0.50*K, 0.50*K), N); rand(Beta(0.75*K, 0.25*K), floor(Int, 0.45*N)); ]) push!(X, exchange_points(X, iters=10N)) open("out.html", "w") do F for xvals in X println((calc_mad(xvals), median(xvals), mean(xvals))) write(F, plot_histogram(xvals, "VAFs")) write(F, "
") end end end function test_binom() N = 10000 T = 100 X = [] push!(X, sample_vafs( N, 0.15*T, [1.], [0.2], )) push!(X, sample_vafs( N, 2.75*T, [0.5, 0.5], [0.3, 0.59], )) push!(X, sample_vafs( N, 2.75*T, [0.28, 0.44, 0.28], [0.67, 0.40, 0.25], )) for dist in (Uniform(0,1), Beta(0.5,5), Beta(5,5)) push!(X, exchange_points( X, dist, iters=10N), ) end push!(X, sample_vafs( N, 2.75*T, [0.35, 0.65], [0.65, 0.45], )) return X end function main() threshold = 1e-5 iters = 10000 done = 0 dists = Array{Any}(undef,iters) deltas = fill(Inf, iters) # Samples with replacement. We would prefer without replacement, but this # would require using StatsBase. seeds = rand(1:2^32, iters) lck = ReentrantLock() Threads.@threads for iter in 1:iters seed = seeds[iter] Random.seed!(seed) dists[iter] = test_binom() scores = [calc_mad(xvals)/median(xvals) for xvals in dists[iter]] # Exclude the two-subclone case that isn't supposed to have a similar MATH # to other cases. target_scores = scores[1:(end-1)] deltas[iter] = maximum(target_scores) - minimum(target_scores) lock(lck) do done += 1 status = (done, iter, iters, seed, deltas[iter], minimum(deltas)) println(status) end end best = argmin(deltas) html = "" for xvals in dists[best] html *= "