Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Code to optimize multi-allele tumor heterogeneity scores (MATH). Specifically, we modify an underlying VAF distribution by swapping points while maintaining a constant MAD and median, yielding a constant MATH.
using Distributions
using Random
using Statistics
using Gadfly
using Base64
import Cairo, Fontconfig
function make_png(plt, width, height)
io = IOBuffer()
b64io = Base64EncodePipe(io)
draw(SVG(b64io, width, height), plt)
close(b64io)
return "<img src='data:image/svg+xml;base64,$(String(take!(io)))'>"
end
function plot_histogram(vals, xlabel; width=600px, height=400px)
@assert !any(isnothing.(vals))
plt = plot(
x = vec(vals),
Geom.histogram(bincount=30),
Guide.xlabel(xlabel),
Guide.ylabel("Count"),
Coord.cartesian(xmin=0, xmax=1.1),
)
return make_png(plt, width, height)
end
function calc_mad(A)
med = median(A)
mad = median(abs.(A .- med))
return mad
end
function sample_vafs(N, T, weights, phi)
@assert size(weights) == size(phi)
K = length(weights)
cats = rand(Categorical(weights), N)
V = [rand(Binomial(T, phi[cats[idx]])) for idx in 1:N]
vafs = V ./ T
return vafs
end
function exchange_points(A, dist; iters=1000)
A = copy(A)
N = length(A)
if N % 2 == 0
A = A[2:end]
N -= 1
end
med = median(A)
mad = calc_mad(A)
for iter in 1:iters
idx = rand(1:N)
V = A[idx]
dev = abs(med - V)
if V < med && dev < mad
L = med - mad
U = med
elseif V < med && dev > mad
L = 0
U = med - mad
elseif V > med && dev < mad
L = med
U = med + mad
elseif V > med && dev > mad
L = med + mad
U = 1
else
continue
end
A[idx] = rand(dist) * (U - L) + L
end
return A
end
function test_beta()
K = 10
N = 10000
X = [rand(Beta(0.5*K, 0.5*K), N)]
K *= 8.2
push!(X, [
rand(Beta(0.25*K, 0.75*K), floor(Int, 0.45*N));
rand(Beta(0.50*K, 0.50*K), N);
rand(Beta(0.75*K, 0.25*K), floor(Int, 0.45*N));
])
push!(X, exchange_points(X[1], iters=10N))
open("out.html", "w") do F
for xvals in X
println((calc_mad(xvals), median(xvals), mean(xvals)))
write(F, plot_histogram(xvals, "VAFs"))
write(F, "<br>")
end
end
end
function test_binom()
N = 10000
T = 100
X = []
push!(X, sample_vafs(
N,
0.15*T,
[1.],
[0.2],
))
push!(X, sample_vafs(
N,
2.75*T,
[0.5, 0.5],
[0.3, 0.59],
))
push!(X, sample_vafs(
N,
2.75*T,
[0.28, 0.44, 0.28],
[0.67, 0.40, 0.25],
))
for dist in (Uniform(0,1), Beta(5,0.5))
push!(X, exchange_points(
X[3],
dist,
iters=10N),
)
end
push!(X, sample_vafs(
N,
2.75*T,
[0.35, 0.65],
[0.65, 0.45],
))
open("out.html", "w") do F
for xvals in X
write(F, "<p>$((calc_mad(xvals), median(xvals), mean(xvals), calc_mad(xvals)/median(xvals)))</p>")
write(F, plot_histogram(xvals, "Variant allele frequency (VAF)"))
write(F, "<br>")
end
end
return [calc_mad(xvals)/median(xvals) for xvals in X]
end
function main()
threshold = 1e-5
iters = 1000
min_delta = Inf
for iter in 1:iters
seed = rand(1:2^32)
Random.seed!(seed)
scores = test_binom()[1:(end-1)]
delta_max = maximum(scores) - minimum(scores)
if delta_max < min_delta
min_delta = delta_max
end
println((iter, seed, delta_max, min_delta))
if delta_max < threshold
break
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment