Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Code to optimize multi-allele tumor heterogeneity scores (MATH). Specifically, we modify an underlying VAF distribution by swapping points while maintaining a constant MAD and median, yielding a constant MATH.
# Code to optimize multi-allele tumor heterogeneity scores (MATH).
# Specifically, we modify an underlying VAF distribution by swapping points
# while maintaining a constant MAD and median, yielding a constant MATH.
using Distributions
using Random
using Statistics
using Gadfly
using Base64
import Cairo, Fontconfig
function make_png(plt, width, height)
io = IOBuffer()
b64io = Base64EncodePipe(io)
draw(SVG(b64io, width, height), plt)
close(b64io)
return "<img src='data:image/svg+xml;base64,$(String(take!(io)))'>"
end
function plot_histogram(vals, xlabel; width=600px, height=400px)
@assert !any(isnothing.(vals))
plt = plot(
x = vec(vals),
Geom.histogram(bincount=30),
Guide.xlabel(xlabel),
Guide.ylabel("Count"),
Coord.cartesian(xmin=0, xmax=1.1),
)
return make_png(plt, width, height)
end
function calc_mad(A)
med = median(A)
mad = median(abs.(A .- med))
return mad
end
function sample_vafs(N, T, weights, phi)
@assert size(weights) == size(phi)
K = length(weights)
cats = rand(Categorical(weights), N)
V = [rand(Binomial(T, phi[cats[idx]])) for idx in 1:N]
vafs = V ./ T
return vafs
end
function exchange_points(A, dist; iters=1000)
A = copy(A)
N = length(A)
if N % 2 == 0
A = A[2:end]
N -= 1
end
med = median(A)
mad = calc_mad(A)
for iter in 1:iters
idx = rand(1:N)
V = A[idx]
dev = abs(med - V)
if V < med && dev < mad
L = med - mad
U = med
elseif V < med && dev > mad
L = 0
U = med - mad
elseif V > med && dev < mad
L = med
U = med + mad
elseif V > med && dev > mad
L = med + mad
U = 1
else
continue
end
A[idx] = rand(dist) * (U - L) + L
end
return A
end
function test_beta()
K = 10
N = 10000
X = [rand(Beta(0.5*K, 0.5*K), N)]
K *= 8.2
push!(X, [
rand(Beta(0.25*K, 0.75*K), floor(Int, 0.45*N));
rand(Beta(0.50*K, 0.50*K), N);
rand(Beta(0.75*K, 0.25*K), floor(Int, 0.45*N));
])
push!(X, exchange_points(X[1], iters=10N))
open("out.html", "w") do F
for xvals in X
println((calc_mad(xvals), median(xvals), mean(xvals)))
write(F, plot_histogram(xvals, "VAFs"))
write(F, "<br>")
end
end
end
function test_binom()
N = 10000
T = 100
X = []
push!(X, sample_vafs(
N,
0.15*T,
[1.],
[0.2],
))
push!(X, sample_vafs(
N,
2.75*T,
[0.5, 0.5],
[0.3, 0.59],
))
push!(X, sample_vafs(
N,
2.75*T,
[0.28, 0.44, 0.28],
[0.67, 0.40, 0.25],
))
for dist in (Uniform(0,1), Beta(0.5,5), Beta(5,5))
push!(X, exchange_points(
X[3],
dist,
iters=10N),
)
end
push!(X, sample_vafs(
N,
2.75*T,
[0.35, 0.65],
[0.65, 0.45],
))
return X
end
function main()
threshold = 1e-5
iters = 10000
done = 0
dists = Array{Any}(undef,iters)
deltas = fill(Inf, iters)
# Samples with replacement. We would prefer without replacement, but this
# would require using StatsBase.
seeds = rand(1:2^32, iters)
lck = ReentrantLock()
Threads.@threads for iter in 1:iters
seed = seeds[iter]
Random.seed!(seed)
dists[iter] = test_binom()
scores = [calc_mad(xvals)/median(xvals) for xvals in dists[iter]]
# Exclude the two-subclone case that isn't supposed to have a similar MATH
# to other cases.
target_scores = scores[1:(end-1)]
deltas[iter] = maximum(target_scores) - minimum(target_scores)
lock(lck) do
done += 1
status = (done, iter, iters, seed, deltas[iter], minimum(deltas))
println(status)
end
end
best = argmin(deltas)
html = ""
for xvals in dists[best]
html *= "<p>$((calc_mad(xvals), median(xvals), mean(xvals), calc_mad(xvals)/median(xvals)))</p>"
html *= plot_histogram(xvals, "Variant allele frequency (VAF)")
html *= "<br>"
end
open("/scratch/q/qmorris/jawinter/tmp/out.html", "w") do F
write(F, html)
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment