Skip to content

Instantly share code, notes, and snippets.

@tsvibt
Created February 24, 2025 23:04
Show Gist options
  • Save tsvibt/89cec1b4fd7d54be04ba34a0059bcc4c to your computer and use it in GitHub Desktop.
Save tsvibt/89cec1b4fd7d54be04ba34a0059bcc4c to your computer and use it in GitHub Desktop.
import numpy as np
import itertools
from functools import partial
import statistics
chromosome_lengths = {1: 247249719, 2: 242951149, 3: 199501827, 4: 191273063, 5: 180857866, 6: 170899992, 7: 158821424, 8: 146274826, 9: 140273252, 10: 135374737, 11: 134452384, 12: 132349534, 13: 114142980, 14: 106368585, 15: 100338915, 16: 88827254, 17: 78774742, 18: 76117153, 19: 63811651, 20: 62435964, 21: 46944323, 22: 49691432, 'Y': 57772954, 'X': 154913754}
chromosome_lengths_mushed = {index:length for index,length in chromosome_lengths.items() if isinstance(index, int)}
# this doesn't exactly make sense, but doing it exactly right is fiddly and doesn't make much numerical difference so whatever.
chromosome_lengths_mushed[23] = (chromosome_lengths['X'] + chromosome_lengths['Y'])/2
haploid_total_length = sum(chromosome_lengths_mushed.values())
diploid_total_length = 2*haploid_total_length
diploid_chromosome_fractions = {index: length/diploid_total_length for index, length in chromosome_lengths_mushed.items()}
class Chromosome():
def __init__(self, segment_count, total_variance):
self.segment_count = segment_count
self.total_variance = total_variance
self.segment_variance = self.total_variance / self.segment_count
self.segment_SD = np.sqrt(self.segment_variance)
self.segments = np.random.normal(loc=0, scale=self.segment_SD, size=self.segment_count)
self._cache_partial_sums = np.concatenate(([0], np.cumsum(self.segments)))
self.partial_sum = (self._cache_partial_sums, self._cache_partial_sums[self.segment_count] - self._cache_partial_sums)
self.sum = self.partial_sum[0][self.segment_count]
class DiploidChromosome():
def __init__(self, segment_count=100, total_variance = 1):
self.chromosomes = [Chromosome(segment_count, total_variance) for _ in [0,1]]
# to get a recombination score with lead_chr first, then cross at i, you add together the initial segment of
# lead_chr[:i], .partial_sum[0], plus the "complement of initial segment [:i]", stored in partial_sum[1]
self.recombinations = np.array([sum(self.chromosomes[chr].partial_sum[i] for i, chr in enumerate([lead_chr, 1-lead_chr]))
for lead_chr in [0,1]])
self.max_recombination = np.max(self.recombinations)
self.max_chromosome = max(chr.sum for chr in self.chromosomes)
self.recombination_variance = np.var(self.recombinations)
self.mean = np.mean([chr.sum for chr in self.chromosomes])
def sample_recombination(self):
return self.recombinations[tuple(np.random.randint(0, dim) for dim in self.recombinations.shape)]
class DiploidGenome():
def __init__(self, segment_count=100):
self.chromosome_pairs = {index: DiploidChromosome(segment_count, fraction)
for index, fraction in diploid_chromosome_fractions.items()}
self.haploid_variance = sum(pair.recombination_variance for pair in self.chromosome_pairs.values())
self.haploid_mean = sum(pair.mean for pair in self.chromosome_pairs.values())
def sample_haploid(self):
return sum(diploid_chromosome.sample_recombination() for diploid_chromosome in self.chromosome_pairs.values())
def gaussian_pdf(sigma, mu, x): return statistics.NormalDist(sigma=sigma, mu=mu).pdf(x)
class Couple():
def __init__(self, segment_count=100, simulated=False):
self.parents = [DiploidGenome(segment_count=segment_count) for _ in range(2)]
self.embryo_variance = sum(parent.haploid_variance for parent in self.parents)
self.embryo_SD = np.sqrt(self.embryo_variance)
self.embryo_mean = sum(parent.haploid_mean for parent in self.parents)
self.embryo_gaussian = partial(gaussian_pdf, self.embryo_SD, self.embryo_mean)
def sample_embryo(self):
return sum(parent.sample_haploid() for parent in self.parents)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment