|
|
|
import numpy as np |
|
import itertools |
|
from functools import partial |
|
import statistics |
|
|
|
chromosome_lengths = {1: 247249719, 2: 242951149, 3: 199501827, 4: 191273063, 5: 180857866, 6: 170899992, 7: 158821424, 8: 146274826, 9: 140273252, 10: 135374737, 11: 134452384, 12: 132349534, 13: 114142980, 14: 106368585, 15: 100338915, 16: 88827254, 17: 78774742, 18: 76117153, 19: 63811651, 20: 62435964, 21: 46944323, 22: 49691432, 'Y': 57772954, 'X': 154913754} |
|
|
|
chromosome_lengths_mushed = {index:length for index,length in chromosome_lengths.items() if isinstance(index, int)} |
|
# this doesn't exactly make sense, but doing it exactly right is fiddly and doesn't make much numerical difference so whatever. |
|
chromosome_lengths_mushed[23] = (chromosome_lengths['X'] + chromosome_lengths['Y'])/2 |
|
haploid_total_length = sum(chromosome_lengths_mushed.values()) |
|
diploid_total_length = 2*haploid_total_length |
|
diploid_chromosome_fractions = {index: length/diploid_total_length for index, length in chromosome_lengths_mushed.items()} |
|
|
|
class Chromosome(): |
|
def __init__(self, segment_count, total_variance): |
|
self.segment_count = segment_count |
|
self.total_variance = total_variance |
|
self.segment_variance = self.total_variance / self.segment_count |
|
self.segment_SD = np.sqrt(self.segment_variance) |
|
self.segments = np.random.normal(loc=0, scale=self.segment_SD, size=self.segment_count) |
|
self._cache_partial_sums = np.concatenate(([0], np.cumsum(self.segments))) |
|
self.partial_sum = (self._cache_partial_sums, self._cache_partial_sums[self.segment_count] - self._cache_partial_sums) |
|
self.sum = self.partial_sum[0][self.segment_count] |
|
|
|
class DiploidChromosome(): |
|
def __init__(self, segment_count=100, total_variance = 1): |
|
|
|
self.chromosomes = [Chromosome(segment_count, total_variance) for _ in [0,1]] |
|
# to get a recombination score with lead_chr first, then cross at i, you add together the initial segment of |
|
# lead_chr[:i], .partial_sum[0], plus the "complement of initial segment [:i]", stored in partial_sum[1] |
|
self.recombinations = np.array([sum(self.chromosomes[chr].partial_sum[i] for i, chr in enumerate([lead_chr, 1-lead_chr])) |
|
for lead_chr in [0,1]]) |
|
self.max_recombination = np.max(self.recombinations) |
|
self.max_chromosome = max(chr.sum for chr in self.chromosomes) |
|
self.recombination_variance = np.var(self.recombinations) |
|
self.mean = np.mean([chr.sum for chr in self.chromosomes]) |
|
|
|
def sample_recombination(self): |
|
return self.recombinations[tuple(np.random.randint(0, dim) for dim in self.recombinations.shape)] |
|
|
|
class DiploidGenome(): |
|
def __init__(self, segment_count=100): |
|
self.chromosome_pairs = {index: DiploidChromosome(segment_count, fraction) |
|
for index, fraction in diploid_chromosome_fractions.items()} |
|
self.haploid_variance = sum(pair.recombination_variance for pair in self.chromosome_pairs.values()) |
|
self.haploid_mean = sum(pair.mean for pair in self.chromosome_pairs.values()) |
|
|
|
def sample_haploid(self): |
|
return sum(diploid_chromosome.sample_recombination() for diploid_chromosome in self.chromosome_pairs.values()) |
|
|
|
def gaussian_pdf(sigma, mu, x): return statistics.NormalDist(sigma=sigma, mu=mu).pdf(x) |
|
|
|
|
|
class Couple(): |
|
def __init__(self, segment_count=100, simulated=False): |
|
self.parents = [DiploidGenome(segment_count=segment_count) for _ in range(2)] |
|
self.embryo_variance = sum(parent.haploid_variance for parent in self.parents) |
|
self.embryo_SD = np.sqrt(self.embryo_variance) |
|
self.embryo_mean = sum(parent.haploid_mean for parent in self.parents) |
|
self.embryo_gaussian = partial(gaussian_pdf, self.embryo_SD, self.embryo_mean) |
|
|
|
def sample_embryo(self): |
|
return sum(parent.sample_haploid() for parent in self.parents) |
|
|