Last active
August 29, 2015 14:01
-
-
Save saulshanabrook/94fc5328853b86562f63 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
Parents = namedtuple('Parents', ['mother', 'father']) | |
from itertools import zip_longest | |
from collections import Counter, abc | |
class FrozenCounter(Counter): | |
''' | |
from http://stackoverflow.com/q/10045562/907060 | |
''' | |
def __hash__(self): | |
"Implements hash(self) -> int" | |
if not hasattr(self, '_hash'): | |
self._hash = hash(frozenset(self.items())) | |
return self._hash | |
def chunks(l, n): | |
""" | |
from http://stackoverflow.com/a/312464/907060 | |
Yield successive n-sized chunks from l. | |
""" | |
return zip(*[iter(l)] * n) | |
class Gene(abc.Hashable, abc.Set): | |
def __init__(self, alleles): | |
self.alleles = tuple(sorted(alleles)) | |
self._verify() | |
def _verify(self): | |
assert len(self.alleles) == 2 | |
assert self.alleles[0].lower() == self.alleles[1].lower() | |
def __contains__(self, allele): | |
return allele in self.alleles | |
def __iter__(self): | |
return iter(self.alleles) | |
def __len__(self): | |
return 2 | |
def __hash__(self): | |
return hash(self.alleles) | |
def __repr__(self): | |
return 'Gene(\'' + ''.join(self.alleles) + '\')' | |
def __str__(self): | |
return ''.join(self.alleles) | |
@property | |
def dominant(self): | |
return self.alleles[0].isupper() | |
@property | |
def recessive(self): | |
return not self.dominant | |
assert repr(Gene('aA') == "Gene('Aa')") | |
assert repr(Gene('rR') == "Gene('rR')") | |
assert Gene('rr').recessive | |
assert Gene('Rr').dominant | |
assert Gene('RR').dominant | |
from itertools import product, chain | |
class Genotype(abc.Hashable, abc.Set): | |
def __init__(self, alleles): | |
self._traits = tuple(map(Gene, chunks(alleles, 2))) | |
def __contains__(self, gene): | |
return gene in self._traits | |
def __iter__(self): | |
return iter(self._traits) | |
def __len__(self): | |
return len(self._traits) | |
def __hash__(self): | |
return hash(self._traits) | |
def __repr__(self): | |
return 'Genotype(\'' + ''.join(map(str, self._traits)) + '\')' | |
def __str__(self): | |
return ''.join(map(str, self._traits)) | |
@classmethod | |
def from_nested(cls, nested_genes): | |
return cls(chain(*nested_genes)) | |
assert repr(Genotype('AabB') == "Genotype('AaBb')") | |
assert repr(Gene('rR')) == repr(Gene('Rr')) | |
assert Genotype.from_nested([['A', 'a'], ['B', 'b']]) == Genotype('AaBb') | |
from functools import partial | |
def all_genotype_parents_from_structure(structure): | |
''' | |
Returns all the possible parents given a phenotypicaly structure | |
''' | |
# ['Aa', 'Bb', ...] | |
all_alleles = [([trait[0][0].upper(), trait[0][0].lower()]) for trait in structure] | |
# [['Aa', 'aa', ...], ['Bb', ...]] | |
product_twice = lambda l: tuple(product(l, repeat=2)) | |
all_possible_gene_pairs = map(product_twice, all_alleles) | |
# [[['A', 'A'], ['B', 'b']], [['A', 'A'], ['B', 'b']], ...] | |
all_parents = product(*all_possible_gene_pairs) | |
# [Genotype('AABb'), Genotype('AABb'), ...] | |
all_parents = list(map(Genotype.from_nested, all_parents)) | |
# [[Genotype('AABb'), Genotype('AABb')], [Genotype('AABb'), Genotype('AABB')], ...] | |
parent_combinations = product(all_parents, all_parents) | |
return set(map(FrozenCounter, parent_combinations)) | |
assert FrozenCounter([Genotype('RRSS'), Genotype('RRSS',)]) in all_genotype_parents_from_structure((('red', 'yellow'), ('smooth', 'r'))) | |
assert all_genotype_parents_from_structure((('red', 'yellow'), ),) == {FrozenCounter([Genotype('RR'), Genotype('RR',)]), | |
FrozenCounter([Genotype('RR'), Genotype('Rr',)]), | |
FrozenCounter([Genotype('RR'), Genotype('rr',)]), | |
FrozenCounter([Genotype('rr'), Genotype('rr',)]), | |
FrozenCounter([Genotype('Rr'), Genotype('Rr',)]), | |
FrozenCounter([Genotype('Rr'), Genotype('rr',)])} | |
def phenotype_from_genotype(genotype, structure): | |
return tuple(expression[0] if gene.dominant else expression[1] for gene, expression in zip(genotype, structure)) | |
assert phenotype_from_genotype(Genotype('bbll'), (('black', 'white'), ('long', 'short'))) == ('white', 'short') | |
assert phenotype_from_genotype(Genotype('bb'), (('black', 'white'),)) == ('white', ) | |
assert phenotype_from_genotype(Genotype('Bb'), (('black', 'white'),)) == ('black', ) | |
def normalize_values(dictionary): | |
sum_values = sum(dictionary.values()) | |
return dictionary.__class__({key: value / sum_values for key, value in dictionary.items()}) | |
assert normalize_values({'_': 1.0})['_'] == 1.0 | |
assert normalize_values({'_': 1.0, 'h': 1.0})['_'] == 0.5 | |
product_from_iter = lambda l: product(*l) | |
def offspring_genotypes(parent_genotypes): | |
# [(Gene('Aa'), Gene('Aa')), (Gene('Bb'), Gene('Bb'))] | |
gathered_genes = zip(*parent_genotypes) | |
# [[('A', 'A'), ('A', 'a'), ('a', 'A'), ('a', 'a')], [('B', 'B'), ('B', 'b'), ('b', 'B'), ('b', 'b')]] | |
gene_combinations = map(product_from_iter, gathered_genes) | |
# [[('A', 'A'), ('B', 'B')], [('A', 'A'), ('B', 'b')], ...] | |
offspring = product_from_iter(gene_combinations) | |
# [Genotype('AABB'), Genotype('AABb'), ...] | |
offspring = map(Genotype.from_nested, offspring) | |
return Counter(offspring) | |
assert offspring_genotypes((Genotype('Aa'), Genotype('aa'))) == Counter({Genotype('Aa'): 2, Genotype('aa'): 2}) | |
assert offspring_genotypes((Genotype('aa'), Genotype('aa'))) == Counter({Genotype('aa'): 4}) | |
assert offspring_genotypes((Genotype('Aa'), Genotype('Aa'))) == Counter({Genotype('Aa'): 2, Genotype('aa'): 1, Genotype('AA'): 1}) | |
assert offspring_genotypes((Genotype('AaBb'), Genotype('AaBb'))) == Counter({Genotype('AaBb'): 4, | |
Genotype('Aabb'): 2, | |
Genotype('AaBB'): 2, | |
Genotype('aaBb'): 2, | |
Genotype('AABb'): 2, | |
Genotype('aabb'): 1, | |
Genotype('AABB'): 1, | |
Genotype('aaBB'): 1, | |
Genotype('AAbb'): 1}) | |
def pprint_parents_genotypes(parents_genotypes): | |
return ' (x) '.join(map(str, parents_genotypes)) | |
assert pprint_parents_genotypes(Parents(Genotype('AA'), Genotype(('aa')))) == 'AA (x) aa' | |
def round_values(dictionary, round_p): | |
return dictionary.__class__({key: round(value, round_p) for key, value in dictionary.items()}) | |
assert(round_values({'_': 1.9898}, 1) == {'_': 2.0}) | |
from itertools import repeat | |
from collections import Counter | |
def parent_genotypes(parents_phenotypes, offspring_phenotypes, structure): | |
''' | |
Returns the probabiblity of different parent genotypes, based on the phenotypes of the known children | |
and parents. | |
''' | |
parents_genotypes_probability = Counter() | |
# lets try a brute force technique. get a combination of every possible parents genotypes and | |
# try them on the parents and offspring phenotypes to see if they match. | |
for trial_parents_genotypes in all_genotype_parents_from_structure(structure): | |
# first make sure that the parents work | |
trial_parents_phenotypes = map(phenotype_from_genotype, trial_parents_genotypes.elements(), repeat(structure)) | |
if Counter(trial_parents_phenotypes) == Counter(parents_phenotypes): | |
# then we want to see if the real offspring are a subset of the children of those parents | |
trial_offspring_genotypes = offspring_genotypes(trial_parents_genotypes.elements()) | |
trial_offspring_phenotypes = Counter(map(phenotype_from_genotype, trial_offspring_genotypes.elements(), repeat(structure))) | |
if set(offspring_phenotypes).issubset(set(trial_offspring_phenotypes)): | |
# then we know that those parents could actually be the right ones, however they might not be, | |
# so we also need to compute, how probable it is. | |
# one way to look at it is, if the `trial_parents` had `len(offspring_phenotypes)` kids, what percentage | |
# of the time would the phenotypes of their kids be `offspring_phenotypes`? | |
# that gives us the probability of getting the actual litter we have with these parents. | |
# then we can compare that probability to the other options, and see which one is more likely. | |
chance_that_offspring_came_from_trial_parents = 1 | |
trial_offspring_phenotypes_percentiles = normalize_values(trial_offspring_phenotypes) | |
for offspring_phenotype in offspring_phenotypes: | |
# calculate the chance of getting each child, and since we are talking about succesion we multiply them | |
chance_that_offspring_came_from_trial_parents *= trial_offspring_phenotypes_percentiles[offspring_phenotype] | |
parents_genotypes_probability[pprint_parents_genotypes(trial_parents_genotypes.elements())] = chance_that_offspring_came_from_trial_parents | |
return normalize_values(parents_genotypes_probability) | |
assert parent_genotypes(Parents(('black',), ('black',)), (('black',),), (('orange', 'black'),),) == {'oo (x) oo': 1.0} | |
assert round_values(parent_genotypes( | |
parents_phenotypes=( | |
('orange',), | |
('orange',) | |
), | |
offspring_phenotypes=list(Counter({ | |
('orange',): 100, | |
}).elements()), | |
structure=( | |
('orange', 'black'), | |
), | |
), 1) == {'Oo (x) Oo': 0.0, 'OO (x) OO': 0.5, 'OO (x) Oo': 0.5} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment