saulshanabrook/helpers.py

## helpers.py
from collections import namedtuple

Parents = namedtuple('Parents', ['mother', 'father'])

from itertools import zip_longest

from collections import Counter, abc

class FrozenCounter(Counter):
    '''
    from http://stackoverflow.com/q/10045562/907060
    '''
    def __hash__(self):
        "Implements hash(self) -> int"
        if not hasattr(self, '_hash'):
            self._hash = hash(frozenset(self.items()))
        return self._hash

def chunks(l, n):
    """
    from http://stackoverflow.com/a/312464/907060
    Yield successive n-sized chunks from l.
    """
    return zip(*[iter(l)] * n)


class Gene(abc.Hashable, abc.Set):
    def __init__(self, alleles):
        self.alleles = tuple(sorted(alleles))
        self._verify()

    def _verify(self):
        assert len(self.alleles) == 2
        assert self.alleles[0].lower() == self.alleles[1].lower()

    def __contains__(self, allele):
        return allele in self.alleles

    def __iter__(self):
        return iter(self.alleles)

    def __len__(self):
        return 2

    def __hash__(self):
        return hash(self.alleles)

    def __repr__(self):
        return 'Gene(\'' + ''.join(self.alleles) + '\')'

    def __str__(self):
        return ''.join(self.alleles)

    @property
    def dominant(self):
        return self.alleles[0].isupper()

    @property
    def recessive(self):
        return not self.dominant

assert repr(Gene('aA') == "Gene('Aa')")
assert repr(Gene('rR') == "Gene('rR')")

assert Gene('rr').recessive
assert Gene('Rr').dominant
assert Gene('RR').dominant

from itertools import product, chain


class Genotype(abc.Hashable, abc.Set):
    def __init__(self, alleles):
        self._traits = tuple(map(Gene, chunks(alleles, 2)))

    def __contains__(self, gene):
        return gene in self._traits

    def __iter__(self):
        return iter(self._traits)

    def __len__(self):
        return len(self._traits)

    def __hash__(self):
        return hash(self._traits)

    def __repr__(self):
        return 'Genotype(\'' + ''.join(map(str, self._traits)) + '\')'

    def __str__(self):
        return ''.join(map(str, self._traits))

    @classmethod
    def from_nested(cls, nested_genes):
        return cls(chain(*nested_genes))

assert repr(Genotype('AabB') == "Genotype('AaBb')")
assert repr(Gene('rR')) == repr(Gene('Rr'))
assert Genotype.from_nested([['A', 'a'], ['B', 'b']]) == Genotype('AaBb')

from functools import partial

def all_genotype_parents_from_structure(structure):
    '''
    Returns all the possible parents given a phenotypicaly structure
    '''

    # ['Aa', 'Bb', ...]
    all_alleles = [([trait[0][0].upper(), trait[0][0].lower()]) for trait in structure]

    # [['Aa', 'aa', ...], ['Bb', ...]]
    product_twice = lambda l: tuple(product(l, repeat=2))
    all_possible_gene_pairs = map(product_twice, all_alleles)

    # [[['A', 'A'], ['B', 'b']], [['A', 'A'], ['B', 'b']], ...]
    all_parents = product(*all_possible_gene_pairs)

    # [Genotype('AABb'), Genotype('AABb'), ...]
    all_parents = list(map(Genotype.from_nested, all_parents))

    # [[Genotype('AABb'), Genotype('AABb')], [Genotype('AABb'), Genotype('AABB')], ...]
    parent_combinations = product(all_parents, all_parents)

    return set(map(FrozenCounter, parent_combinations))


assert FrozenCounter([Genotype('RRSS'), Genotype('RRSS',)]) in all_genotype_parents_from_structure((('red', 'yellow'), ('smooth', 'r')))
assert all_genotype_parents_from_structure((('red', 'yellow'), ),) == {FrozenCounter([Genotype('RR'), Genotype('RR',)]),
                                                                       FrozenCounter([Genotype('RR'), Genotype('Rr',)]),
                                                                       FrozenCounter([Genotype('RR'), Genotype('rr',)]),
                                                                       FrozenCounter([Genotype('rr'), Genotype('rr',)]),
                                                                       FrozenCounter([Genotype('Rr'), Genotype('Rr',)]),
                                                                       FrozenCounter([Genotype('Rr'), Genotype('rr',)])}


def phenotype_from_genotype(genotype, structure):
    return tuple(expression[0] if gene.dominant else expression[1] for gene, expression in zip(genotype, structure))

assert phenotype_from_genotype(Genotype('bbll'), (('black', 'white'), ('long', 'short'))) == ('white', 'short')
assert phenotype_from_genotype(Genotype('bb'), (('black', 'white'),)) == ('white', )
assert phenotype_from_genotype(Genotype('Bb'), (('black', 'white'),)) == ('black', )


def normalize_values(dictionary):
    sum_values = sum(dictionary.values())
    return dictionary.__class__({key: value / sum_values for key, value in dictionary.items()})

assert normalize_values({'_': 1.0})['_'] == 1.0
assert normalize_values({'_': 1.0, 'h': 1.0})['_'] == 0.5

product_from_iter = lambda l: product(*l)

def offspring_genotypes(parent_genotypes):
    # [(Gene('Aa'), Gene('Aa')), (Gene('Bb'), Gene('Bb'))]
    gathered_genes = zip(*parent_genotypes)

    # [[('A', 'A'), ('A', 'a'), ('a', 'A'), ('a', 'a')], [('B', 'B'), ('B', 'b'), ('b', 'B'), ('b', 'b')]]
    gene_combinations = map(product_from_iter, gathered_genes)

    # [[('A', 'A'), ('B', 'B')], [('A', 'A'), ('B', 'b')], ...]
    offspring = product_from_iter(gene_combinations)

    # [Genotype('AABB'), Genotype('AABb'), ...]
    offspring = map(Genotype.from_nested, offspring)

    return Counter(offspring)

assert offspring_genotypes((Genotype('Aa'), Genotype('aa'))) == Counter({Genotype('Aa'): 2, Genotype('aa'): 2})
assert offspring_genotypes((Genotype('aa'), Genotype('aa'))) == Counter({Genotype('aa'): 4})
assert offspring_genotypes((Genotype('Aa'), Genotype('Aa'))) == Counter({Genotype('Aa'): 2, Genotype('aa'): 1, Genotype('AA'): 1})

assert offspring_genotypes((Genotype('AaBb'), Genotype('AaBb'))) == Counter({Genotype('AaBb'): 4,
                                                         Genotype('Aabb'): 2,
                                                         Genotype('AaBB'): 2,
                                                         Genotype('aaBb'): 2,
                                                         Genotype('AABb'): 2,
                                                         Genotype('aabb'): 1,
                                                         Genotype('AABB'): 1,
                                                         Genotype('aaBB'): 1,
                                                         Genotype('AAbb'): 1})


def pprint_parents_genotypes(parents_genotypes):
    return ' (x) '.join(map(str, parents_genotypes))

assert pprint_parents_genotypes(Parents(Genotype('AA'), Genotype(('aa')))) == 'AA (x) aa'


def round_values(dictionary, round_p):
    return dictionary.__class__({key: round(value, round_p) for key, value in dictionary.items()})

assert(round_values({'_': 1.9898}, 1) == {'_': 2.0})


from itertools import repeat
from collections import Counter

def parent_genotypes(parents_phenotypes, offspring_phenotypes, structure):
    '''
    Returns the probabiblity of different parent genotypes, based on the phenotypes of the known children
    and parents.
    '''

    parents_genotypes_probability = Counter()

    # lets try a brute force technique. get a combination of every possible parents genotypes and
    # try them on the parents and offspring phenotypes to see if they match.
    for trial_parents_genotypes in all_genotype_parents_from_structure(structure):
        # first make sure that the parents work
        trial_parents_phenotypes = map(phenotype_from_genotype, trial_parents_genotypes.elements(), repeat(structure))
        if Counter(trial_parents_phenotypes) == Counter(parents_phenotypes):
            # then we want to see if the real offspring are a subset of the children of those parents
            trial_offspring_genotypes = offspring_genotypes(trial_parents_genotypes.elements())
            trial_offspring_phenotypes = Counter(map(phenotype_from_genotype, trial_offspring_genotypes.elements(), repeat(structure)))
            if set(offspring_phenotypes).issubset(set(trial_offspring_phenotypes)):
                # then we know that those parents could actually be the right ones, however they might not be,
                # so we also need to compute, how probable it is.

                # one way to look at it is, if the `trial_parents` had `len(offspring_phenotypes)` kids, what percentage
                # of the time would the phenotypes of their kids be `offspring_phenotypes`?

                # that gives us the probability of getting the actual litter we have with these parents.

                # then we can compare that probability to the other options, and see which one is more likely.
                chance_that_offspring_came_from_trial_parents = 1

                trial_offspring_phenotypes_percentiles = normalize_values(trial_offspring_phenotypes)
                for offspring_phenotype in offspring_phenotypes:
                    # calculate the chance of getting each child, and since we are talking about succesion we multiply them
                    chance_that_offspring_came_from_trial_parents *= trial_offspring_phenotypes_percentiles[offspring_phenotype]

                parents_genotypes_probability[pprint_parents_genotypes(trial_parents_genotypes.elements())] = chance_that_offspring_came_from_trial_parents

    return normalize_values(parents_genotypes_probability)

assert parent_genotypes(Parents(('black',), ('black',)), (('black',),), (('orange', 'black'),),) == {'oo (x) oo': 1.0}
assert round_values(parent_genotypes(
    parents_phenotypes=(
        ('orange',),
        ('orange',)
    ),
    offspring_phenotypes=list(Counter({
        ('orange',): 100,
    }).elements()),
    structure=(
        ('orange', 'black'),
    ),
), 1) == {'Oo (x) Oo': 0.0, 'OO (x) OO': 0.5, 'OO (x) Oo': 0.5}

## punnet.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              punnet.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	from collections import namedtuple

	Parents = namedtuple('Parents', ['mother', 'father'])

	from itertools import zip_longest

	from collections import Counter, abc

	class FrozenCounter(Counter):
	'''
	from http://stackoverflow.com/q/10045562/907060
	'''
	def __hash__(self):
	"Implements hash(self) -> int"
	if not hasattr(self, '_hash'):
	self._hash = hash(frozenset(self.items()))
	return self._hash

	def chunks(l, n):
	"""
	from http://stackoverflow.com/a/312464/907060
	Yield successive n-sized chunks from l.
	"""
	return zip([iter(l)] n)


	class Gene(abc.Hashable, abc.Set):
	def __init__(self, alleles):
	self.alleles = tuple(sorted(alleles))
	self._verify()

	def _verify(self):
	assert len(self.alleles) == 2
	assert self.alleles[0].lower() == self.alleles[1].lower()

	def __contains__(self, allele):
	return allele in self.alleles

	def __iter__(self):
	return iter(self.alleles)

	def __len__(self):
	return 2

	def __hash__(self):
	return hash(self.alleles)

	def __repr__(self):
	return 'Gene(\'' + ''.join(self.alleles) + '\')'

	def __str__(self):
	return ''.join(self.alleles)

	@property
	def dominant(self):
	return self.alleles[0].isupper()

	@property
	def recessive(self):
	return not self.dominant

	assert repr(Gene('aA') == "Gene('Aa')")
	assert repr(Gene('rR') == "Gene('rR')")

	assert Gene('rr').recessive
	assert Gene('Rr').dominant
	assert Gene('RR').dominant

	from itertools import product, chain


	class Genotype(abc.Hashable, abc.Set):
	def __init__(self, alleles):
	self._traits = tuple(map(Gene, chunks(alleles, 2)))

	def __contains__(self, gene):
	return gene in self._traits

	def __iter__(self):
	return iter(self._traits)

	def __len__(self):
	return len(self._traits)

	def __hash__(self):
	return hash(self._traits)

	def __repr__(self):
	return 'Genotype(\'' + ''.join(map(str, self._traits)) + '\')'

	def __str__(self):
	return ''.join(map(str, self._traits))

	@classmethod
	def from_nested(cls, nested_genes):
	return cls(chain(*nested_genes))

	assert repr(Genotype('AabB') == "Genotype('AaBb')")
	assert repr(Gene('rR')) == repr(Gene('Rr'))
	assert Genotype.from_nested([['A', 'a'], ['B', 'b']]) == Genotype('AaBb')

	from functools import partial

	def all_genotype_parents_from_structure(structure):
	'''
	Returns all the possible parents given a phenotypicaly structure
	'''

	# ['Aa', 'Bb', ...]
	all_alleles = [([trait[0][0].upper(), trait[0][0].lower()]) for trait in structure]

	# [['Aa', 'aa', ...], ['Bb', ...]]
	product_twice = lambda l: tuple(product(l, repeat=2))
	all_possible_gene_pairs = map(product_twice, all_alleles)

	# [[['A', 'A'], ['B', 'b']], [['A', 'A'], ['B', 'b']], ...]
	all_parents = product(*all_possible_gene_pairs)

	# [Genotype('AABb'), Genotype('AABb'), ...]
	all_parents = list(map(Genotype.from_nested, all_parents))

	# [[Genotype('AABb'), Genotype('AABb')], [Genotype('AABb'), Genotype('AABB')], ...]
	parent_combinations = product(all_parents, all_parents)

	return set(map(FrozenCounter, parent_combinations))


	assert FrozenCounter([Genotype('RRSS'), Genotype('RRSS',)]) in all_genotype_parents_from_structure((('red', 'yellow'), ('smooth', 'r')))
	assert all_genotype_parents_from_structure((('red', 'yellow'), ),) == {FrozenCounter([Genotype('RR'), Genotype('RR',)]),
	FrozenCounter([Genotype('RR'), Genotype('Rr',)]),
	FrozenCounter([Genotype('RR'), Genotype('rr',)]),
	FrozenCounter([Genotype('rr'), Genotype('rr',)]),
	FrozenCounter([Genotype('Rr'), Genotype('Rr',)]),
	FrozenCounter([Genotype('Rr'), Genotype('rr',)])}


	def phenotype_from_genotype(genotype, structure):
	return tuple(expression[0] if gene.dominant else expression[1] for gene, expression in zip(genotype, structure))

	assert phenotype_from_genotype(Genotype('bbll'), (('black', 'white'), ('long', 'short'))) == ('white', 'short')
	assert phenotype_from_genotype(Genotype('bb'), (('black', 'white'),)) == ('white', )
	assert phenotype_from_genotype(Genotype('Bb'), (('black', 'white'),)) == ('black', )


	def normalize_values(dictionary):
	sum_values = sum(dictionary.values())
	return dictionary.__class__({key: value / sum_values for key, value in dictionary.items()})

	assert normalize_values({'_': 1.0})['_'] == 1.0
	assert normalize_values({'_': 1.0, 'h': 1.0})['_'] == 0.5

	product_from_iter = lambda l: product(*l)

	def offspring_genotypes(parent_genotypes):
	# [(Gene('Aa'), Gene('Aa')), (Gene('Bb'), Gene('Bb'))]
	gathered_genes = zip(*parent_genotypes)

	# [[('A', 'A'), ('A', 'a'), ('a', 'A'), ('a', 'a')], [('B', 'B'), ('B', 'b'), ('b', 'B'), ('b', 'b')]]
	gene_combinations = map(product_from_iter, gathered_genes)

	# [[('A', 'A'), ('B', 'B')], [('A', 'A'), ('B', 'b')], ...]
	offspring = product_from_iter(gene_combinations)

	# [Genotype('AABB'), Genotype('AABb'), ...]
	offspring = map(Genotype.from_nested, offspring)

	return Counter(offspring)

	assert offspring_genotypes((Genotype('Aa'), Genotype('aa'))) == Counter({Genotype('Aa'): 2, Genotype('aa'): 2})
	assert offspring_genotypes((Genotype('aa'), Genotype('aa'))) == Counter({Genotype('aa'): 4})
	assert offspring_genotypes((Genotype('Aa'), Genotype('Aa'))) == Counter({Genotype('Aa'): 2, Genotype('aa'): 1, Genotype('AA'): 1})

	assert offspring_genotypes((Genotype('AaBb'), Genotype('AaBb'))) == Counter({Genotype('AaBb'): 4,
	Genotype('Aabb'): 2,
	Genotype('AaBB'): 2,
	Genotype('aaBb'): 2,
	Genotype('AABb'): 2,
	Genotype('aabb'): 1,
	Genotype('AABB'): 1,
	Genotype('aaBB'): 1,
	Genotype('AAbb'): 1})



	def pprint_parents_genotypes(parents_genotypes):
	return ' (x) '.join(map(str, parents_genotypes))

	assert pprint_parents_genotypes(Parents(Genotype('AA'), Genotype(('aa')))) == 'AA (x) aa'



	def round_values(dictionary, round_p):
	return dictionary.__class__({key: round(value, round_p) for key, value in dictionary.items()})

	assert(round_values({'_': 1.9898}, 1) == {'_': 2.0})


	from itertools import repeat
	from collections import Counter

	def parent_genotypes(parents_phenotypes, offspring_phenotypes, structure):
	'''
	Returns the probabiblity of different parent genotypes, based on the phenotypes of the known children
	and parents.
	'''

	parents_genotypes_probability = Counter()

	# lets try a brute force technique. get a combination of every possible parents genotypes and
	# try them on the parents and offspring phenotypes to see if they match.
	for trial_parents_genotypes in all_genotype_parents_from_structure(structure):
	# first make sure that the parents work
	trial_parents_phenotypes = map(phenotype_from_genotype, trial_parents_genotypes.elements(), repeat(structure))
	if Counter(trial_parents_phenotypes) == Counter(parents_phenotypes):
	# then we want to see if the real offspring are a subset of the children of those parents
	trial_offspring_genotypes = offspring_genotypes(trial_parents_genotypes.elements())
	trial_offspring_phenotypes = Counter(map(phenotype_from_genotype, trial_offspring_genotypes.elements(), repeat(structure)))
	if set(offspring_phenotypes).issubset(set(trial_offspring_phenotypes)):
	# then we know that those parents could actually be the right ones, however they might not be,
	# so we also need to compute, how probable it is.

	# one way to look at it is, if the `trial_parents` had `len(offspring_phenotypes)` kids, what percentage
	# of the time would the phenotypes of their kids be `offspring_phenotypes`?

	# that gives us the probability of getting the actual litter we have with these parents.

	# then we can compare that probability to the other options, and see which one is more likely.
	chance_that_offspring_came_from_trial_parents = 1

	trial_offspring_phenotypes_percentiles = normalize_values(trial_offspring_phenotypes)
	for offspring_phenotype in offspring_phenotypes:
	# calculate the chance of getting each child, and since we are talking about succesion we multiply them
	chance_that_offspring_came_from_trial_parents *= trial_offspring_phenotypes_percentiles[offspring_phenotype]

	parents_genotypes_probability[pprint_parents_genotypes(trial_parents_genotypes.elements())] = chance_that_offspring_came_from_trial_parents

	return normalize_values(parents_genotypes_probability)

	assert parent_genotypes(Parents(('black',), ('black',)), (('black',),), (('orange', 'black'),),) == {'oo (x) oo': 1.0}
	assert round_values(parent_genotypes(
	parents_phenotypes=(
	('orange',),
	('orange',)
	),
	offspring_phenotypes=list(Counter({
	('orange',): 100,
	}).elements()),
	structure=(
	('orange', 'black'),
	),
	), 1) == {'Oo (x) Oo': 0.0, 'OO (x) OO': 0.5, 'OO (x) Oo': 0.5}