Skip to content

Instantly share code, notes, and snippets.

@arose13
Created January 19, 2016 17:43
Show Gist options
  • Save arose13/dffd98d06dd2c2193b43 to your computer and use it in GitHub Desktop.
Save arose13/dffd98d06dd2c2193b43 to your computer and use it in GitHub Desktop.
Genotype Combinations
import pandas as pd
from collections import Counter
from itertools import combinations_with_replacement
# Observed genotypes in the sample population. Each genotype is a
# two-character string, one character per allele.
population = [
    'pp',
    'pp',
    'qq',
    'pq',
    'pq',
    'qp',
    'pp',
]
print('Population: {}'.format(population))

alleles = 'pq'
print('Possible Alleles {}'.format(alleles))

# Tally each genotype string exactly as written, so 'pq' and 'qp' are
# still separate keys at this point.
counts = Counter(population)
print(counts)

print('Possible Allele Combinations are..')

# Gather the per-genotype counts in a plain dict and build the DataFrame
# once afterwards. The cell-by-cell ``DataFrame.set_value`` call used
# previously was deprecated in pandas 0.21 and removed in pandas 1.0.
observed_counts = {}
for genotype in combinations_with_replacement(alleles, 2):
    genotype_string = genotype[0] + genotype[1]

    # Counter returns 0 for a genotype that never occurs in the population.
    genotype_count = counts[genotype_string]
    if genotype[0] != genotype[1]:
        # Heterozygous: allele order is not meaningful, so pool the reversed
        # spelling (e.g. 'qp') into the same genotype (e.g. 'pq').
        genotype_count += counts[genotype_string[::-1]]

    print('{} {}'.format(genotype_string, genotype_count))
    observed_counts[genotype_string] = genotype_count

# Single-row table (index 0) with one column per unordered genotype, in the
# order the genotypes were generated.
observed_table = pd.DataFrame([observed_counts], columns=list(observed_counts))
print(observed_table)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment