Created
October 11, 2016 22:16
-
-
Save ryantuck/f2d716bbac24a1956be78ef03d75413d to your computer and use it in GitHub Desktop.
messing around with generating strings given frequencies of substrings in python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
# alphabet that frequency counts are keyed by
letters = [
    'A',
    'C',
    'T',
    'G',
]

# example triplets and their frequencies, paired by position:
# freqs[i] is the probability of drawing trips[i]
# (illustrative values only -- substitute real measurements)
trips = [
    'AAG',
    'CCC',
    'TTT',
]
freqs = [
    0.25,
    0.70,
    0.05,
]
def pick(triplets=None, frequencies=None):
    """Return one triplet chosen at random, weighted by its frequency.

    Args:
        triplets: candidate strings. Defaults to the module-level ``trips``.
        frequencies: probability of each triplet, in the same order as
            ``triplets``. Defaults to the module-level ``freqs``.

    Returns:
        One element of ``triplets``.

    Raises:
        ValueError: if there is no (triplet, frequency) pair to choose from.
    """
    if triplets is None:
        triplets = trips
    if frequencies is None:
        frequencies = freqs

    pairs = list(zip(triplets, frequencies))
    if not pairs:
        raise ValueError('pick() needs at least one triplet and frequency')

    # generate a random number between 0 and 1
    x = random.uniform(0, 1)

    # walk the cumulative distribution; the first bucket whose upper
    # bound reaches x is the chosen triplet
    running_sum = 0
    for trip, freq in pairs:
        running_sum += freq
        if x <= running_sum:
            return trip

    # x landed past the final bucket, which happens when the frequencies
    # add up to slightly less than 1 (e.g. floating-point rounding).
    # Fall back to the last triplet instead of implicitly returning None.
    return pairs[-1][0]
def assemble_string(num_triplets=100):
    """Build a string by concatenating randomly picked triplets.

    Args:
        num_triplets: how many triplets to draw with ``pick()``.
            Defaults to 100.

    Returns:
        The concatenation of the picked triplets, in draw order.
    """
    # join once instead of growing the string with repeated +=
    return ''.join(pick() for _ in range(num_triplets))
def frequency_breakdown(input_string):
    """Count the occurrences of each known letter in ``input_string``.

    Every entry of the module-level ``letters`` appears in the result,
    starting from zero. A character that is not in ``letters`` raises
    ``KeyError``.
    """
    counts = dict.fromkeys(letters, 0)
    for char in input_string:
        counts[char] = counts[char] + 1
    return counts
def iterate(num_trials):
    """Run ``num_trials`` string generations and sum their letter counts.

    Each trial assembles a fresh random string and tallies its letters;
    the per-letter tallies are accumulated across all trials and returned
    as a dict keyed by the module-level ``letters``.
    """
    totals = dict.fromkeys(letters, 0)
    for _ in range(num_trials):
        # one trial: generate a string, then count its letters
        trial_counts = frequency_breakdown(assemble_string())
        # fold this trial's counts into the running totals
        for letter in totals:
            totals[letter] += trial_counts[letter]
    return totals
def convert_to_percentages(frequencies):
    """Normalise a dict of counts so that its values sum to 1.

    Despite the name, the values returned are fractions in the range
    0..1, not numbers out of 100.

    Args:
        frequencies: mapping of key -> numeric count.

    Returns:
        A new dict with the same keys, each value divided by the total
        of all counts. An empty input gives an empty dict.

    Raises:
        ZeroDivisionError: if the mapping is non-empty but its counts
            sum to zero.
    """
    total = sum(frequencies.values())
    # .items() exists on both Python 2 and 3; .iteritems() is Python 2 only
    return {key: float(count) / total for key, count in frequencies.items()}
# actually do the experiment
if __name__ == '__main__':
    results = iterate(10000)
    # a parenthesised single-argument print behaves the same on
    # Python 2 and Python 3; the bare print statement is a syntax
    # error on Python 3
    print(convert_to_percentages(results))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment