Skip to content

Instantly share code, notes, and snippets.

@acomjean
Last active November 16, 2015 02:52
Show Gist options
  • Save acomjean/a402abda36e1c6d3e4e8 to your computer and use it in GitHub Desktop.
Save acomjean/a402abda36e1c6d3e4e8 to your computer and use it in GitHub Desktop.
import random;
#Author: Michael Thomas/ Aram Comjean
#Date: Nov/ 2015
#Huntington's Disease CAG count
# generates random sequence with at least the number of "cag" repeats specified
def generate_random_seq(number_cag_repeats, length_k=10000, off_ends=200):
    """Return a random DNA string containing an in-frame run of "CAG" codons.

    A random sequence of ``length_k`` bases drawn from "ACTG" is generated and
    ``number_cag_repeats`` copies of "CAG" are spliced into it, so the result
    is ``length_k + 3 * number_cag_repeats`` characters long. The random
    flanking bases may happen to extend the run, which is why the result
    holds *at least* the requested number of consecutive repeats.

    Args:
        number_cag_repeats: how many copies of "CAG" to insert.
        length_k: number of random bases to generate (default 10000).
        off_ends: margin, in bases, kept clear at both ends when the insert
            position is drawn (default 200).

    Returns:
        The generated sequence as a string of the characters A, C, T and G.

    Raises:
        ValueError: if the repeats do not fit between the two margins.
    """
    # Allowed window for the insert position inside the random sequence.
    min_cag_start = off_ends
    max_cag_end = length_k - off_ends - (3 * number_cag_repeats)
    if max_cag_end < min_cag_start:
        # random.randint would fail here anyway ("empty range"); say why.
        raise ValueError(
            "cannot fit %s CAG repeats into %s bases with a margin of %s"
            % (number_cag_repeats, length_k, off_ends)
        )

    distance_from_start = random.randint(min_cag_start, max_cag_end)
    # Round down to a multiple of 3 so the repeats sit on codon boundaries.
    # NOTE: this can place the insert up to 2 bases before ``off_ends``.
    distance_from_start -= distance_from_start % 3

    # Build the random background in one pass instead of repeated `+=`.
    cag_sequence = ''.join(random.choice('ACTG') for _ in range(length_k))

    # Splice the repeat run into the background at the chosen position.
    cag_repeat_string = 'CAG' * number_cag_repeats
    return (
        cag_sequence[:distance_from_start]
        + cag_repeat_string
        + cag_sequence[distance_from_start:]
    )
# Build a random sequence that carries a known run of CAG repeats.
rand_seq = generate_random_seq(49)

# List comprehension splitting the sequence into codons (3-base chunks)
# starting at offset 0; the final chunk may be shorter than 3 bases.
codons = [rand_seq[i:i + 3] for i in range(0, len(rand_seq), 3)]

# Scan the codons for the longest run of consecutive 'CAG' codons.
count = 0        # length of the run currently being scanned
max_in_row = 0   # longest run seen so far
max_pos = 0      # codon index at which the longest run starts
for pos, codon in enumerate(codons):
    if codon == 'CAG':
        if count == 0:
            # A new run begins here; remember where it started.
            current_repeat_location = pos
        count += 1
        if count > max_in_row:
            max_in_row = count
            max_pos = current_repeat_location
    else:
        # Any other codon breaks the run.
        count = 0

# Single-argument print(...) parses under both Python 2 and Python 3; the
# "%s %s" template reproduces the space the old print statement inserted
# between its two items, so the output is unchanged.
print("%s %s" % ("# of 'CAG' found in a row: ", max_in_row))
print("%s %s" % ("# of 'CAG' reparts start at codon: ", max_pos))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment