Skip to content

Instantly share code, notes, and snippets.

View devpatelio's full-sized avatar
🏟️
!99S

Dev Patel devpatelio

🏟️
!99S
View GitHub Profile
def EulerianCycle(strings, format=True):
##Ignore this formatting block, it's only for a desired input
if format:
graph = [i.split(' -> ') for i in strings]
graph = dict(graph)
for (key, val) in graph.items():
val = val.split(',')
graph[key] = val
copy_graph = copy.deepcopy(dict(graph))
# Ten example motif strings, each 12 characters long.
lst = ['TCGGGGGTTTTT', 'CCGGTGACTTAC', 'ACGGGGATTTTC', 'TTGGGGACTTTT', 'AAGGGGACTTCC', 'TTGGGGACTTCC', 'TCGGGGATTCAT', 'TCGGGGATTCCT', 'TAGGGGAACTAC', 'TCGGGTATAACC']
# Split every string into single characters, stack the rows into one 2-D
# array and transpose it so that each row holds one position across all strings.
formatted = np.array([np.array(list(motif)) for motif in lst]).transpose()
# Count (defined elsewhere) is called without Laplace pseudocounts.
counts = Count(formatted, laplace=False)
print(counts.transpose())
# H_e (defined elsewhere) is applied to the counts; only the total is printed.
entropy = H_e(counts)
print(sum(entropy))
def Count(motif_matrix, laplace=True):
motif_matrix = list([i for i in motif_matrix])
N = len(motif_matrix[0]) * 2
count_matrix = [[] for _ in range(len(motif_matrix))]
if laplace == True:
for idx, column in enumerate(motif_matrix):
count_matrix[idx] = np.asarray([np.asarray((list(column).count(i)+1)/N) for i in ['A', 'C', 'G', 'T']])
elif laplace == False:
N = len(motif_matrix[0])
for idx, column in enumerate(motif_matrix):
def Reconstruction(final, k, d):
    """Spell a single string from an ordered list of two-part nodes.

    Every entry of ``final`` is treated as two halves glued together, split
    at ``len(final[0]) // 2``.  A "prefix" string is spelled from the first
    halves and a "suffix" string from the second halves; the result is the
    first ``k + d`` characters of the prefix string followed by the whole
    suffix string.

    Args:
        final: Non-empty sequence of strings (the nodes, in path order).
        k: Added to ``d`` to give the number of prefix characters kept.
        d: Added to ``k`` to give the number of prefix characters kept.

    Returns:
        The reconstructed string.

    Raises:
        IndexError: If ``final`` is empty.
    """
    # Split point between the two halves, taken from the first node.
    half = len(final[0]) // 2
    body, last = final[:-1], final[-1]

    # One character per leading node, then the full half of the last node.
    prefix_text = ''.join(node[0] for node in body) + last[:half]
    suffix_text = ''.join(node[half] for node in body) + last[half:]

    return prefix_text[:k + d] + suffix_text
# Load the dataset; its first line carries two space-separated integers.
txt = _read('dataset_204_16 (2).txt')
params = txt[0].split(' ')
# First field is k, second field is d.
k, d = int(params[0]), int(params[1])
def DeBruijnPair(k, pairs):
    """Build an adjacency map from a collection of paired reads.

    Each pair ``(a, b)`` contributes one edge.  The source node is ``a`` and
    ``b`` with their last character dropped, concatenated; the target node
    is ``a`` and ``b`` with their first character dropped, concatenated.

    Args:
        k: Unused by this function; kept so existing call sites keep working.
        pairs: Iterable of two-item sequences ``(a, b)`` of sliceable reads.

    Returns:
        A dict mapping each source node to the list of its target nodes, in
        the order the pairs were supplied.  Keys appear in order of first
        occurrence; repeated edges are kept.
    """
    pairdebruijn = {}
    for pair in pairs:
        first, second = pair[0], pair[1]
        source = str(first[:-1]) + str(second[:-1])
        target = str(first[1:]) + str(second[1:])
        # setdefault creates the edge list the first time a source is seen.
        pairdebruijn.setdefault(source, []).append(target)
    # The graph was previously built and then discarded; hand it back.
    return pairdebruijn
# Load the input file as a list of lines.
txt = _read('dataset_203_6.txt')
# EulerianPath is defined elsewhere; its result must be an iterable of
# strings for the join below to succeed.
a = EulerianPath(txt)
# Print the result as a single line with '->' between consecutive items.
print('->'.join(a))
0 -> 1,2422
1 -> 114,3,4,46
10 -> 54,63,705,9
100 -> 101,2785
1000 -> 1002,1110
1001 -> 1000
1002 -> 1247,632
1003 -> 573
1004 -> 1005
1005 -> 1003
def EulerianPath(strings, format=True):
#Similar formatting for turning txt file to DeBruijn graph in python dict form
if format:
graph = [i.split(' -> ') for i in strings]
graph = dict(graph)
for (key, val) in graph.items():
val = val.split(',')
graph[key] = val
copy_graph = copy.deepcopy(dict(graph))
else:
def _read(filename):
    """Return the contents of ``filename`` as a list of lines.

    The file is opened in text mode and read in full; line terminators are
    removed from the returned strings.

    Args:
        filename: Path of the file to read.

    Returns:
        A list with one string per line of the file.
    """
    with open(filename, 'r') as handle:
        contents = handle.read()
    return contents.splitlines()
# Load the input file as a list of lines.
txt = _read('dataset_203_99 (1).txt')
# EulerianCycle takes the raw lines; with its default format=True it parses
# them as 'node -> a,b,...' entries itself.
final = EulerianCycle(txt)
# Print the result as a single line with '->' between consecutive items.
print('->'.join(final))
0 -> 1,19,731
1 -> 290,3,4
10 -> 100,5
100 -> 102,164,269
1000 -> 1002
1001 -> 1185,148
1002 -> 1001
1003 -> 1005
1004 -> 1935,90
1005 -> 1004,2166