This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def EulerianCycle(strings, format=True): | |
##Ignore this formatting block, it's only for a desired input | |
if format: | |
graph = [i.split(' -> ') for i in strings] | |
graph = dict(graph) | |
for (key, val) in graph.items(): | |
val = val.split(',') | |
graph[key] = val | |
copy_graph = copy.deepcopy(dict(graph)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Motif strings to analyse, one string per row.
lst = ['TCGGGGGTTTTT', 'CCGGTGACTTAC', 'ACGGGGATTTTC', 'TTGGGGACTTTT', 'AAGGGGACTTCC', 'TTGGGGACTTCC', 'TCGGGGATTCAT', 'TCGGGGATTCCT', 'TAGGGGAACTAC', 'TCGGGTATAACC']
# Split every string into characters and transpose, so that each row of
# `formatted` holds the characters found at one position across all strings.
formatted = np.array([list(motif) for motif in lst]).transpose()
counts = Count(formatted, laplace=False)
print(counts.transpose())
entropy = H_e(counts)
# print(entropy)
print(sum(entropy))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def Count(motif_matrix, laplace=True): | |
motif_matrix = list([i for i in motif_matrix]) | |
N = len(motif_matrix[0]) * 2 | |
count_matrix = [[] for _ in range(len(motif_matrix))] | |
if laplace == True: | |
for idx, column in enumerate(motif_matrix): | |
count_matrix[idx] = np.asarray([np.asarray((list(column).count(i)+1)/N) for i in ['A', 'C', 'G', 'T']]) | |
elif laplace == False: | |
N = len(motif_matrix[0]) | |
for idx, column in enumerate(motif_matrix): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def Reconstruction(final, k, d):
    """Spell a string from an ordered path of two-part node labels.

    Every label in ``final`` is split at the midpoint of the *first* label's
    length.  A "prefix string" is built from the first character of each
    label but the last, followed by the first half of the last label; a
    "suffix string" is built the same way from the character at the midpoint
    and the second half of the last label.  The result is the first ``k + d``
    characters of the prefix string followed by the whole suffix string.

    Args:
        final: Non-empty sequence of label strings in path order.
        k: Added to ``d`` to decide how much of the prefix string is kept
            (presumably the k-mer length of the read pairs -- confirm with
            the caller).
        d: Added to ``k``, see above (presumably the gap between paired
            reads -- confirm with the caller).

    Returns:
        The reconstructed string.

    Raises:
        IndexError: If ``final`` is empty.
    """
    # The midpoint is the same for every label, so compute it only once.
    half = len(final[0]) // 2
    body, last = final[:-1], final[-1]
    pref = ''.join(node[0] for node in body) + last[:half]
    suff = ''.join(node[half] for node in body) + last[half:]
    return pref[:k + d] + suff
# Load the dataset; its first line is split on single spaces and the first
# two fields are parsed as the integers k and d.
txt = _read('dataset_204_16 (2).txt')
params = txt[0].split(' ')
k, d = int(params[0]), int(params[1])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def DeBruijnPair(k, pairs):
    """Build an adjacency map from a collection of paired reads.

    For every pair, the source label is the first read without its last
    character concatenated with the second read without its last character;
    the target label is built the same way from both reads without their
    first character.  Each source label maps to the list of its target
    labels, in input order, with duplicates kept.

    Args:
        k: Unused by this function; kept so existing callers keep working.
        pairs: Iterable of two-element sequences ``(first, second)``.

    Returns:
        dict mapping each source label to a list of target labels.
    """
    pairdebruijn = {}
    for pair in pairs:
        first, second = pair[0], pair[1]
        source = str(first[:-1]) + str(second[:-1])
        target = str(first[1:]) + str(second[1:])
        pairdebruijn.setdefault(source, []).append(target)
    # The visible code built this map but never handed it back to the caller.
    return pairdebruijn
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the dataset lines, pass them to EulerianPath, and print the returned
# items joined by '->'.
txt = _read('dataset_203_6.txt')
a = EulerianPath(txt)
print('->'.join(a))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
0 -> 1,2422 | |
1 -> 114,3,4,46 | |
10 -> 54,63,705,9 | |
100 -> 101,2785 | |
1000 -> 1002,1110 | |
1001 -> 1000 | |
1002 -> 1247,632 | |
1003 -> 573 | |
1004 -> 1005 | |
1005 -> 1003 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def EulerianPath(strings, format=True): | |
#Similar formatting for turning txt file to DeBruijn graph in python dict form | |
if format: | |
graph = [i.split(' -> ') for i in strings] | |
graph = dict(graph) | |
for (key, val) in graph.items(): | |
val = val.split(',') | |
graph[key] = val | |
copy_graph = copy.deepcopy(dict(graph)) | |
else: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _read(filename):
    """Return the lines of a text file as a list of strings.

    Args:
        filename: Path of the file to open in text mode.

    Returns:
        The file's lines, without their line terminators.
    """
    # Keep the handle and the result under separate names; the original
    # rebound the handle's name to the line list inside the `with` block.
    with open(filename, 'r') as handle:
        lines = handle.read().splitlines()
    return lines
# Read the dataset lines, pass them to EulerianCycle, and print the returned
# items joined by '->'.
txt = _read('dataset_203_99 (1).txt')
final = EulerianCycle(txt)
print('->'.join(final))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
0 -> 1,19,731 | |
1 -> 290,3,4 | |
10 -> 100,5 | |
100 -> 102,164,269 | |
1000 -> 1002 | |
1001 -> 1185,148 | |
1002 -> 1001 | |
1003 -> 1005 | |
1004 -> 1935,90 | |
1005 -> 1004,2166 |
NewerOlder