Created
July 27, 2016 16:37
-
-
Save flashton2003/637b5a78fba3fadd0de0fa51a2d9759f to your computer and use it in GitHub Desktop.
convert 2d matrix to flat three column
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### This script takes a distance matrix produced by https://github.com/tseemann/nullarbor/blob/master/bin/afa-pairwise.pl, which is a 2d matrix
# ID a b c
# a 0 1 2
# b 1 0 1
# c 2 1 0
# and prints the half matrix in three-column format, with no self-self comparisons (each pair is listed once, later row first)
# b a 1
# c a 2
# c b 1
## Personally, I find this format a little easier to work with when iterating through all the pairs etc.
def read_in_matrix(infile):
    """Read a tab-separated square distance matrix into a nested dict.

    The first line of *infile* is a header; its first cell (the 'ID'
    corner label) is discarded and the remaining cells are the column
    strain names.  Each following line holds a strain id followed by one
    distance per column.

    Only half of the matrix is kept: for each row, a distance is stored
    only against strains whose own row has already been read, plus the
    row's self-self entry.  When rows and columns list the same strains,
    every pair therefore appears once.

    Returns a dict ``{row_strain: {column_strain: distance}}`` where the
    distances are the strings read from the file (not converted to
    numbers).  An empty file gives an empty dict.
    """
    diff_matrix = {}
    with open(infile) as fi:
        # the first row is the header
        header = next(fi, None)
        if header is None:
            # empty file: no strains, so no pairs
            return diff_matrix
        ## the first item in the header is 'ID', get rid of this.
        # Only the line ending is stripped, so an empty corner cell (a
        # leading tab) does not shift the strain names by one column.
        strains = header.rstrip('\r\n').split('\t')[1:]
        for line in fi:
            # drop the line ending so the last distance is not stored as '0\n'
            line = line.rstrip('\r\n')
            if not line.strip():
                # blank lines (e.g. at the end of the file) are not strains
                continue
            split_line = line.split('\t')
            ## first element in the split line is the strain id
            strain1 = split_line[0]
            distances = split_line[1:]
            diff_matrix[strain1] = {}
            for i, strain2 in enumerate(strains):
                # keep the value only if strain2's row was already read and
                # that row does not already hold this pair
                if strain2 in diff_matrix and strain1 not in diff_matrix[strain2]:
                    diff_matrix[strain1][strain2] = distances[i]
    return diff_matrix
def print_matrix(diff_matrix):
    """Print the stored pairs as tab-separated ``strain1 strain2 distance`` lines.

    diff_matrix -- nested dict ``{strain1: {strain2: distance}}`` with
                   string distances, as built by read_in_matrix.

    Self-self entries (strain1 == strain2) are skipped.  Nothing is
    returned; one line per remaining pair is written to stdout.
    """
    for strain1 in diff_matrix:
        for strain2 in diff_matrix[strain1]:
            if strain1 != strain2:
                # print(...) with a single argument behaves the same on
                # Python 2 and Python 3, unlike the print statement.
                print('\t'.join([strain1, strain2, diff_matrix[strain1][strain2]]))
def main(infile='data/2016.07.27/st4.dist'):
    """Read the distance matrix in *infile* and print it in three-column form.

    infile -- path to the output of afa-pairwise.pl.  It defaults to the
              path that used to be hard-coded here, so calling ``main()``
              with no arguments behaves as before.
    """
    diff_matrix = read_in_matrix(infile)
    print_matrix(diff_matrix)
# Run the conversion only when this file is executed as a script, so that
# importing it as a module does not trigger the file read and printing.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment