Skip to content

Instantly share code, notes, and snippets.

@flashton2003
Created July 27, 2016 16:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save flashton2003/637b5a78fba3fadd0de0fa51a2d9759f to your computer and use it in GitHub Desktop.
Save flashton2003/637b5a78fba3fadd0de0fa51a2d9759f to your computer and use it in GitHub Desktop.
convert 2d matrix to flat three column
### This script takes a distance matrix produced by https://github.com/tseemann/nullarbor/blob/master/bin/afa-pairwise.pl, which is a tab-separated 2D matrix whose header row starts with 'ID':
# ID a b c
# a 0 1 2
# b 1 0 1
# c 2 1 0
# and prints the half matrix in three-column format, with no self-self comparisons
# (each pair appears once; the order within a pair and the order of rows may differ from this example):
# a b 1
# a c 2
# b c 1
## Personally, I find this format a little easier to work with when iterating through all the pairs, etc.
def read_in_matrix(infile):
    """Read a tab-separated square distance matrix into a nested dict.

    The first non-blank line is the header: its first cell (e.g. 'ID') is
    discarded and the remaining cells are the column strain names. Every
    following non-blank line is a strain name followed by one distance per
    column.

    Only one half of the matrix is kept: for a given row strain, a column is
    stored only if that column strain has already been seen as a row (the row
    strain itself included, so the diagonal is kept as well).

    Args:
        infile: path to the matrix file.

    Returns:
        dict mapping row strain -> {column strain: distance string}. Distances
        are returned as the text found in the file, not converted to numbers.
        An empty (or all-blank) file yields an empty dict.

    Raises:
        IndexError: if a row has fewer distance cells than it needs.
    """
    diff_matrix = {}
    with open(infile) as fi:
        # Drop line endings and skip blank lines so that a trailing empty
        # line does not turn into a bogus strain.
        lines = [line.rstrip('\r\n') for line in fi if line.strip()]
    if not lines:
        return diff_matrix
    # The first row is the header; its first item is 'ID', get rid of it.
    # Only trailing whitespace is stripped so an empty corner cell survives.
    strains = lines[0].rstrip().split('\t')[1:]
    for line in lines[1:]:
        cells = line.split('\t')
        # The first cell of each row is the strain id.
        strain1 = cells[0]
        distances = cells[1:]
        diff_matrix[strain1] = {}
        for i, strain2 in enumerate(strains):
            # Keep the pair only if strain2 was already read as a row and the
            # reverse pair has not been recorded, so each pair is stored once.
            if strain2 in diff_matrix and strain1 not in diff_matrix[strain2]:
                diff_matrix[strain1][strain2] = distances[i]
    return diff_matrix
def print_matrix(diff_matrix):
    """Print the stored pairs in three-column, tab-separated format.

    One line is written to stdout per stored pair, as
    ``strain1<TAB>strain2<TAB>distance``. Self-self comparisons are skipped.

    Args:
        diff_matrix: dict mapping strain -> {other strain: distance}, where
            the distances are strings (they are joined as-is).
    """
    for strain1, row in diff_matrix.items():
        for strain2, distance in row.items():
            if strain1 != strain2:
                # Call form of print: valid on both Python 2 and Python 3.
                print('\t'.join([strain1, strain2, distance]))
def main(infile='data/2016.07.27/st4.dist'):
    """Convert a 2D distance matrix file to three-column output on stdout.

    Args:
        infile: path to the output of afa-pairwise.pl. Defaults to the path
            that was previously hard-coded in this script.
    """
    diff_matrix = read_in_matrix(infile)
    print_matrix(diff_matrix)


# Only run when executed as a script, so importing this module has no side effects.
if __name__ == '__main__':
    import sys
    # An optional first command-line argument overrides the default matrix path.
    if len(sys.argv) > 1:
        main(sys.argv[1])
    else:
        main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment