Last active
March 5, 2020 18:13
-
-
Save jasonsahl/e9516b2d940ad2474ba6e97f5b856440 to your computer and use it in GitHub Desktop.
Parse Kmer frequencies from a Kmer matrix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""parse frequencies from a Kmer matrix""" | |
from __future__ import division | |
import sys | |
import os | |
import optparse | |
from optparse import OptionParser | |
from collections import deque | |
def test_file(option, opt_str, value, parser): | |
try: | |
with open(value): setattr(parser.values, option.dest, value) | |
except IOError: | |
print('%s file cannot be opened' % option) | |
sys.exit() | |
def prune_matrix(matrix,resist,suscept): | |
"""prune out genomes of interest from a matrix. | |
Not done efficiently, but appears to work""" | |
#Group 1 is resistant | |
group1_ids = [] | |
#Group 2 is susceptible | |
group2_ids = [] | |
group1_out = open("resist_pruned.txt", "w") | |
group2_out = open("suscept_pruned.txt", "w") | |
with open(resist) as file_1: | |
for line in file_1: | |
line.strip("\n") | |
group1_ids.append(line) | |
with open(suscept) as file_2: | |
for line in file_2: | |
line.strip("\n") | |
group2_ids.append(line) | |
group1_ids = map(lambda s: s.strip(), group1_ids) | |
group2_ids = map(lambda s: s.strip(), group2_ids) | |
first_line = next(open(matrix)) | |
newline = first_line.strip("\n") | |
fields = newline.split() | |
original_fields = fields.copy() | |
group1_idx = [] | |
group2_idx = [] | |
group1_ids_list = list(group1_ids) | |
group2_ids_list = list(group2_ids) | |
for x in fields: | |
"""These are the genomes to prune out""" | |
if x not in group1_ids_list: group1_idx.append(fields.index(x)) | |
if x not in group2_ids_list: group2_idx.append(fields.index(x)) | |
#This is for group 1 | |
deque((list.pop(fields, i) for i in sorted(group1_idx, reverse=True)), maxlen=0) | |
fields.insert(0,"Kmer") | |
group1_out.write("\t".join(fields)+"\n") | |
#This is for group 2 | |
first_line = next(open(matrix)) | |
#original_fields.insert(0,"cluster") | |
deque((list.pop(original_fields, i) for i in sorted(group2_idx, reverse=True)), maxlen=0) | |
original_fields.insert(0,"Kmer") | |
#original_fields.insert(0,"Kmer") | |
group2_out.write("\t".join(original_fields)+"\n") | |
with open(matrix) as in_matrix: | |
in_matrix.readline() | |
for line in in_matrix: | |
newline = line.strip("\n") | |
fields = newline.split() | |
#Here I make a copy of these fields to prevent them being altered | |
fields_copy = fields.copy() | |
name = fields[0] | |
deque((list.pop(fields, i) for i in sorted(group1_idx, reverse=True)), maxlen=0) | |
group1_out.write("".join(name)+"\t"+"\t".join(fields)+"\n") | |
name = fields_copy[0] | |
deque((list.pop(fields_copy, i) for i in sorted(group2_idx, reverse=True)), maxlen=0) | |
group2_out.write("".join(name)+"\t"+"\t".join(fields_copy)+"\n") | |
group1_out.close() | |
group2_out.close() | |
def compare_values(resist,suscept): | |
resist_values = [] | |
suscept_values = [] | |
kmer_names = [] | |
with open(resist) as resist_file: | |
resist_file.readline() | |
for line in resist_file: | |
newline = line.strip() | |
fields = newline.split() | |
ints = list(map(int,fields[1:])) | |
resist_values.append(sum(ints)/len(ints)) | |
kmer_names.append(fields[0]) | |
with open(suscept) as suscept_file: | |
suscept_file.readline() | |
for line in suscept_file: | |
newline = line.strip() | |
fields = newline.split() | |
ints = list(map(int,fields[1:])) | |
suscept_values.append(sum(ints)/len(ints)) | |
resist_values.insert(0,"resistant") | |
suscept_values.insert(0,"susceptible") | |
sstrings = list(map(str,suscept_values)) | |
rstrings = list(map(str,resist_values)) | |
kmer_names.insert(0,"Kmer") | |
kmer_name_out = open("kmers.tmp", "w") | |
kmer_name_out.write("\n".join(kmer_names)) | |
kmer_name_out.close() | |
resist_out = open("resist.tmp", "w") | |
resist_out.write("\n".join(rstrings)) | |
resist_out.close() | |
suscept_out = open("suscept.tmp", "w") | |
suscept_out.write("\n".join(sstrings)) | |
suscept_out.close() | |
def report_differences(matrix,diff_value): | |
outfile = open("results_differences.txt", "w") | |
with open(matrix) as my_matrix: | |
for line in my_matrix: | |
if line.startswith("Kmer"): | |
outfile.write(line) | |
else: | |
fields = line.split() | |
if abs(float(fields[1])-float(fields[2]))>diff_value: | |
outfile.write(line) | |
outfile.close() | |
def main(kmer_matrix,resist_ids,suscept_ids,diff_value): | |
prune_matrix(kmer_matrix,resist_ids,suscept_ids) | |
compare_values("resist_pruned.txt","suscept_pruned.txt") | |
os.system("paste kmers.tmp resist.tmp suscept.tmp > combined.matrix") | |
report_differences("combined.matrix",diff_value) | |
os.system("rm kmers.tmp resist.tmp suscept.tmp") | |
if __name__ == "__main__": | |
parser = OptionParser(usage="usage: %prog [options]",version="%prog 0.0.0") | |
parser.add_option("-m", "--kmer_matrix", dest="kmer_matrix", | |
help="path to Kmer matrix [REQUIRED]", | |
type="string", action="callback", callback=test_file) | |
parser.add_option("-r", "--resist_ids", dest="resist_ids", | |
help="path to file with resistant IDs [REQUIRED]", | |
type="string", action="callback", callback=test_file) | |
parser.add_option("-s", "--suscept_ids", dest="suscept_ids", | |
help="path to file with susceptible IDs [REQUIRED]", | |
type="string", action="callback", callback=test_file) | |
parser.add_option("-d", "--diff_value", dest="diff_value", | |
help="diff value to report, defaults to 0.5", | |
type="float", action="store", default=0.5) | |
options, args = parser.parse_args() | |
mandatories = ["kmer_matrix","resist_ids","suscept_ids"] | |
for m in mandatories: | |
if not getattr(options, m, None): | |
print("\nMust provide %s.\n" %m) | |
parser.print_help() | |
exit(-1) | |
main(options.kmer_matrix,options.resist_ids,options.suscept_ids,options.diff_value) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment