Skip to content

Instantly share code, notes, and snippets.

@meren
Last active December 19, 2015 08:09
Show Gist options
  • Save meren/5923440 to your computer and use it in GitHub Desktop.
Save meren/5923440 to your computer and use it in GitHub Desktop.
# example fasta defline:
#>147406386-BM_GFKBOSR02F10DX|Bacteria;Actinobacteria;Actinobacteria;Actinomycetales;Micrococcaceae;Rothia
import sys
import Oligotyping.lib.fastalib as u
from Oligotyping.utils.utils import generate_ENVIRONMENT_file
from Oligotyping.utils.utils import get_sample_name_from_defline
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.utils import generate_MATRIX_files
from Oligotyping.utils.utils import get_filtered_samples_dict
def parse_defline(defline):
    """Split a FASTA defline into a ``(sample, taxon)`` pair.

    The taxon is the text after the last '|'. The sample name is the read
    id (the text before that '|') with its final '_'-separated field
    removed, e.g.::

        147406386-BM_GFKBOSR02F10DX|Bacteria;...;Rothia
        -> ('147406386-BM', 'Bacteria;...;Rothia')

    A defline without '|' is used as both the read id and the taxon.
    """
    taxon = defline.split('|')[-1]
    # Strip the taxonomy before looking for '_': taxon names such as
    # 'Incertae_Sedis' contain underscores and would otherwise leak the
    # read id and most of the taxonomy string into the sample name.
    read_id = defline.rsplit('|', 1)[0]
    sample = '_'.join(read_id.split('_')[:-1])
    return sample, taxon


def count_taxa_per_sample(deflines):
    """Return ``{sample: {taxon: count}}`` for an iterable of deflines."""
    samples_dict = {}
    for defline in deflines:
        sample, taxon = parse_defline(defline)
        # setdefault/get instead of dict.has_key, which Python 3 removed.
        taxon_counts = samples_dict.setdefault(sample, {})
        taxon_counts[taxon] = taxon_counts.get(taxon, 0) + 1
    return samples_dict


def iter_deflines(fasta):
    """Yield ``fasta.id`` for every entry of a SequenceSource-like object.

    ``fasta.next()`` is called before each read of ``fasta.id`` and the
    iteration stops as soon as it returns a false value.
    """
    while fasta.next():
        yield fasta.id


def main(argv):
    """Write the ENVIRONMENT and MATRIX files for the FASTA file argv[1].

    Output paths are the input path plus '-ENVIRONMENT-FOR-GAST.txt',
    '-MATRIX-COUNT-FOR-GAST.txt' and '-MATRIX-PERCENT-FOR-GAST.txt'.
    """
    if len(argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit('Usage: %s <fasta-file>' % argv[0])

    fasta_path = argv[1]
    samples_dict = count_taxa_per_sample(iter_deflines(u.SequenceSource(fasta_path)))

    samples = sorted(samples_dict.keys())
    generate_ENVIRONMENT_file(samples, samples_dict, fasta_path + '-ENVIRONMENT-FOR-GAST.txt')

    units = get_oligos_sorted_by_abundance(samples_dict, min_abundance = 0)
    # NOTE(review): the helper's sort direction is not visible here; the
    # reversal is kept exactly as in the original script.
    units.reverse()

    unit_counts, unit_percents = get_unit_counts_and_percents(units, samples_dict)
    # A freshly sorted sample list is passed, as the original script did.
    generate_MATRIX_files(units, sorted(samples_dict.keys()), unit_counts, unit_percents, fasta_path + '-MATRIX-COUNT-FOR-GAST.txt', fasta_path + '-MATRIX-PERCENT-FOR-GAST.txt')


if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment