Skip to content

Instantly share code, notes, and snippets.

@meren
Last active December 10, 2015 02:18
Show Gist options
  • Save meren/4366440 to your computer and use it in GitHub Desktop.
Save meren/4366440 to your computer and use it in GitHub Desktop.
Generate ENVIRONMENT file and matrices for OTU/sample distribution from *_otus.txt file (result of qiime's pick_otus.py).
# coding: utf-8
import sys
from Oligotyping.utils.utils import generate_ENVIRONMENT_file
from Oligotyping.utils.utils import get_oligos_sorted_by_abundance
from Oligotyping.utils.utils import get_unit_counts_and_percents
from Oligotyping.utils.utils import generate_MATRIX_files
from Oligotyping.utils.utils import get_filtered_samples_dict
from Oligotyping.utils.utils import get_samples_dict_from_environment_file
def count_otus_per_sample(lines):
    """Tally reads per OTU for every sample found in ``*_otus.txt`` lines.

    Each line is tab-separated: the first field is the OTU identifier and
    every remaining field is a read id. The sample name of a read is the
    part of its id before the last underscore (``sampleA_12`` ->
    ``sampleA``); a read id without any underscore maps to the empty
    sample name ``''``.

    Args:
        lines: iterable of text lines (an open file object works).

    Returns:
        A plain dict of the form ``{sample: {'otu_<id>': read_count}}``.
    """
    # Plain dicts (not defaultdict/Counter) so that lookups of missing keys
    # by downstream helpers keep raising KeyError instead of inserting.
    samples_dict = {}
    for line in lines:
        fields = line.strip().split('\t')
        otu_name = 'otu_' + fields[0]
        for read_id in fields[1:]:
            # Everything before the last '_' is the sample name.
            sample = read_id.rpartition('_')[0]
            otu_counts = samples_dict.setdefault(sample, {})
            otu_counts[otu_name] = otu_counts.get(otu_name, 0) + 1
    return samples_dict


def main(argv):
    """Write ENVIRONMENT and MATRIX files for the OTU file named in *argv*.

    ``argv[1]`` is the path of the ``*_otus.txt`` input. Three outputs are
    written next to it, named by appending ``-ENVIRONMENT.txt``,
    ``-MATRIX-COUNT.txt`` and ``-MATRIX-PERCENT.txt`` to that path.

    Exits with a usage message when no input path is given.
    """
    if len(argv) < 2:
        sys.exit('Usage: %s <otus.txt>' % argv[0])

    otus_path = argv[1]
    # Context manager so the input handle is closed once it has been read.
    with open(otus_path) as otus_file:
        samples_dict = count_otus_per_sample(otus_file)

    # NOTE(review): min_abundance presumably drops OTUs with fewer than 4
    # reads -- confirm against Oligotyping.utils.utils.
    units = get_oligos_sorted_by_abundance(samples_dict, min_abundance=4)
    units.reverse()

    # A fresh key list is built for every call, exactly as the repeated
    # `.keys()` calls did before; list() keeps it a real list on Python 3.
    samples_dict = get_filtered_samples_dict(
        units, list(samples_dict.keys()), samples_dict)

    generate_ENVIRONMENT_file(
        list(samples_dict.keys()), samples_dict,
        otus_path + '-ENVIRONMENT.txt')

    unit_counts, unit_percents = get_unit_counts_and_percents(
        units, samples_dict)

    generate_MATRIX_files(
        units, list(samples_dict.keys()), unit_counts, unit_percents,
        otus_path + '-MATRIX-COUNT.txt', otus_path + '-MATRIX-PERCENT.txt')


if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment