Skip to content

Instantly share code, notes, and snippets.

@gregcaporaso
Last active August 29, 2015 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gregcaporaso/9234421 to your computer and use it in GitHub Desktop.
Save gregcaporaso/9234421 to your computer and use it in GitHub Desktop.
first pass script for comparing median diversity versus median variability for a given category
#!/usr/bin/env python
# File created on 26 Feb 2014
from __future__ import division
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2014, The QIIME Project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
from numpy import median
import matplotlib.pyplot as plt
from cogent.maths.stats.test import correlation_test
from qiime.parse import parse_distmat
from qiime.parse import parse_mapping_file
from qiime.parse import parse_rarefaction
from qiime.group import get_grouped_distances
from qiime.compare_alpha_diversity import (
collapse_sample_diversities_by_category_value,
get_per_sample_average_diversities, get_category_value_to_sample_ids)
from qiime.util import parse_command_line_parameters, make_option
script_info = {}
script_info['brief_description'] = ""
script_info['script_description'] = ""
# Members of the tuple in script_usage are (title, description, example call)
script_info['script_usage'] = [("","","%prog -m smp-gut.tsv -d "
"unweighted_unifrac_dm.gut_ts_only.txt -a PD_whole_tree.txt -c PersonalID "
"-o out.txt -p out.pdf")]
script_info['output_description']= ""
script_info['required_options'] = [
make_option('-m', '--mapping_fp', type='existing_filepath',
help='the input mapping file'),
make_option('-d', '--distance_matrix_fp', type='existing_filepath',
help='the input distance matrix file'),
make_option('-a', '--collated_alpha_fp', type='existing_filepath',
help='the input alpha diversity table'),
make_option('-c', '--category',
help='grouping category (e.g., "PersonalID")'),
make_option('-o', '--output_fp', type="new_filepath",
help='the output filepath')
]
script_info['optional_options'] = [
make_option('-p', '--plot_output_fp', type="new_filepath",
help='the output filepath for the figure [default: no plot created]')
]
script_info['version'] = __version__
def main():
option_parser, opts, args = parse_command_line_parameters(**script_info)
distance_matrix_fp = opts.distance_matrix_fp
mapping_fp = opts.mapping_fp
collated_alpha_fp = opts.collated_alpha_fp
category = opts.category
output_fp = opts.output_fp
plot_output_fp = opts.plot_output_fp
dmh, dm = parse_distmat(open(distance_matrix_fp))
m, mh, _ = parse_mapping_file(open(mapping_fp))
within_indiv = get_grouped_distances(dmh, dm, mh, m, category)
per_indiv_median_variability = \
dict([(e[0], median(e[2])) for e in within_indiv])
rarefaction_data = parse_rarefaction(open(collated_alpha_fp))
category_value_to_sample_ids = \
get_category_value_to_sample_ids(open(mapping_fp), category)
per_sample_average_diversities = \
get_per_sample_average_diversities(rarefaction_data, depth=None)
per_category_value_average_diversities = \
collapse_sample_diversities_by_category_value(
category_value_to_sample_ids,
per_sample_average_diversities)
per_indiv_median_diversity = \
dict([(k, median(v)) for k,v in
per_category_value_average_diversities.items()])
common_ids = \
set(per_indiv_median_diversity) & \
set(per_indiv_median_variability)
data = []
for id in common_ids:
data.append((id,
per_indiv_median_diversity[id],
per_indiv_median_variability[id]))
diversities = [d[1] for d in data]
variabilities = [d[2] for d in data]
correlation_result = correlation_test(diversities, variabilities)
of = open(output_fp,'w')
of.write("#%s\tMedian Diversity\tMedian Variability\n" % category)
of.write("#Spearman rho: %f, Spearman Monte Carlo p: %f\n" %
(correlation_result[0], correlation_result[3]))
for d in data:
of.write('%s\t%f\t%f\n' % d)
of.close()
if plot_output_fp:
fig, ax = plt.subplots()
ax.scatter([d[1] for d in data],
[d[2] for d in data])
ax.set_xlim(0)
ax.set_ylim(0)
ax.set_xlabel('Median Diversity')
ax.set_ylabel('Median Variability')
plt.savefig(plot_output_fp)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment