Greg Caporaso gregcaporaso

## variability_v_diversity.py
#!/usr/bin/env python
# File created on 26 Feb 2014
from __future__ import division

__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2014, The QIIME Project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "Greg Caporaso"

## check_illumina_barcodes.py
#!/usr/bin/env python
# File created on 01 Dec 2011
from __future__ import division

__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = ["Greg Caporaso"]
__license__ = "GPL"
__version__ = "1.3.0-dev"
__maintainer__ = "Greg Caporaso"

## fq_qual.py
#!/usr/bin/env python

from sys import argv
from random import random
from cogent.parse.fastq import MinimalFastqParser
from cogent.draw.distribution_plots import generate_box_plots
from qiime.quality import ascii_to_phred33
from qiime.util import qiime_open

def fastq_quality_plots(fastq_records,

## compare_pre_post_distances.py
#!/usr/bin/env python
# Authors: Greg Caporaso, John Chase
# Questions: Contact gregcaporaso@gmail.com

# Step 1: Generate lists of pre/post sample ids on a per-individual basis
# qiime.group.extract_per_individual_states_from_sample_metadata
# will let you generate a dict of individual id to (pre sample-id, post sample-id)

# Step 2: Extract distances for pre/post sample ids
# qiime.parse.parse_distmat_to_dict

## README.md

      
              4 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                gregcaporaso
                / README.md
            
            
              Created
              August 23, 2013 14:54
            
              
                Example files used while developing pyqi's Getting Started tutorials.
              
          
    These files were used while developing pyqi's Getting Started tutorials. See those documents for usage examples.

  
## README.md

      
              3 files
            
          
              0 forks
            
          
              3 comments
            
          
              0 stars
            
          
                gregcaporaso
                / README.md
            
            
              Last active
              December 21, 2015 02:39
            
              
                Code and analysis notes for determining the taxonomic specificity of a set of sequences with associated taxonomy strings. This has been tested with the Greengenes 13_5 database. See README.md for usage instructions and some analysis notes.
              
          
    Taxonomic specificity of sequences in Greengenes

Here I'm creating a hash of expected 515F/806R amplicons from the Greengenes OTUs (for a couple of different sizes of OTUs), and comparing the uniqueness of sequences with the number of different taxonomic identities at each level.
There are basically three categories of sequences:

- those that are unique, and therefore can only map to a single taxon
- those that are not unique, but still only map to a single taxon
- those that are not unique, and map to multiple taxa


## reorg-dir-structure.py
                                                                                                                                             41,9          All
#!/usr/bin/env python
# Author: Greg Caporaso

from os.path import join, isdir
from glob import glob

base_in_dir = "/home/caporaso/analysis/short-read-tax-assignment/data/qiime-mock-community/multiple_assign_taxonomy_output/"
base_out_dir = "/home/caporaso/analysis/short-read-tax-assignment/data/eval-pre-computed/"

## generate_usearch_cmds.py
#!/usr/bin/env python

from os.path import join

query_fp = "/home/caporaso/analysis/short-read-tax-assignment/data/qiime-mock-community/S16S-2/rep_set.fna"
reference_seqs_fp = "/data/gg_13_5_otus/rep_set/97_otus.fasta"
reference_tax_fp = "/data/gg_13_5_otus/taxonomy/97_otu_taxonomy.txt"
input_biom_fp = "/home/caporaso/analysis/short-read-tax-assignment/data/qiime-mock-community/S16S-2/otu_table_mc2_no_pynast_failures.biom"
output_biom_fn = "otu_table_mc2_no_pynast_failures_w_taxa.biom"
output_dir = "/home/caporaso/analysis/short-read-tax-assignment/demo/eval-demo/usearch_v_97/"

## uc_fast_params.txt
pick_otus:enable_rev_strand_match True
pick_otus:max_accepts 1
pick_otus:max_rejects 8
pick_otus:stepwords 8
pick_otus:word_length 8

## README.md

      
              3 files
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                gregcaporaso
                / README.md
            
            
              Last active
              December 19, 2015 05:18
            
              
                Code for demultiplexing fastq data where index reads and barcodes are included at the beginning of the sequences. This code depends on QIIME 1.7.0.
              
          
    Code for demultiplexing fastq data where index reads and barcodes are included at the beginning of the sequences. This code depends on QIIME 1.7.0.
To run this code and pass the results into split_libraries_fastq.py:
prep_sl_fastq.py -b AmpF_25k.fastq.gz -m mapping.txt -o prepped_fastq
cd prepped_fastq
split_libraries_fastq.py -i AmpF_25k.fastq.amplicon.fastq -b AmpF_25k.fastq.barcode.fastq -m ../mapping.txt -o slout/ --barcode_type 12
	#!/usr/bin/env python
	# File created on 26 Feb 2014
	from __future__ import division

	__author__ = "Greg Caporaso"
	__copyright__ = "Copyright 2014, The QIIME Project"
	__credits__ = ["Greg Caporaso"]
	__license__ = "GPL"
	__version__ = "1.8.0-dev"
	__maintainer__ = "Greg Caporaso"
	#!/usr/bin/env python
	# File created on 01 Dec 2011
	from __future__ import division

	__author__ = "Greg Caporaso"
	__copyright__ = "Copyright 2011, The QIIME project"
	__credits__ = ["Greg Caporaso"]
	__license__ = "GPL"
	__version__ = "1.3.0-dev"
	__maintainer__ = "Greg Caporaso"
	#!/usr/bin/env python

	from sys import argv
	from random import random
	from cogent.parse.fastq import MinimalFastqParser
	from cogent.draw.distribution_plots import generate_box_plots
	from qiime.quality import ascii_to_phred33
	from qiime.util import qiime_open

	def fastq_quality_plots(fastq_records,
	#!/usr/bin/env python
	# Authors: Greg Caporaso, John Chase
	# Questions: Contact gregcaporaso@gmail.com

	# Step 1: Generate lists of pre/post sample ids on a per-individual basis
	# qiime.group.extract_per_individual_states_from_sample_metadata
	# will let you generate a dict of individual id to (pre sample-id, post sample-id)

	# Step 2: Extract distances for pre/post sample ids
	# qiime.parse.parse_distmat_to_dict
	41,9 All
	#!/usr/bin/env python
	# Author: Greg Caporaso

	from os.path import join, isdir
	from glob import glob

	base_in_dir = "/home/caporaso/analysis/short-read-tax-assignment/data/qiime-mock-community/multiple_assign_taxonomy_output/"
	base_out_dir = "/home/caporaso/analysis/short-read-tax-assignment/data/eval-pre-computed/"
	#!/usr/bin/env python

	from os.path import join

	query_fp = "/home/caporaso/analysis/short-read-tax-assignment/data/qiime-mock-community/S16S-2/rep_set.fna"
	reference_seqs_fp = "/data/gg_13_5_otus/rep_set/97_otus.fasta"
	reference_tax_fp = "/data/gg_13_5_otus/taxonomy/97_otu_taxonomy.txt"
	input_biom_fp = "/home/caporaso/analysis/short-read-tax-assignment/data/qiime-mock-community/S16S-2/otu_table_mc2_no_pynast_failures.biom"
	output_biom_fn = "otu_table_mc2_no_pynast_failures_w_taxa.biom"
	output_dir = "/home/caporaso/analysis/short-read-tax-assignment/demo/eval-demo/usearch_v_97/"
	pick_otus:enable_rev_strand_match True
	pick_otus:max_accepts 1
	pick_otus:max_rejects 8
	pick_otus:stepwords 8
	pick_otus:word_length 8