Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/env python
from __future__ import division
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "William Walters"
__email__ = "William.A.Walters@colorado.edu"
@walterst
walterst / get_barcode_freqs.py
Last active August 29, 2015 13:56
Usage: python get_barcode_freqs.py bc_fastq_fp output_text_fp barcode_length
#!/usr/bin/env python
from __future__ import division
from sys import argv
from qiime.util import parse_command_line_parameters, make_option, gzip_open
if argv[1].endswith('.gz'):
f = gzip_open(argv[1])
else:
#!/usr/bin/env python
from __future__ import division
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "William Walters"
__email__ = "William.A.Walters@colorado.edu"
@walterst
walterst / parse_fasta_errors.py
Created March 10, 2014 00:47
Usage: python parse_fasta_errors.py input_sequence output_sequence
#!/usr/bin/env python
from sys import argv
fasta_f = open(argv[1], "U")
fasta_out = open(argv[2], "w")
# This script will parse out labels from a fasta file that do not have sequences.
# Will only work for case of one line for the label, one line for the sequences.
#!/usr/bin/env python
from __future__ import division
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "William Walters"
__email__ = "William.A.Walters@colorado.edu"
#!/usr/bin/env python
from __future__ import division
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "William Walters"
__email__ = "William.A.Walters@colorado.edu"
@walterst
walterst / extract_between_sites.py
Last active August 29, 2015 13:59
Extract region between two specified DNA sequence sites in a fastq file, write this to output fastq file. This is for you, Dave.
#!/usr/bin/env python
from __future__ import division
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "William Walters"
__email__ = "William.A.Walters@colorado.edu"
@walterst
walterst / trim_fasta.py
Last active August 29, 2015 14:02
Usage: python trim_fasta.py X Y Z, where X is the input fasta file, Y is the output fasta file, and Z is the length to trim from the beginning of the reads.
#!/usr/bin/env python
"""Usage:
python trim_fasta.py X Y Z
where
X is the input fasta file
Y is the output fasta file
Z is length to trim from beginning of the reads
"""
@walterst
walterst / find_all_gap_positions.py
Last active August 29, 2015 14:05
This script is used to generate a file (similar to the Greengenes lanemask) with 0s and 1s, where 0 marks positions that are all gap characters (. or -) and 1 marks positions that contain at least one non-gap character, for a given input aligned fasta file.
#!/usr/bin/env python
# Usage: python find_all_gap_positions.py X Y
# where X is the input aligned fasta file, Y is the output text file marking
# gapped/non-gapped positions
from sys import argv
from cogent.parse.fasta import MinimalFastaParser
@walterst
walterst / parse_to_7_taxa_levels.py
Created September 18, 2014 23:00
# Usage: python parse_to_7_taxa_levels.py X Y # where X is the input taxonomy mapping file, Y is the output taxonomy mapping file # Purpose is to parse output of Mike Robeson's script to force taxa into # 7 levels.
#!/usr/bin/env python
from sys import argv
# Usage: python parse_to_7_taxa_levels.py X Y
# where X is the input taxonomy mapping file, Y is the output taxonomy mapping file
# Purpose is to parse output of Mike Robeson's script to force taxa into
# 7 levels.
taxa_mapping = open(argv[1], "U")
parsed_taxa = open(argv[2], "w")