Skip to content

Instantly share code, notes, and snippets.

@walterst
walterst / enumerate_fasta.py
Created November 20, 2013 23:57
Usage: python enumerate_fasta.py X > Y, where X is the input fasta file and Y is the output fasta file
#!/usr/bin/env python
"""Usage:
python enumerate_fasta.py X > Y
where
X is the input fasta file
Y is the output fasta file
"""
from sys import argv
@walterst
walterst / filter_short_reads.py
Created November 22, 2013 15:49
Usage: python filter_short_reads.py X Y > Z, where X is the input fasta file, Y is the minimum length, and Z is the output fasta file
#!/usr/bin/env python
"""Usage:
python filter_short_reads.py X Y > Z
where
X is the input fasta file
Y is the minimum length
Z is the output fasta file
"""
#!/usr/bin/env python
from __future__ import division
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "William Walters"
__email__ = "William.A.Walters@colorado.edu"
@walterst
walterst / get_barcode_freqs.py
Last active August 29, 2015 13:56
Usage: python get_barcode_freqs.py bc_fastq_fp output_text_fp barcode_length
#!/usr/bin/env python
from __future__ import division
from sys import argv
from qiime.util import parse_command_line_parameters, make_option, gzip_open
if argv[1].endswith('.gz'):
f = gzip_open(argv[1])
else:
#!/usr/bin/env python
from __future__ import division
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "William Walters"
__email__ = "William.A.Walters@colorado.edu"
@walterst
walterst / parse_fasta_errors.py
Created March 10, 2014 00:47
Usage: python parse_fasta_errors.py input_sequence output_sequence
#!/usr/bin/env python
from sys import argv
fasta_f = open(argv[1], "U")
fasta_out = open(argv[2], "w")
# This script will parse out labels from a fasta file that do not have sequences
# Will only work for case of one line for the label, one line for the sequences.
#!/usr/bin/env python
from __future__ import division
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "William Walters"
__email__ = "William.A.Walters@colorado.edu"
#!/usr/bin/env python
from __future__ import division
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "William Walters"
__email__ = "William.A.Walters@colorado.edu"
@walterst
walterst / extract_between_sites.py
Last active August 29, 2015 13:59
Extract region between two specified DNA sequence sites in a fastq file, write this to output fastq file. This is for you, Dave.
#!/usr/bin/env python
from __future__ import division
__author__ = "William Walters"
__copyright__ = "Copyright 2011, The QIIME Project"
__credits__ = ["William Walters"]
__license__ = "GPL"
__version__ = "1.8.0-dev"
__maintainer__ = "William Walters"
__email__ = "William.A.Walters@colorado.edu"
@walterst
walterst / parse_nonstandard_chars.py
Last active February 10, 2022 10:21
Usage: python parse_nonstandard_chars.py X > Y where X is the input file to be parsed, and Y is the output parsed file
#!/usr/bin/env python
"""Somewhat hackish way to eliminate non-ASCII characters in a text file,
such as a taxonomy mapping file, with QIIME. Reads through the file, and
removes all characters above decimal value 127. Additionally, asterisk "*"
characters are removed, as these inhibit the RDP classifier.
Usage:
python parse_nonstandard_chars.py X > Y
where X is the input file to be parsed, and Y is the output parsed file"""