This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Takes input in the form of: | |
# OTU6 d:Viridiplantae,k:Streptophyta,p:asterids,c:Gentianales,o:Rubiaceae; | |
# OTU7 d:Viridiplantae,k:Streptophyta; | |
# and outputs to: | |
# OTU6 d__Viridiplantae;k__Streptophyta;p__asterids;c__Gentianales;o__Rubiaceae;f__;g__;s__ | |
# OTU7 d__Viridiplantae;k__Streptophyta;p__;c__;o__;f__;g__;s__ | |
# WARNING! This script will fail if the taxonomy labels contain any extra ':' characters. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# Created by: Michael S. Robeson II | |
from cogent import LoadSeqs | |
import string | |
def read_sequence_file(file_path): | |
"""Reads in an ALIGNED fasta formated sequence file | |
-file_path : path to the file to be read in | |
""" | |
seq_data = LoadSeqs(file_path, alignment=True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from cogent.parse.fasta import MinimalFastaParser | |
def check_for_iupac(seq): | |
"""Flag seqs with IUPAC amibuity codes | |
like RYSWKMBDHVN | |
""" | |
iupac = 'RYSWKMBDHVN' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Mike Robeson (GitHub: @mikerobeson) | |
# Needs PyCogent installed. | |
from cogent.parse.fastq import MinimalFastqParser | |
from cogent.parse.fasta import MinimalFastaParser | |
from os.path import abspath, join, splitext, dirname, basename | |
from os import system, mkdir | |
from optparse import OptionParser, OptionGroup |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Mike Robeson | |
# quickly hacked / draft code to build a compatible fasta file for use in usearch v8.1 | |
# See: http://www.drive5.com/usearch/manual/utax_user_train.html | |
from optparse import OptionParser, OptionGroup | |
from cogent.parse.ncbi_taxonomy import NcbiTaxonomyFromFiles | |
from cogent.parse.fasta import MinimalFastaParser | |
import unicodedata |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# This script will extract a region of an alignment | |
from skbio.io import read as read_fasta | |
import argparse | |
def extract_region(seq_str, startp, endp):
    """Return the slice of *seq_str* between two positions.

    The positions are 0-based and half-open, i.e. the result is
    ``seq_str[startp:endp]``: ``startp`` is included, ``endp`` is not.

    seq_str : any sliceable sequence (e.g. a string)
    startp  : start position; passed through ``int()``, so a numeric
              string such as one read from the command line is accepted
    endp    : end position (exclusive); passed through ``int()`` as well

    Raises ``ValueError`` if a position cannot be converted by ``int()``.
    """
    # Coerce both bounds so callers may pass unparsed string values.
    start = int(startp)
    end = int(endp)
    return seq_str[start:end]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# By: Mike Robeson & Se-ran Jun Nov 20, 2018 | |
# I ran this code within the `qiime2-2018.11` environment. | |
# Simple concept code to prepare a Greengenes-like taxonomy for SILVA (v132). | |
from skbio.tree import TreeNode | |
import re | |
# Ordered (rank name, label prefix) pairs, from domain down to genus.
allowed_ranks_list = [
    ('domain', 'd__'),
    ('kingdom', 'k__'),
    ('phylum', 'p__'),
    ('class', 'c__'),
    ('order', 'o__'),
    ('family', 'f__'),
    ('genus', 'g__'),
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Mike Robeson (GitHub: @mikerobeson) | |
# A quickly hacked-together script to allow the use of multiple CPUs to run ITSx | |
# (http://microbiology.se/software/itsx/) much faster. Code based on: | |
# https://medium.com/@thechriskiehl/parallelism-in-one-line-40e9b2b36148 | |
# Needs PyCogent & ITSx installed. | |
from cogent.parse.fastq import MinimalFastqParser | |
from cogent.parse.fasta import MinimalFastaParser |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# There are times when the R1/R2 reads or merged reads fastq files become out of sync | |
# with their respective index reads file (quality filtering, failed merges, etc...). This | |
# will cause the `split_libraries_fastq.py` script in QIIME to fail. | |
# This script will iterate through the index reads file and remove any indexes that do not | |
# have any corresponding read in the main reads file; making appropriate input for | |
# `split_libraries_fastq.py`. | |
# I pulled this chunk of code out of the `join_paired_ends.py` script I wrote for QIIME v1.8 | |
# and later. Specifically, the code activated by the `-b` flag of `join_paired_ends.py`. |