Mike Robeson mikerobeson

## convert_taxonomy_from_usearch_to_rdp.py
#!/usr/bin/env python
# Takes input in the form of:
#	OTU6	d:Viridiplantae,k:Streptophyta,p:asterids,c:Gentianales,o:Rubiaceae;
#	OTU7	d:Viridiplantae,k:Streptophyta;
# and outputs to:
#	OTU6	d__Viridiplantae;k__Streptophyta;p__asterids;c__Gentianales;o__Rubiaceae;f__;g__;s__
#	OTU7	d__Viridiplantae;k__Streptophyta;p__;c__;o__;f__;g__;s__


# WARNING! This script will fail if the taxonomy labels contain any additional ':' characters.

## aligned_fasta_to_phylip.py
#! /usr/bin/env python
# Created by: Michael S. Robeson II
from cogent import LoadSeqs
import string

def read_sequence_file(file_path):
    """Reads in an ALIGNED fasta formatted sequence file
        -file_path : path to the file to be read in

    The path is passed to ``LoadSeqs`` with ``alignment=True``.

    NOTE(review): in this excerpt the loaded data is bound to ``seq_data``
    but never returned; the remainder of the body may lie outside this
    view -- confirm against the full script.
    """
    seq_data = LoadSeqs(file_path, alignment=True)

## remove_seqs_with_n.py
#!/usr/bin/env python

from cogent.parse.fasta import MinimalFastaParser


def check_for_iupac(seq):
	"""Flag seqs with IUPAC ambiguity codes
	like RYSWKMBDHVN

	NOTE(review): only the ``iupac`` assignment is visible in this excerpt;
	the logic that inspects ``seq`` is presumably further down -- confirm
	against the full script.
	"""
	# Characters treated as ambiguity codes (see docstring).
	iupac = 'RYSWKMBDHVN'

## split_fasta_by_num_seqs.py
#!/usr/bin/env python
# Mike Robeson (GitHub: @mikerobeson)
# Needs PyCogent installed.


from cogent.parse.fastq import MinimalFastqParser
from cogent.parse.fasta import MinimalFastaParser
from os.path import abspath, join, splitext, dirname, basename
from os import system, mkdir
from optparse import OptionParser, OptionGroup

## convert_gb_fasta_to_usearch_fasta.py
#!/usr/bin/env python
# Mike Robeson
# quickly hacked / draft code to build a compatible fasta file for use in usearch v8.1
# See: http://www.drive5.com/usearch/manual/utax_user_train.html

from optparse import OptionParser, OptionGroup
from cogent.parse.ncbi_taxonomy import NcbiTaxonomyFromFiles
from cogent.parse.fasta import MinimalFastaParser
import unicodedata

## extract_alignment_region.py
#! /usr/bin/env python
# This script will extract a region of an alignment

from skbio.io import read as read_fasta
import argparse


def extract_region(seq_str, startp, endp):
    """Return the slice of ``seq_str`` between two positions.

    ``startp`` and ``endp`` are each converted with ``int()`` and then used
    as the start and stop of an ordinary Python slice, so the start index
    is included and the stop index is excluded.
    """
    first = int(startp)
    last = int(endp)
    return seq_str[first:last]

## parse_tax_tre_slv_ssu_132.py
#!/usr/bin/env python
# By: Mike Robeson & Se-ran Jun Nov 20, 2018
# I ran this code within the `qiime2-2018.11` environment.
# Simple concept code to prepare a Greengenes-like taxonomy for SILVA (v132).
from skbio.tree import TreeNode
import re

# (rank name, label prefix) pairs, ordered from domain down to genus.
allowed_ranks_list = [
    ('domain', 'd__'),
    ('kingdom', 'k__'),
    ('phylum', 'p__'),
    ('class', 'c__'),
    ('order', 'o__'),
    ('family', 'f__'),
    ('genus', 'g__'),
]

## parallel_itsx.py
#!/usr/bin/env python
# Mike Robeson (GitHub: @mikerobeson)
# A quick hacked together script to allow the use of multiple cpus to run ITSx
# (http://microbiology.se/software/itsx/) much faster. Code based on:
# https://medium.com/@thechriskiehl/parallelism-in-one-line-40e9b2b36148
# Needs PyCogent & ITSx installed.


from cogent.parse.fastq import MinimalFastqParser
from cogent.parse.fasta import MinimalFastaParser

## remove_unused_barcodes.py
#!/usr/bin/env python

# There are times when the R1/R2 reads or merged reads fastq files become out of sync
# with their respective index reads file (quality filtering, failed merges, etc...). This
# will cause the `split_libraries_fastq.py` script in QIIME to fail.
# This script will iterate through the index reads file and remove any indexes that do not
# have a corresponding read in the main reads file, producing appropriate input for
# `split_libraries_fastq.py`.
# I pulled this chunk of code out of the `join_paired_ends.py` script I wrote for QIIME v1.8
# and later. Specifically, the code activated by the `-b` flag of `join_paired_ends.py`.
	#!/usr/bin/env python
	# Takes input in the form of:
	# OTU6 d:Viridiplantae,k:Streptophyta,p:asterids,c:Gentianales,o:Rubiaceae;
	# OTU7 d:Viridiplantae,k:Streptophyta;
	# and outputs to:
	# OTU6 d__Viridiplantae;k__Streptophyta;p__asterids;c__Gentianales;o__Rubiaceae;f__;g__;s__
	# OTU7 d__Viridiplantae;k__Streptophyta;p__;c__;o__;f__;g__;s__


	# WARNING! This script will fail if the taxonomy labels contain any additional ':' characters.
	#! /usr/bin/env python
	# Created by: Michael S. Robeson II
	from cogent import LoadSeqs
	import string

	def read_sequence_file(file_path):
		"""Reads in an ALIGNED fasta formatted sequence file
		-file_path : path to the file to be read in

		The path is passed to ``LoadSeqs`` with ``alignment=True``.

		NOTE(review): in this excerpt the loaded data is bound to
		``seq_data`` but never returned; the remainder of the body may lie
		outside this view -- confirm against the full script.
		"""
		# Body re-indented under the ``def``: it previously sat at the same
		# level as the header, which Python rejects with an IndentationError.
		seq_data = LoadSeqs(file_path, alignment=True)
	#!/usr/bin/env python

	from cogent.parse.fasta import MinimalFastaParser


	def check_for_iupac(seq):
		"""Flag seqs with IUPAC ambiguity codes
		like RYSWKMBDHVN

		NOTE(review): only the ``iupac`` assignment is visible in this
		excerpt; the logic that inspects ``seq`` is presumably further
		down -- confirm against the full script.
		"""
		# Body re-indented under the ``def``: it previously sat at the same
		# level as the header, which Python rejects with an IndentationError.
		# Characters treated as ambiguity codes (see docstring).
		iupac = 'RYSWKMBDHVN'
	#!/usr/bin/env python
	# Mike Robeson (GitHub: @mikerobeson)
	# Needs PyCogent installed.


	from cogent.parse.fastq import MinimalFastqParser
	from cogent.parse.fasta import MinimalFastaParser
	from os.path import abspath, join, splitext, dirname, basename
	from os import system, mkdir
	from optparse import OptionParser, OptionGroup
	#!/usr/bin/env python
	# Mike Robeson
	# quickly hacked / draft code to build a compatible fasta file for use in usearch v8.1
	# See: http://www.drive5.com/usearch/manual/utax_user_train.html

	from optparse import OptionParser, OptionGroup
	from cogent.parse.ncbi_taxonomy import NcbiTaxonomyFromFiles
	from cogent.parse.fasta import MinimalFastaParser
	import unicodedata
	#! /usr/bin/env python
	# This script will extract a region of an alignment

	from skbio.io import read as read_fasta
	import argparse



	def extract_region(seq_str, startp, endp):
		"""Return the slice of ``seq_str`` between two positions.

		``startp`` and ``endp`` are each converted with ``int()`` and then
		used as the start and stop of an ordinary Python slice, so the start
		index is included and the stop index is excluded.
		"""
		# The return statement is now indented under the ``def``; it
		# previously sat at the header's level (an IndentationError).
		return seq_str[int(startp):int(endp)]
	#!/usr/bin/env python
	# By: Mike Robeson & Se-ran Jun Nov 20, 2018
	# I ran this code within the `qiime2-2018.11` environment.
	# Simple concept code to prepare a Greengenes-like taxonomy for SILVA (v132).
	from skbio.tree import TreeNode
	import re

	# (rank name, label prefix) pairs, ordered from domain down to genus.
	allowed_ranks_list = [
		('domain', 'd__'),
		('kingdom', 'k__'),
		('phylum', 'p__'),
		('class', 'c__'),
		('order', 'o__'),
		('family', 'f__'),
		('genus', 'g__'),
	]
	#!/usr/bin/env python
	# Mike Robeson (GitHub: @mikerobeson)
	# A quick hacked together script to allow the use of multiple cpus to run ITSx
	# (http://microbiology.se/software/itsx/) much faster. Code based on:
	# https://medium.com/@thechriskiehl/parallelism-in-one-line-40e9b2b36148
	# Needs PyCogent & ITSx installed.


	from cogent.parse.fastq import MinimalFastqParser
	from cogent.parse.fasta import MinimalFastaParser
	#!/usr/bin/env python

	# There are times when the R1/R2 reads or merged reads fastq files become out of sync
	# with their respective index reads file (quality filtering, failed merges, etc...). This
	# will cause the `split_libraries_fastq.py` script in QIIME to fail.
	# This script will iterate through the index reads file and remove any indexes that do not
	# have a corresponding read in the main reads file, producing appropriate input for
	# `split_libraries_fastq.py`.
	# I pulled this chunk of code out of the `join_paired_ends.py` script I wrote for QIIME v1.8
	# and later. Specifically, the code activated by the `-b` flag of `join_paired_ends.py`.