Tony walterst

## MME_R_script_growth_modeling.txt
# This code will read in the STARR heights and weight data that accompanied the article:
# "A modified Michaelis-Menten equation estimates growth from birth to 3 years in healthy babies in the US"
# The filepaths will need to be modified to match the local file locations. dplyr and the ggplot2, gplots, and gridExtra
# graphics libraries are needed. Weights/heights can be interpolated for a given age in days
# with the predict() function, by passing it the fitted model and a dataframe of days.
# Subjects whose fit fails because of nls() errors will be plotted as raw data.
# Increase the default number_of_subjects_to_fit to 100 to see an example.

library(dplyr)
library(ggplot2)

## strip_primers_fastq.py
#!/usr/bin/env python

# USAGE: strip_primers_fastq.py Mapping_file input_fasta output_fasta log_filename

from sys import argv
from string import upper
from re import compile

from skbio.parse.sequences import parse_fastq
from skbio.sequence import DNA

## merge_bcs_reads.py
#!/usr/bin/env python

from sys import argv
from itertools import izip

from cogent.parse.fastq import MinimalFastqParser

""" Usage
python merge_bcs_reads.py X Y Z
X: barcodes fastq file

## parse_nonstandard_chars.py
#!/usr/bin/env python

"""Somewhat hackish way to eliminate non-ASCII characters in a text file,
such as a taxonomy mapping file, with QIIME. Reads through the file, and
removes all characters above decimal value 127. Additionally, asterisk "*"
characters are removed, as these inhibit the RDP classifier.

Usage:
python parse_nonstandard_chars.py X > Y
where X is the input file to be parsed, and Y is the output parsed file"""

## parse_otu_mapping_from_uc.py
#!/usr/bin/env python

""" This is modified from the bfillings usearch app controller

usage: python parse_otu_mapping_from_uc.py X Y
where X is the input .uc file, Y is the output OTU mapping file"""


from sys import argv

## parse_ipod_to_metadata.py
#!/usr/bin/env python
from __future__ import division
# USAGE: python parse_ipod_to_metadata.py mapping_file days_to_consider ipod_tab_delim_file raw_output_file qiime_compatible_output_file
# where days_to_consider counts the same day as one of the days, and the comma-separated columns need to be
# an exact match to the field labels in the ipod data file, e.g. Gastrointestinal_issues
# All dates must be in the format of DD/MM/YY in the ipod source tab delimited data.


from sys import argv
from operator import itemgetter

## random_subsample_fastq.py
#!/usr/bin/env python

from sys import argv
from random import random

#from gzip import open as gz_open
from glob import glob

import gzip
import os

## find_fastq_errors.py
#!/usr/bin/env python

# Checks fastq seqs in gzipped files and writes the first error, if any, to a log file
# Usage:  python find_fastq_errors.py fastq_folder log_file
# where fastq_folder contains all of the fastq files; subdirectories are searched as well

from sys import argv
from glob import glob

import gzip

## record_singletons.py
#!/usr/bin/env python

"""Usage: python record_singletons.py X Y
where X is the input OTU mapping file and Y is the output singleton sequence ID file.
"""

from sys import argv

otu_mapping = open(argv[1], "U")
singletons_out = open(argv[2], "w")

## get_rank_sorted_data.py
#!/usr/bin/env python

from sys import argv

from operator import itemgetter
from scipy.stats import rankdata
from numpy import log

from biom import load_table
	# This code will read in the STARR heights and weight data that accompanied the article:
	# "A modified Michaelis-Menten equation estimates growth from birth to 3 years in healthy babies in the US"
	# The filepaths will need to be modified to match the local file locations. dplyr and the ggplot2, gplots, and gridExtra
	# graphics libraries are needed. Weights/heights can be interpolated for a given age in days
	# with the predict() function, by passing it the fitted model and a dataframe of days.
	# Subjects whose fit fails because of nls() errors will be plotted as raw data.
	# Increase the default number_of_subjects_to_fit to 100 to see an example.

	library(dplyr)
	library(ggplot2)
	#!/usr/bin/env python

	# USAGE: strip_primers_fastq.py Mapping_file input_fasta output_fasta log_filename

	from sys import argv
	from string import upper
	from re import compile

	from skbio.parse.sequences import parse_fastq
	from skbio.sequence import DNA
	#!/usr/bin/env python

	from sys import argv
	from itertools import izip

	from cogent.parse.fastq import MinimalFastqParser

	""" Usage
	python merge_bcs_reads.py X Y Z
	X: barcodes fastq file
	#!/usr/bin/env python

	"""Somewhat hackish way to eliminate non-ASCII characters in a text file,
	such as a taxonomy mapping file, with QIIME. Reads through the file, and
	removes all characters above decimal value 127. Additionally, asterisk "*"
	characters are removed, as these inhibit the RDP classifier.

	Usage:
	python parse_nonstandard_chars.py X > Y
	where X is the input file to be parsed, and Y is the output parsed file"""
	#!/usr/bin/env python

	""" This is modified from the bfillings usearch app controller

	usage: python parse_otu_mapping_from_uc.py X Y
	where X is the input .uc file, Y is the output OTU mapping file"""


	from sys import argv
	#!/usr/bin/env python
	from __future__ import division
	# USAGE: python parse_ipod_to_metadata.py mapping_file days_to_consider ipod_tab_delim_file raw_output_file qiime_compatible_output_file
	# where days_to_consider counts the same day as one of the days, and the comma-separated columns need to be
	# an exact match to the field labels in the ipod data file, e.g. Gastrointestinal_issues
	# All dates must be in the format of DD/MM/YY in the ipod source tab delimited data.


	from sys import argv
	from operator import itemgetter
	#!/usr/bin/env python

	from sys import argv
	from random import random

	#from gzip import open as gz_open
	from glob import glob

	import gzip
	import os
	#!/usr/bin/env python

	# Checks fastq seqs in gzipped files and writes the first error, if any, to a log file
	# Usage: python find_fastq_errors.py fastq_folder log_file
	# where fastq_folder contains all of the fastq files; subdirectories are searched as well

	from sys import argv
	from glob import glob

	import gzip
	#!/usr/bin/env python

	"""Usage: python record_singletons.py X Y
	where X is the input OTU mapping file and Y is the output singleton sequence ID file.
	"""

	from sys import argv

	otu_mapping = open(argv[1], "U")
	singletons_out = open(argv[2], "w")