Tony walterst

## A_linear_mixed_models_microbiome.Rmd
# The initial part of this script has settings for filepaths, parameters, and metadata.
# Many parts may need to be altered based upon input data changes, metadata fields used, etc.

library('data.table')
library('dtplyr')
library('tidyverse')
library('glmmTMB')
library('ggplot2')
library('broom')
library('DHARMa')

## MME_R_script_growth_modeling.txt
# This code reads in the STARR height and weight data that accompanied the article:
# "A modified Michaelis-Menten equation estimates growth from birth to 3 years in healthy babies in the US"
# The filepaths will need to be modified to match the local filesystem. The dplyr, ggplot2, gplots, and gridExtra
# libraries are needed. Weights/heights can be interpolated for a given age in days
# through the predict() function, passing the fitted model and a dataframe of days.
# Subjects that fail to fit because nls() raises an error are plotted as raw data instead.
# Increase the default number_of_subjects_to_fit to 100 to see an example.

library(dplyr)
library(ggplot2)

## parse_ipod_to_metadata.py
#!/usr/bin/env python
from __future__ import division
# USAGE: python parse_ipod_to_metadata.py mapping_file days_to_consider ipod_tab_delim_file raw_output_file qiime_compatible_output_file
# where days_to_consider counts the same day as one of the days, and comma-separated columns need to be
# an exact match to the field label in the ipod data file, e.g. Gastrointestinal_issues
# All dates must be in the format of DD/MM/YY in the ipod source tab delimited data.


from sys import argv
from operator import itemgetter

## random_subsample_fastq.py
#!/usr/bin/env python

from sys import argv
from random import random

#from gzip import open as gz_open
from glob import glob

import gzip
import os

## find_fastq_errors.py
#!/usr/bin/env python

# Used to find fastq seqs in gzipped files, write first error, if any, to a log file
# Usage:  python find_fastq_errors.py fastq_folder log_file
# where fastq_folder contains all of the fastq files; subdirectories are searched as well

from sys import argv
from glob import glob

import gzip

## record_singletons.py
#!/usr/bin/env python

"""Usage: python record_singletons.py X Y
where X is the input OTU mapping file and Y is the output singleton sequence ID file.
"""

from sys import argv

otu_mapping = open(argv[1], "U")
singletons_out = open(argv[2], "w")

## parse_otu_mapping_from_uc.py
#!/usr/bin/env python

""" This is modified from the bfillings usearch app controller

usage: python parse_otu_mapping_from_uc.py X Y
where X is the input .uc file, Y is the output OTU mapping file"""


from sys import argv

## get_rank_sorted_data.py
#!/usr/bin/env python

from sys import argv

from operator import itemgetter
from scipy.stats import rankdata
from numpy import log

from biom import load_table

## filter_barcode_header.py
#!/usr/bin/env python


# Usage:  python filter_barcode_header.py original_barcode_seqs.fastq new_barcode_seqs.fastq
# WARNING: the second file specified will be overwritten if it exists!

bc_start_indicator = "1:N:0:"
chars_to_strip = ["+"]

from sys import argv

## count_zipped_fastq_reads.py
#!/usr/bin/env python

# Used to count fastq seqs in gzipped files, write counts and file name to log file
# Usage:  python count_zipped_fastq_reads.py fastq_folder log_file
# where fastq_folder has all of the fastq files in it (doesn't search subdirectories)

from sys import argv
from glob import glob

from cogent.parse.fastq import MinimalFastqParser
	# The initial part of this script has settings for filepaths, parameters, and metadata.
	# Many parts may need to be altered based upon input data changes, metadata fields used, etc.

	library('data.table')
	library('dtplyr')
	library('tidyverse')
	library('glmmTMB')
	library('ggplot2')
	library('broom')
	library('DHARMa')
	# This code reads in the STARR height and weight data that accompanied the article:
	# "A modified Michaelis-Menten equation estimates growth from birth to 3 years in healthy babies in the US"
	# The filepaths will need to be modified to match the local filesystem. The dplyr, ggplot2, gplots, and gridExtra
	# libraries are needed. Weights/heights can be interpolated for a given age in days
	# through the predict() function, passing the fitted model and a dataframe of days.
	# Subjects that fail to fit because nls() raises an error are plotted as raw data instead.
	# Increase the default number_of_subjects_to_fit to 100 to see an example.

	library(dplyr)
	library(ggplot2)
	#!/usr/bin/env python
	from __future__ import division
	# USAGE: python parse_ipod_to_metadata.py mapping_file days_to_consider ipod_tab_delim_file raw_output_file qiime_compatible_output_file
	# where days_to_consider counts the same day as one of the days, and comma-separated columns need to be
	# an exact match to the field label in the ipod data file, e.g. Gastrointestinal_issues
	# All dates must be in the format of DD/MM/YY in the ipod source tab delimited data.


	from sys import argv
	from operator import itemgetter
	#!/usr/bin/env python

	from sys import argv
	from random import random

	#from gzip import open as gz_open
	from glob import glob

	import gzip
	import os
	#!/usr/bin/env python

	# Used to find fastq seqs in gzipped files, write first error, if any, to a log file
	# Usage: python find_fastq_errors.py fastq_folder log_file
	# where fastq_folder contains all of the fastq files; subdirectories are searched as well

	from sys import argv
	from glob import glob

	import gzip
	#!/usr/bin/env python

	"""Usage: python record_singletons.py X Y
	where X is the input OTU mapping file and Y is the output singleton sequence ID file.
	"""

	from sys import argv

	otu_mapping = open(argv[1], "U")
	singletons_out = open(argv[2], "w")
	#!/usr/bin/env python

	""" This is modified from the bfillings usearch app controller

	usage: python parse_otu_mapping_from_uc.py X Y
	where X is the input .uc file, Y is the output OTU mapping file"""


	from sys import argv
	#!/usr/bin/env python


	# Usage: python filter_barcode_header.py original_barcode_seqs.fastq new_barcode_seqs.fastq
	# WARNING: the second file specified will be overwritten if it exists!

	bc_start_indicator = "1:N:0:"
	chars_to_strip = ["+"]

	from sys import argv
	#!/usr/bin/env python

	# Used to count fastq seqs in gzipped files, write counts and file name to log file
	# Usage: python count_zipped_fastq_reads.py fastq_folder log_file
	# where fastq_folder has all of the fastq files in it (doesn't search subdirectories)

	from sys import argv
	from glob import glob

	from cogent.parse.fastq import MinimalFastqParser