Matt Ravenhall mattravenhall

## uniqID.sh
# Tag a file name with the current Unix time (seconds since the epoch).
uniqID="$(date +%s)"
file="file_${uniqID}.txt"

## splitRegion.sh
# Split a standard-form genomic position (chr:bpA-bpB), passed as the first
# argument, into its components. Sets the global variables:
#   chr - the text before ':'
#   bpA - the text between ':' and '-'
#   bpB - the text after '-'
function splitRegion {
        IFS=':' read -ra REGION <<< "$1"
        chr="${REGION[0]}"
        # Expand REGION[1] (the "bpA-bpB" part). Without the leading '$' the
        # literal text "{REGION[1]}" was split instead of the coordinates.
        IFS='-' read -ra LOCATION <<< "${REGION[1]}"
        bpA="${LOCATION[0]}"
        bpB="${LOCATION[1]}"
}

# Quote the argument so it reaches the function as a single word.
splitRegion "$1"

## getHaploFreqs.py
import os
import datetime

def printT(message, flush=True):
    """Print ``message`` prefixed with a coloured ``[dd-Mon-YYYY HH:MM:SS]`` timestamp.

    The timestamp is wrapped in the ANSI escape codes ``\\033[96m`` and
    ``\\033[0m``.

    Args:
        message: Text printed after the timestamp.
        flush: Forwarded to ``print``; defaults to flushing immediately.
    """
    # Only the timestamp goes through strftime. The message is appended
    # afterwards so that any '%' it contains is printed literally instead of
    # being interpreted as a strftime directive.
    stamp = datetime.datetime.now().strftime('[%d-%b-%Y %H:%M:%S]')
    print('\033[96m' + stamp + '\033[0m ' + message, flush=flush)

# Output name. NOTE(review): not used within the visible lines.
outFile = 'outFile'
# `autoAll` is not defined in the visible lines -- presumably set earlier; confirm.
if autoAll:
  # Collect every ERR*.bam matched by the shell glob: `ls -m` prints a
  # comma-separated listing, sed drops the space after each comma, tr removes
  # newlines, and the resulting text is split on ','.
  samples = os.popen("ls -m ERR*.bam | sed -e 's/, /,/g' | tr -d '\n'").read().split(',')
else:
# NOTE: the body of the else-branch is not visible in this excerpt.

## fasta2genbank.py
# Fasta to Genbank (dna)
# Reads <filename>.fa, marks every record as DNA and writes <filename>.gbk.
filename = 'example_fasta'

from Bio import SeqIO
try:
    from Bio.Alphabet import generic_dna
except ImportError:
    # Bio.Alphabet was removed in Biopython 1.78; newer releases expect the
    # molecule type in the record annotations instead of an alphabet.
    generic_dna = None

seqs = list(SeqIO.parse(filename+'.fa','fasta'))
for seq in seqs:
    if generic_dna is not None:
        # Older Biopython: the GenBank writer needs a DNA alphabet on the sequence.
        seq.seq.alphabet = generic_dna
    else:
        # Newer Biopython: the GenBank writer needs annotations['molecule_type'].
        seq.annotations['molecule_type'] = 'DNA'
SeqIO.write(seqs, filename+'.gbk', 'genbank')

## writeFasta.py
# Presumably writes `sequences` to `filename` in FASTA format -- the body of
# the final loop is not visible in this excerpt.
#   titles    - one title per sequence; replaced by contig_0, contig_1, ...
#               when its length differs from that of `sequences`.
#   sequences - the sequences to iterate over alongside the titles.
#   filename  - path of the output file (default 'tmp.fasta').
# NOTE(review): `titles` and `sequences` use mutable list defaults. The visible
# code never mutates them, but confirm for the part of the body not shown.
def writeFasta(titles=[], sequences=[], filename='tmp.fasta'):
	if len(titles) != len(sequences):
		titles = ['contig_{}'.format(i) for i in range(len(sequences))]

	# Create or truncate the output file so the append below starts from empty.
	with open(filename, 'w') as fasta:
		fasta.write('')

	# Re-open in append mode and walk titles and sequences in lockstep.
	with open(filename, 'a') as fasta:
		for t, s in zip(titles,sequences):

## splitDF.py
# Split a dataframe into 'splits' number subsets of equal size.
# NOTE: only the start of the function is visible in this excerpt; the loop body is missing.

def splitDF(dataframe, splits):
    # Input validation. `pd` must be imported elsewhere -- no pandas import is visible here.
    assert isinstance(dataframe, pd.DataFrame), "Supplied 'dataframe' must be a pandas dataframe."
    assert isinstance(splits, int), "Supplied 'splits' must be an integer."
    # NOTE(review): the condition requires rows >= splits, yet the message says
    # 'splits' must "exceed or match" the row count -- the wording looks inverted.
    assert dataframe.shape[0] >= splits, "Supplied 'splits' must exceed or match the number of rows in 'dataframe'."

    # Row count divided by the number of splits, passed through round().
    split_size = round(dataframe.shape[0] / int(splits))
    outputs = []
    for i in range(splits):

## embl2fasta.py
# embl to fasta
import re
import sys

# Exactly two command-line arguments are required.
if len(sys.argv) != 3:
	# Passing the message to sys.exit() prints it to stderr and exits with
	# status 1, so a usage error is no longer reported as success
	# (a bare sys.exit() exits with status 0).
	sys.exit('Usage: embl2fasta.py <embl_input_file> <fasta_output_name>')

# Flag initialised to False; its use is not visible in this excerpt.
IDset = False
inFile = sys.argv[1]	# 'example.embl'

## grepWrapper.sh
# Holds the most recent user entry; the value 'quit' ends the loop below.
input=''

# No file argument supplied: pre-set input to 'quit' so the loop is skipped,
# and print the usage hint.
if [ "$1" = '' ]; then
  input='quit'
  echo "Please provide file to search as 'grepWrapper.sh <fileToSearch>'"
fi

# Prompt repeatedly until the user enters 'quit'.
# NOTE: the rest of the loop body and its closing 'done' are not visible in this excerpt.
while [ "${input}" != 'quit' ]; do
  echo "Please provide search sequence, or 'quit':"
  read input

## runlog.py
import os
import time
import psutil

# Memory logger
def memlog(info=''):
    """Prepare ``usage.log`` for logging (only the start of the function is visible).

    When ``usage.log`` does not exist yet, it is created with the CSV header
    ``Time,CPU,Memory,Info``.

    Args:
        info: Presumably the text for the ``Info`` column -- its use is not
            visible in this excerpt; confirm against the full function.
    """
    if not os.path.isfile('usage.log'):
        # Append mode creates the missing file; the header is written once.
        with open('usage.log', 'a') as f:
            f.write('Time,CPU,Memory,Info\n')
    # Current time as seconds since the epoch.
    ctime = time.time()

## splitFasta.sh
# Split foo.fasta at every line containing '>' and repeat the pattern ("{*}")
# until the input is exhausted. Pieces are named foo_00.fasta, foo_01.fasta, ...
# NOTE(review): when the very first line matches, csplit emits an empty first
# piece unless --elide-empty-files is given -- confirm whether that is wanted.
csplit --suffix-format='%02d.fasta' --prefix='foo_' foo.fasta '/>/+0' "{*}"
	# Split a standard-form genomic position (chr:bpA-bpB), passed as the first
	# argument, into its components. Sets the global variables:
	#   chr - the text before ':'
	#   bpA - the text between ':' and '-'
	#   bpB - the text after '-'
	function splitRegion {
		IFS=':' read -ra REGION <<< "$1"
		chr="${REGION[0]}"
		# Expand REGION[1] (the "bpA-bpB" part). Without the leading '$' the
		# literal text "{REGION[1]}" was split instead of the coordinates.
		IFS='-' read -ra LOCATION <<< "${REGION[1]}"
		bpA="${LOCATION[0]}"
		bpB="${LOCATION[1]}"
	}

	# Quote the argument so it reaches the function as a single word.
	splitRegion "$1"
	import os
	import datetime

	def printT(message, flush=True):
		"""Print ``message`` prefixed with a coloured ``[dd-Mon-YYYY HH:MM:SS]`` timestamp.

		The timestamp is wrapped in the ANSI escape codes ``\\033[96m`` and
		``\\033[0m``.

		Args:
			message: Text printed after the timestamp.
			flush: Forwarded to ``print``; defaults to flushing immediately.
		"""
		# Only the timestamp goes through strftime. The message is appended
		# afterwards so that any '%' it contains is printed literally instead
		# of being interpreted as a strftime directive.
		stamp = datetime.datetime.now().strftime('[%d-%b-%Y %H:%M:%S]')
		print('\033[96m' + stamp + '\033[0m ' + message, flush=flush)

	outFile = 'outFile'
	if autoAll:
	samples = os.popen("ls -m ERR*.bam \| sed -e 's/, /,/g' \| tr -d '\n'").read().split(',')
	else:
	# Fasta to Genbank (dna)
	# Reads <filename>.fa, marks every record as DNA and writes <filename>.gbk.
	filename = 'example_fasta'

	from Bio import SeqIO
	try:
		from Bio.Alphabet import generic_dna
	except ImportError:
		# Bio.Alphabet was removed in Biopython 1.78; newer releases expect the
		# molecule type in the record annotations instead of an alphabet.
		generic_dna = None

	seqs = list(SeqIO.parse(filename+'.fa','fasta'))
	for seq in seqs:
		if generic_dna is not None:
			# Older Biopython: the GenBank writer needs a DNA alphabet on the sequence.
			seq.seq.alphabet = generic_dna
		else:
			# Newer Biopython: the GenBank writer needs annotations['molecule_type'].
			seq.annotations['molecule_type'] = 'DNA'
	SeqIO.write(seqs, filename+'.gbk', 'genbank')
	# Presumably writes `sequences` to `filename` in FASTA format -- the body of
	# the final loop is not visible in this excerpt.
	# NOTE(review): the nesting indentation appears to have been flattened in
	# this copy; every line sits at the same level.
	# NOTE(review): `titles` and `sequences` use mutable list defaults; the
	# visible code never mutates them, but confirm for the part not shown.
	def writeFasta(titles=[], sequences=[], filename='tmp.fasta'):
	# When the two lists differ in length, titles become contig_0, contig_1, ...
	if len(titles) != len(sequences):
	titles = ['contig_{}'.format(i) for i in range(len(sequences))]

	# Create or truncate the output file so the append below starts from empty.
	with open(filename, 'w') as fasta:
	fasta.write('')

	# Re-open in append mode and walk titles and sequences in lockstep.
	with open(filename, 'a') as fasta:
	for t, s in zip(titles,sequences):
	# Split a dataframe into 'splits' number subsets of equal size.
	# NOTE: only the start of the function is visible in this excerpt, and the
	# nesting indentation appears to have been flattened in this copy.

	def splitDF(dataframe, splits):
	# Input validation. `pd` must be imported elsewhere -- no pandas import is visible here.
	assert isinstance(dataframe, pd.DataFrame), "Supplied 'dataframe' must be a pandas dataframe."
	assert isinstance(splits, int), "Supplied 'splits' must be an integer."
	# NOTE(review): the condition requires rows >= splits, yet the message says
	# 'splits' must "exceed or match" the row count -- the wording looks inverted.
	assert dataframe.shape[0] >= splits, "Supplied 'splits' must exceed or match the number of rows in 'dataframe'."

	# Row count divided by the number of splits, passed through round().
	split_size = round(dataframe.shape[0] / int(splits))
	outputs = []
	for i in range(splits):
	# embl to fasta
	import re
	import sys

	# Exactly two command-line arguments are required.
	if len(sys.argv) != 3:
		# Passing the message to sys.exit() prints it to stderr and exits with
		# status 1, so a usage error is no longer reported as success
		# (a bare sys.exit() exits with status 0).
		sys.exit('Usage: embl2fasta.py <embl_input_file> <fasta_output_name>')

	# Flag initialised to False; its use is not visible in this excerpt.
	IDset = False
	inFile = sys.argv[1] # 'example.embl'
	# Holds the most recent user entry; the value 'quit' ends the loop below.
	input=''

	# No file argument supplied: pre-set input to 'quit' so the loop is skipped,
	# and print the usage hint.
	if [ "$1" = '' ]; then
	input='quit'
	echo "Please provide file to search as 'grepWrapper.sh <fileToSearch>'"
	fi

	# Prompt repeatedly until the user enters 'quit'.
	# NOTE: the rest of the loop body and its closing 'done' are not visible in this excerpt.
	while [ "${input}" != 'quit' ]; do
	echo "Please provide search sequence, or 'quit':"
	read input
	import os
	import time
	import psutil

	# Memory logger
	# Only the start of the function is visible in this excerpt, and the nesting
	# indentation appears to have been flattened in this copy.
	# `info` is presumably the text for the Info column -- its use is not shown; confirm.
	def memlog(info=''):
	# When usage.log does not exist yet, create it with the CSV header row.
	if not os.path.isfile('usage.log'):
	with open('usage.log', 'a') as f:
	f.write('Time,CPU,Memory,Info\n')
	# Current time as seconds since the epoch.
	ctime = time.time()