Ivan Krukov ivan-krukov

## volume
#!/usr/bin/osascript
-- Set the system volume from the first command-line argument.
-- Usage: <script> LEVEL
-- NOTE(review): this is the direct-parameter form of `set volume`; the
-- accepted range of LEVEL is not visible here - confirm against the
-- StandardAdditions dictionary before relying on a particular scale.
on run argv
	-- argv holds the command-line arguments; item 1 is the requested level.
	set Volume (item 1 of argv)
end run

## kyles_script.py
import argparse
from Bio import SeqIO
# Command-line interface: three required positional arguments, registered in
# the order they must appear on the command line.
parser = argparse.ArgumentParser()
for _arg_name, _arg_help in (
    ("inputFile", "input fasta file"),
    ("outputFile", "output file name"),
    ("sampleName", "sample name to be removed"),
):
    parser.add_argument(_arg_name, help=_arg_help)

# Parse sys.argv; argparse prints usage and exits if an argument is missing.
args = parser.parse_args()

## fastaparse.py
#Read a fasta file and only keep the sequences with correct headers (id_pattern regex)

import re
import sys

# Header tag of the form "protein_id:<id>"; the id (word characters and dots)
# is exposed as the named group "id".
id_pattern = re.compile(r"protein_id:(?P<id>[.\w]+)")
# One record: a '>' followed by one or more non-'>' characters (newlines
# included) and terminated by a newline.
seq_pattern = re.compile(r">[^>]+\n", flags=re.MULTILINE)

# Read the whole input file, whose path is the first command-line argument.
with open(sys.argv[1], "r") as f:
    text = f.read()

## splitter.sh
#!/bin/sh
# Usage: <script> INPUT_FILE DIVISOR
# Computes the number of lines in INPUT_FILE divided by DIVISOR.

# get the command line arguments
input_file=$1
divisor=$2
# run wc on the input file; the output is "<line count> <file name>".
# The file name is quoted so paths with spaces or glob characters work.
size=$(wc -l "$input_file")
# split the wc output on whitespace - the first word (the line count) is now
# in $1. This deliberately replaces the script's positional parameters; the
# "--" keeps an empty $size from making `set` dump every shell variable.
set -- $size
# get the integer division of line count / divisor
part=$(($1/$divisor))

## oggle.sh
#!/bin/sh
# Run the command given as arguments and log it with start/finish timestamps.
# stdout is appended to <pid>.out and stderr to <pid>.err, where <pid> is the
# process id of this shell.
cmd=$*
pid=$$

echo "$cmd @ $(pwd); Started at $(date)" > "$pid.out"
echo "[$pid] $cmd"
# $cmd is quoted so eval parses the command text exactly as given. Unquoted,
# the shell would word-split and glob-expand it once before eval parses it a
# second time, collapsing whitespace inside quoted arguments.
# NOTE(review): eval runs arbitrary shell code taken from the arguments - only
# use this wrapper with trusted command lines.
eval "$cmd" >> "$pid.out" 2>> "$pid.err"

echo "$cmd @ $(pwd); Finished at $(date)" >> "$pid.out"

## fastq_sample.py
#Take a fraction of random sequence reads from a fastq file
from sh import wc
import argparse
import random

def first_word(string):
    """Return the first whitespace-delimited token of ``string``.

    Leading and trailing whitespace is ignored. Raises ``IndexError`` when
    ``string`` is empty or contains only whitespace.
    """
    # Only the first token is needed, so stop splitting after one separator.
    tokens = string.strip().split(None, 1)
    return tokens[0]

#read a file in chunks of deflines
def read_segments(filename,deflines):

## blosum62.txt
#  blosum62
#  * column uses minimum score
#  BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
#  Blocks Database = /data/blocks_5.0/blocks.dat
#  Cluster Percentage: >= 62
#  Entropy =   0.6979, Expected =  -0.5209
   A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  B  Z  X  *
A  4 -1 -2 -2  0 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -3 -2  0 -2 -1  0 -4
R -1  5  0 -2 -3  1  0 -2  0 -3 -2  2 -1 -3 -2 -1 -1 -3 -2 -3 -1  0 -1 -4
N -2  0  6  1 -3  0  0  0  1 -3 -3  0 -2 -3 -2  1  0 -4 -2 -3  3  0 -1 -4

## gs.vim
^\<\u\l\{-}\> \<\l\{-}\>$

## mul_print.sh
# Print every file in results/ starting from its 4th line, without per-file
# name headers (-q), and page the output without wrapping long lines (-S).
tail -q -n +4 results/* | less -S

## descriptive_join.sh
# Full outer join of files "ce" and "hc" on their first field: unpairable
# lines from both files are kept (-a 1 -a 2), missing fields are filled with
# NA (-e), and each output line is: join field, field 2 of ce, field 2 of hc.
# The result is written to join.count. join expects both inputs to be sorted
# on the join field.
join -a 1 -a 2 -e NA -o 0,1.2,2.2 ce hc > join.count
	! /usr/bin/osascript
	on run argv
	set Volume (item 1 of argv)
	end run
	import argparse
	from Bio import SeqIO
	parser=argparse.ArgumentParser()

	parser.add_argument("inputFile", help="input fasta file")
	parser.add_argument("outputFile",help="output file name")
	parser.add_argument("sampleName",help="sample name to be removed")

	args = parser.parse_args()
	#Read a fasta file and only keep the sequences with correct headers (id_pattern regex)

	import re
	import sys

	seq_pattern = re.compile(r">[^>]+\n",re.MULTILINE)
	id_pattern = re.compile(r"protein_id:(?P<id>[.\w]+)")

	with open(sys.argv[1]) as f:
	text = f.read()
	#!/bin/sh
	#get the command line arguments
	input_file=$1
	divisor=$2
	#run wc on the file in argv[1]
	size=`wc -l $input_file`
	#split the return on whitespace - first word is now in
	set $size
	#get the intiger division of wc/divisor
	part=$(($1/$divisor))
	#!/bin/sh
	cmd=$*
	pid=$$

	echo "$cmd @ `pwd`; Started at `date`" > $pid.out
	echo "[$pid] $cmd"
	eval $cmd >> $pid.out 2>> $pid.err

	echo "$cmd @ `pwd`; Finished at `date`" >> $pid.out
	#Take a fraction of random sequence reads from a fastq file
	from sh import wc
	import argparse
	import random

	def first_word(string):
	return string.strip().split()[0]

	#read a file in chunks of deflines
	def read_segments(filename,deflines):
	# blosum62
	# * column uses minimum score
	# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
	# Blocks Database = /data/blocks_5.0/blocks.dat
	# Cluster Percentage: >= 62
	# Entropy = 0.6979, Expected = -0.5209
	A R N D C Q E G H I L K M F P S T W Y V B Z X *
	A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
	R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
	N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4