Ivan Krukov ivan-krukov

## data.csv

          
            1
            1
            0

            
              1
              2
              0

            
              1
              3
              0

            
              2
              1
              0

            
              2
              2
              0

            
              2
              3
              0

            
              4
              1
              1

            
              4
              2
              1

            
              4
              3
              1

            
              5
              1
              1

## README.md

      
              4 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                ivan-krukov
                / README.md
            
            
              Last active
              August 29, 2015 14:17
            
              
                Chords
              
          
    # 20 chords
A simple chord diagram for amino-acid relationships.
## Usage
The input data should be a 20x20 tab-separated table.
To run it, do the following (uses Firefox):
git clone https://gist.github.com/ivan-kryukov/c265c9df6bfeda28abc8


## README.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                ivan-krukov
                / README.md
            
            
              Last active
              August 29, 2015 14:18
            
              
                Shuffle and relabel observations in a csv file
              
          
    # Randomization script
The idea is to bootstrap-resample the control dataset to check that it is appropriate as a control.
# Usage
python randomize.py <input.csv> <resample_times>


## hello.fa
>sequence 1
ACTG

## volume
#!/usr/bin/osascript
# Command-line wrapper around the `set volume` command.
# Usage: volume <level>
on run argv
	# The first command-line argument is handed to `set volume` unchanged.
	set Volume (item 1 of argv)
end run

## kyles_script.py
import argparse
from Bio import SeqIO
# Build the command-line interface: three required positional arguments.
parser=argparse.ArgumentParser()

parser.add_argument("inputFile", help="input fasta file")
parser.add_argument("outputFile",help="output file name")
parser.add_argument("sampleName",help="sample name to be removed")

# Parse sys.argv; argparse prints a usage message and exits if an argument is missing.
args = parser.parse_args()

## fastaparse.py
#Read a fasta file and only keep the sequences with correct headers (id_pattern regex)

import re
import sys

# Matches one record: a ">" followed by every character up to the last
# newline that precedes the next ">" (or the end of the text).
# re.MULTILINE has no effect here because the pattern has no ^ or $ anchors.
seq_pattern = re.compile(r">[^>]+\n",re.MULTILINE)
# Captures, in the named group "id", the run of word characters and dots
# that directly follows the literal "protein_id:".
id_pattern = re.compile(r"protein_id:(?P<id>[.\w]+)")

# Read the whole file named by the first command-line argument into memory.
with open(sys.argv[1]) as f:
    text = f.read()

## splitter.sh
#!/bin/sh
# Compute the line count of a file divided by a divisor.
# Usage: splitter.sh <input_file> <divisor>
# get the command line arguments
input_file=$1
divisor=$2
# run wc on the input file; quoted so a path containing spaces stays one argument
size=`wc -l "$input_file"`
# split wc's output ("<count> <name>") on whitespace into the positional
# parameters; the line count is now in $1 ("--" ends option parsing for set)
set -- $size
# get the integer division of wc/divisor
part=$(($1/$divisor))

## oggle.sh
#!/bin/sh
# Run a command, logging its output and start/finish times to files named
# after this shell's PID.
# All arguments, joined into a single string, form the command to run.
cmd=$*
# PID of this shell; used as the base name of the .out and .err files.
pid=$$

# Start a fresh <pid>.out with the command, the working directory and the start time.
echo "$cmd @ `pwd`; Started at `date`" > $pid.out
# Report the PID and the command on stdout.
echo "[$pid] $cmd"
# Run the command: stdout is appended to <pid>.out, stderr to <pid>.err.
# NOTE(review): $cmd is unquoted, so it is word-split and glob-expanded before
# eval re-parses it - confirm this is intended.
eval $cmd >> $pid.out 2>> $pid.err

# Append the finish time to <pid>.out.
echo "$cmd @ `pwd`; Finished at `date`" >> $pid.out

## fastq_sample.py
#Take a fraction of random sequence reads from a fastq file
from sh import wc
import argparse
import random

def first_word(string):
    """Return the first whitespace-delimited token of *string*.

    Raises IndexError when *string* is empty or contains only whitespace.
    """
    # split() with no separator already ignores leading/trailing whitespace.
    tokens = string.split()
    return tokens[0]

#read a file in chunks of deflines
def read_segments(filename,deflines):
	! /usr/bin/osascript
	on run argv
	set Volume (item 1 of argv)
	end run
	import argparse
	from Bio import SeqIO
	parser=argparse.ArgumentParser()

	parser.add_argument("inputFile", help="input fasta file")
	parser.add_argument("outputFile",help="output file name")
	parser.add_argument("sampleName",help="sample name to be removed")

	args = parser.parse_args()
	#Read a fasta file and only keep the sequences with correct headers (id_pattern regex)

	import re
	import sys

	seq_pattern = re.compile(r">[^>]+\n",re.MULTILINE)
	id_pattern = re.compile(r"protein_id:(?P<id>[.\w]+)")

	with open(sys.argv[1]) as f:
	text = f.read()
	#!/bin/sh
	#get the command line arguments
	input_file=$1
	divisor=$2
	#run wc on the file in argv[1]
	size=`wc -l $input_file`
	#split the return on whitespace - first word is now in
	set $size
	#get the intiger division of wc/divisor
	part=$(($1/$divisor))
	#!/bin/sh
	cmd=$*
	pid=$$

	echo "$cmd @ `pwd`; Started at `date`" > $pid.out
	echo "[$pid] $cmd"
	eval $cmd >> $pid.out 2>> $pid.err

	echo "$cmd @ `pwd`; Finished at `date`" >> $pid.out
	#Take a fraction of random sequence reads from a fastq file
	from sh import wc
	import argparse
	import random

	def first_word(string):
	return string.strip().split()[0]

	#read a file in chunks of deflines
	def read_segments(filename,deflines):