Alon Shaiber ShaiberAlon

## wait_for_cluster.py
import os
import sys
import time
import xml.dom.minidom
import string
import getpass

def still_running(job_name, job_owner):
    f=os.popen('qstat -u \* -xml -r')

## remove_duplicate_sequence.shx
#!bin/bash
# remove_duplicate_sequence.shx is a short bash script that removes duplicate copies of sequences from an input fasta file and saves the result in an output fasta file.
# the bash script was based on Pierre Lindenbaum's script: https://www.biostars.org/p/3003/#3008
# input:
# -f | --file  : fasta file
# -o | --output : output file, written after removing the duplicate sequences
#
while [ "$1" != "" ]; do
        case $1 in
                -f | --file ) shift

## convert_table_to_fasta.shx
#!bin/bash
#
# expects tab-delimited table with only two columns and no header
# column 1 - name of each sequence
# column 2 - sequence
# performs two actions:
# Adds > at the beginning of each row
# converts all the tabs to new lines
#
while [ "$1" != "" ]; do

## ceonvert_xmlTab_to_normalTab.shx
tr  '\r' '\n' < file.txt > file_fix.txt

## compare_lists.py
#!/usr/bin/env python

def read_entries(path):
    """Return the set of non-empty lines in the file at *path*.

    Each line is stripped of surrounding whitespace (including the trailing
    newline) so that entries compare equal regardless of line endings.
    The file is closed as soon as it has been read.
    """
    with open(path, 'r') as handle:
        return set(line.strip() for line in handle if line.strip())


def only_in_first(first, second):
    """Return, sorted, the entries of *first* that do not occur in *second*.

    Both arguments may be any iterable of hashable entries. Sorting makes the
    printed result reproducible between runs.
    """
    return sorted(set(first) - set(second))


if __name__ == '__main__':
    # Compare the two lists line by line. Building a set directly from
    # file.read() would compare individual characters, not entries.
    print(only_in_first(read_entries('p207_profiled_samples.txt'),
                        read_entries('p214_profield_samples.txt')))

## Call_metaphlan.shx
#!/bin/bash

set -e

DIR=Metaphlan_output
job=Metaphlan_Primates
email=alon.shaiber@gmail.com
# Run wait_for_cluster.py with the first argument given to WAIT.
# $1 - value forwarded to the script (presumably a job name; confirm
#      against wait_for_cluster.py).
WAIT () {
    # Quote the argument so a value containing spaces or glob characters
    # reaches the Python script as one unmodified argument.
    python /workspace/meren/wait_for_cluster.py "$1"
}

## add_2MA_to_AA_table.py
import csv
import numpy as np
import argparse

# Command-line interface: an input file, an output file and a float ratio
# threshold. All three options are optional and default to None.
parser = argparse.ArgumentParser(
    description='Adding the 2MA column to anvio AA table',
)
parser.add_argument(
    '-i', '--input',
    metavar='FILE',
    dest='input_file',
    help='Input file',
)
parser.add_argument(
    '-o', '--out',
    metavar='FILE',
    dest='output_file',
    help='Name of file for output',
)
parser.add_argument(
    '-r', '--ratio',
    metavar='NUMBER',
    dest='ratio',
    type=float,
    # Adjacent literals are concatenated into a single help string.
    help='Minimal ratio between consensus and the second most covered amino-acid. '
         'If the ratio is lower than the provided threshold, '
         'then the 2MA value would be in the form concensus_concensus',
)
args = parser.parse_args()

## gen-tree-with-real-gene-order
#!/usr/bin/env python
# -*- coding: utf-8


__author__ = "Alon Shaiber"
__copyright__ = ""
__credits__ = []
__license__ = ""
__version__ = 1
__maintainer__ = "Alon Shaiber"

## export_nuc_from_fasta.py
#!/usr/bin/env python

import anvio.utils as u
import argparse
import sys
# Command-line interface: two integer positions (-1/--N1 and -2/--N2), a
# contig name and an output file. All options default to None when omitted.
parser = argparse.ArgumentParser(description='Get nucleotides from fasta file between user defined nucleotide positions inside a specified contig')
parser.add_argument('-1', '--N1', metavar='INT', dest='n1', type=int, help='Nucleotide sequence start position')
# --N2 closes the range opened by --N1; its help text previously repeated
# "start position", which made the two options indistinguishable in --help.
parser.add_argument('-2', '--N2', metavar='INT', dest='n2', type=int, help='Nucleotide sequence end position')
parser.add_argument('-c', '--contig', metavar='STRING', dest='c', help='Contig name')
parser.add_argument('-o', '--out', metavar='FILE', dest='output', help='Output file')

## MAP.shx
#!/bin/bash

### DEFAULTS (FEEL FREE TO EDIT THESE) ##################
NUM_THREADS_FOR_MAPPING=10
NUM_THREADS_FOR_HMMSCAN=4
NUM_THREADS_FOR_ANVI_GEN_CONTIG=4
NUM_THREADS_FOR_ANVI_PROFILE=4
NUM_THREADS_FOR_ANVI_MERGE=4

# configure whether SNV analysis will be included or not (if you want it included then leave this empty
	import os
	import sys
	import time
	import xml.dom.minidom
	import string
	import getpass

	def still_running(job_name, job_owner):
	f=os.popen('qstat -u \* -xml -r')
	#!bin/bash
	# remove_duplicate_sequence.shx is a short bash script that removes duplicate copies of sequences from an input fasta file and saves the result in an output fasta file.
	# the bash script was based on Pierre Lindenbaum's script: https://www.biostars.org/p/3003/#3008
	# input:
	# -f | --file : fasta file
	# -o | --output : output file, written after removing the duplicate sequences
	#
	while [ "$1" != "" ]; do
	case $1 in
	-f \| --file ) shift
	#!bin/bash
	#
	# expects tab-delimited table with only two columns and no header
	# column 1 - name of each sequence
	# column 2 - sequence
	# performs two actions:
	# Adds > at the beginning of each row
	# converts all the tabs to new lines
	#
	while [ "$1" != "" ]; do
	#!/usr/bin/env python

	list1=open('p207_profiled_samples.txt','r')
	list2=open('p214_profield_samples.txt','r')

	print list(set(list1.read()) - set(list2.read()))
	#!/bin/bash

	set -e

	DIR=Metaphlan_output
	job=Metaphlan_Primates
	email=alon.shaiber@gmail.com
	WAIT () {
	python /workspace/meren/wait_for_cluster.py $1
	}
	import csv
	import numpy as np
	import argparse

	parser = argparse.ArgumentParser(description='Adding the 2MA column to anvio AA table')
	parser.add_argument('-i','--input',metavar='FILE',dest='input_file',help='Input file')
	parser.add_argument('-o','--out',metavar='FILE',dest='output_file',help='Name of file for output')
	parser.add_argument('-r','--ratio',metavar='NUMBER',dest='ratio',type=float,help='Minimal ratio between consensus and the second most covered amino-acid. If the ratio is lower than the provided threshold, then the 2MA value would be in the form concensus_concensus')
	args = parser.parse_args()
	#!/usr/bin/env python
	# -*- coding: utf-8


	__author__ = "Alon Shaiber"
	__copyright__ = ""
	__credits__ = []
	__license__ = ""
	__version__ = 1
	__maintainer__ = "Alon Shaiber"
	#!/usr/bin/env python

	import anvio.utils as u
	import argparse
	import sys
	parser = argparse.ArgumentParser(description='Get nucleotides from fasta file beyween user defined nucleotide positions inside a specified contig')
	parser.add_argument('-1','--N1',metavar='INT',dest='n1',type=int,help='Nucleotide sequence start position')
	parser.add_argument('-2','--N2',metavar='INT',dest='n2',type=int,help='Nucleotide sequence start position')
	parser.add_argument('-c','--contig',metavar='STRING',dest='c',help='Contig name')
	parser.add_argument('-o','--out',metavar='FILE',dest='output',help='Output file')
	#!/bin/bash

	### DEFAULTS (FEEL FREE TO EDIT THESE) ##################
	NUM_THREADS_FOR_MAPPING=10
	NUM_THREADS_FOR_HMMSCAN=4
	NUM_THREADS_FOR_ANVI_GEN_CONTIG=4
	NUM_THREADS_FOR_ANVI_PROFILE=4
	NUM_THREADS_FOR_ANVI_MERGE=4

	# configure whether SNV analysis will be included or not (if you want it included then leave this empty