Alon Shaiber ShaiberAlon

## summarize_blast_results_with_query_contig.py
#!/usr/bin/env python

# Click 'Download > Multiple-file JSON' from NCBI search results page,
# unzip it, run this script in it without any parameters, get the
# markdown formatted table.

import json
import glob

# poor man's whatever:

## MED.snake
import pandas as pd
import os

# Name of the project.
project_name = "GATA4"

# Read the tab-separated samples table (its first row is the header) and keep
# only the values of the 'samples' column.
SAMPLES = pd.read_csv(
    'samples.txt', sep='\t', header=0, index_col=False
)['samples'].values

# Create the log directory up front; an already existing one is left alone.
os.makedirs("00_LOGS", exist_ok=True)
print(SAMPLES)
localrules: all,QC_report, merge, get_sample_info, prepare_things_for_GAST

## marker_gene_workflow.sh
#!/bin/bash

# CALL THIS SCRIPT WITH A PROJECT NAME PARAMETER

# Exit immediately when a command returns a non-zero status.
set -e

# Exactly one positional argument (the project name) is required.
if [ "$#" -ne 1 ]; then
    # Usage errors go to stderr so they never end up in captured stdout.
    echo "You need to give a project name as an argument to this script (and it better be a single, short and descriptive word without funny characters)." >&2
    # Exit statuses are limited to 0-255; use a plain non-zero status rather than -1.
    exit 1
fi

## create_my_gene_calls_file.shx
# Short bash script that creates an artificial gene call for a sequence spanning a collection of genes
# the artificial gene call would start at the first nucleotide of the first gene (denoted n_i) and end at the
# last nucleotide of the second gene (denoted n_f). The assumption is that n_i < n_f.
# Another assumption: both genes are from the same contig.
# The output is a file in the external gene calls format of anvi'o

# Example
# $ bash create_my_gene_calls_file.shx MY_GENE_CALL_FILE.txt CONTIGS.db 45 105
# this will give a gene call starting from the first nucleotide of gene 45 and ending at the last nucleotide of gene 105

## # vim - 2017-04-13_11-53-13.txt
Homebrew build logs for vim on macOS 10.11.6
Build date: 2017-04-13 11:53:13

## combine_fasta_files.py
import anvio.utils as utils
# Suffix appended to each sample name to form that sample's FASTA file name.
fasta_suffix = '.fna'
# NOTE(review): presumably the path the merged FASTA is written to; the write
# itself is not visible in this excerpt -- confirm.
output_file = 'merged_fasta.fna'
# Text file read line by line below; each line is treated as one sample name.
samples_list = 'samples.txt'
# NOTE(review): starts empty; presumably filled with the merged sequences
# further down in the script -- confirm.
output_fasta_dictionary = {}
with open(samples_list,'r') as s:
        for sample in s:
                # Strip the trailing newline (and any surrounding whitespace).
                sample=sample.strip()
                # FASTA file name for this sample: <sample><fasta_suffix>.
                sample_fasta = "".join([sample,fasta_suffix])
                print(sample)

## MAP.shx
#!/bin/bash

### DEFAULTS (FEEL FREE TO EDIT THESE) ##################
# NOTE(review): judging by the names, these are per-step thread counts; the
# commands that consume them are not visible in this excerpt -- confirm.
NUM_THREADS_FOR_MAPPING=10
NUM_THREADS_FOR_HMMSCAN=4
NUM_THREADS_FOR_ANVI_GEN_CONTIG=4
NUM_THREADS_FOR_ANVI_PROFILE=4
NUM_THREADS_FOR_ANVI_MERGE=4

# configure whether SNV analysis will be included or not (if you want it included then leave this empty

## export_nuc_from_fasta.py
#!/usr/bin/env python

import anvio.utils as u
import argparse
import sys
# Command-line interface: get the nucleotides between two user-defined
# positions inside one contig of a FASTA file.
parser = argparse.ArgumentParser(
    description='Get nucleotides from fasta file between user defined nucleotide positions inside a specified contig'
)
parser.add_argument('-1', '--N1', metavar='INT', dest='n1', type=int, help='Nucleotide sequence start position')
# -2/--N2 is the other bound of the range; its help text previously repeated
# the "start position" wording of -1/--N1.
parser.add_argument('-2', '--N2', metavar='INT', dest='n2', type=int, help='Nucleotide sequence end position')
parser.add_argument('-c', '--contig', metavar='STRING', dest='c', help='Contig name')
parser.add_argument('-o', '--out', metavar='FILE', dest='output', help='Output file')

## gen-tree-with-real-gene-order
#!/usr/bin/env python
# -*- coding: utf-8 -*-


# Module metadata.
# NOTE(review): __version__ is an int here, not a version string -- confirm
# that this is intended.
__author__ = "Alon Shaiber"
__copyright__ = ""
__credits__ = []
__license__ = ""
__version__ = 1
__maintainer__ = "Alon Shaiber"

## add_2MA_to_AA_table.py
import csv
import numpy as np
import argparse

# Command-line interface for adding the 2MA column to an anvi'o AA table.
parser = argparse.ArgumentParser(description='Adding the 2MA column to anvio AA table')
parser.add_argument('-i', '--input', metavar='FILE', dest='input_file', help='Input file')
parser.add_argument('-o', '--out', metavar='FILE', dest='output_file', help='Name of file for output')
# The help text previously misspelled "consensus" as "concensus".
parser.add_argument(
    '-r', '--ratio', metavar='NUMBER', dest='ratio', type=float,
    help=('Minimal ratio between consensus and the second most covered amino-acid. '
          'If the ratio is lower than the provided threshold, then the 2MA value '
          'would be in the form consensus_consensus'),
)
args = parser.parse_args()
	#!/usr/bin/env python

	# Click 'Download > Multiple-file JSON' from NCBI search results page,
	# unzip it, run this script in it without any parameters, get the
	# markdown formatted table.

	import json
	import glob

	# poor man's whatever:
	import pandas as pd
	import os

	project_name = "GATA4"
	SAMPLES = pd.read_csv('samples.txt', sep='\t', header=0, index_col=False)
	SAMPLES = SAMPLES['samples'].values
	os.makedirs("00_LOGS", exist_ok=True)
	print(SAMPLES)
	localrules: all,QC_report, merge, get_sample_info, prepare_things_for_GAST
	#!/bin/bash

	# CALL THIS SCRIPT WITH A PROJECT NAME PARAMETER

	set -e

	if [ "$#" -ne 1 ]; then
	echo "You need to give a project name as an argument to this script (and it better be a single, short and descriptive word without funny characters)."
	exit -1
	fi
	# Short bash to create an artificial gene call for a sequence for a collection of genes
	# the artificial gene call would start at the first nucleotide of the first gene (denoted n_i) and end at the
	# last nucleotide of the second gene (denoted n_f). The assumption is that n_i < n_f.
	# Another assumption: both genes are from the same contig.
	# The output is a file in the external gene calls format of anvi'o

	# Example
	# $ bash create_my_gene_calls_file.shx MY_GENE_CALL_FILE.txt CONTIGS.db 45 105
	# this will give a gene call starting from the first nucleotide of gene 45 and ending at the last nucleotide of gene 105
	Homebrew build logs for vim on macOS 10.11.6
	Build date: 2017-04-13 11:53:13
	import anvio.utils as utils
	fasta_suffix = '.fna'
	output_file = 'merged_fasta.fna'
	samples_list = 'samples.txt'
	output_fasta_dictionary = {}
	with open(samples_list,'r') as s:
	for sample in s:
	sample=sample.strip()
	sample_fasta = "".join([sample,fasta_suffix])
	print(sample)
	#!/bin/bash

	### DEFAULTS (FEEL FREE TO EDIT THESE) ##################
	NUM_THREADS_FOR_MAPPING=10
	NUM_THREADS_FOR_HMMSCAN=4
	NUM_THREADS_FOR_ANVI_GEN_CONTIG=4
	NUM_THREADS_FOR_ANVI_PROFILE=4
	NUM_THREADS_FOR_ANVI_MERGE=4

	# configure whether SNV analysis will be included or not (if you want it included then leave this empty
	#!/usr/bin/env python

	import anvio.utils as u
	import argparse
	import sys
	parser = argparse.ArgumentParser(description='Get nucleotides from fasta file beyween user defined nucleotide positions inside a specified contig')
	parser.add_argument('-1','--N1',metavar='INT',dest='n1',type=int,help='Nucleotide sequence start position')
	parser.add_argument('-2','--N2',metavar='INT',dest='n2',type=int,help='Nucleotide sequence start position')
	parser.add_argument('-c','--contig',metavar='STRING',dest='c',help='Contig name')
	parser.add_argument('-o','--out',metavar='FILE',dest='output',help='Output file')
	#!/usr/bin/env python
	# -*- coding: utf-8


	__author__ = "Alon Shaiber"
	__copyright__ = ""
	__credits__ = []
	__license__ = ""
	__version__ = 1
	__maintainer__ = "Alon Shaiber"
	import csv
	import numpy as np
	import argparse

	parser = argparse.ArgumentParser(description='Adding the 2MA column to anvio AA table')
	parser.add_argument('-i','--input',metavar='FILE',dest='input_file',help='Input file')
	parser.add_argument('-o','--out',metavar='FILE',dest='output_file',help='Name of file for output')
	parser.add_argument('-r','--ratio',metavar='NUMBER',dest='ratio',type=float,help='Minimal ratio between consensus and the second most covered amino-acid. If the ratio is lower than the provided threshold, then the 2MA value would be in the form concensus_concensus')
	args = parser.parse_args()