This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# Slightly adapted from http://blog.nextgenetics.net/?e=27 | |
import sys | |
def main(): | |
for line in sys.stdin.xreadlines(): | |
#skip comment lines that start with the '#' character | |
if line[0] != '#': | |
#split line into columns by tab |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Downloads the .sra archive of every run returned for an SRA query.
#   $1 - query string handed to `esearch -db sra` (e.g. an SRP accession)
# Needs esearch, efetch and xtract (NCBI Entrez Direct) plus wget on PATH.
Download_SRP_Runs() {
    if [ -z "$1" ]; then
        echo "usage: Download_SRP_Runs <SRP ID>" >&2
        return 1
    fi
    # Run accessions, one per line (xtract separates several runs with tabs).
    SRP_IDs=$(esearch -db sra -query "$1" | efetch -format docsum | xtract -pattern DocumentSummary -element Run@acc | tr '\t' '\n')
    for r in ${SRP_IDs}; do
        # The URL path contains the first six characters of the accession.
        url="ftp://ftp-trace.ncbi.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/${r:0:6}/${r}/${r}.sra"
        wget "$url"
    done
}
Download_SRP_Runs <SRP ID GOES HERE> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gzip
import os
import re

# Annotation release to fetch; the build tag (e.g. "WS245") is parsed out of
# the URL so the local file name follows the release automatically.
GFF_URL = "ftp://ftp.wormbase.org/pub/wormbase/releases/WS245/species/c_elegans/PRJNA13758/c_elegans.PRJNA13758.WS245.annotations.gff3.gz"
BUILD = re.search(r"WS[0-9]+", GFF_URL).group(0)
GFF_FILE = "c_elegans.{BUILD}.annotations.gff3.gz".format(BUILD=BUILD)

# Download only when no local copy exists yet.
if not os.path.isfile(GFF_FILE):
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("Downloading Annotation File")
    # Reuse GFF_URL / GFF_FILE so the URL and file name cannot drift apart.
    os.system("curl '{url}' > {out}".format(url=GFF_URL, out=GFF_FILE))

acceptable_types = ['SNP', 'point_mutation']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function rename_to_filename {
    # Renames the sample in a VCF/BCF to the file's own name (with the first
    # ".vcf"/".bcf" removed), then re-indexes the file.
    #   $1 - VCF/BCF file to rewrite in place
    # The sample list written below holds exactly one name, so this is meant
    # for single-sample files.
    local tmp status=0
    # The X placeholders are required by GNU mktemp.
    tmp=$(mktemp -t temp.XXXXXX) || return 1
    echo "${1/.[vb]cf/}" > "$tmp"
    if bcftools reheader -s "$tmp" "$1" > "m.$1"; then
        mv "m.$1" "$1" && bcftools index "$1" || status=$?
    else
        # Keep the original file untouched when reheader fails.
        status=$?
        rm -f "m.$1"
    fi
    rm -f "$tmp"
    return "$status"
}
function add_sample_prefix { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# If you are trying to view VCF 4.2 files in IGV - you may run into issues. This function might help you. | |
# This script will: | |
# 1. Rename the file as version 4.1 | |
# 2. Replace parentheses in the INFO lines (IGV doesn't like these!) | |
function vcf_downgrade() { | |
outfile=${1/.bcf/} | |
outfile=${outfile/.gz/} | |
outfile=${outfile/.vcf/} | |
bcftools view --max-alleles 2 -O v $1 | \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the WormBase WS245 C. elegans annotation file (gzip-compressed GFF3).
# --output makes curl write the target file itself instead of going through a
# shell redirect.
curl --output c_elegans.WS245.annotations.gff3.gz 'ftp://ftp.wormbase.org/pub/wormbase/releases/WS245/species/c_elegans/PRJNA13758/c_elegans.PRJNA13758.WS245.annotations.gff3.gz'
# Use gff parallelized tools: brew install dmd | |
# Extract each type into its own GFF File | |
# This list obtained by running: | |
# gunzip -kfc c_elegans.WS245.annotations.gff3.gz | cut -f 3 | sort | uniq | |
types="CDS | |
DNAseI_hypersensitive_site |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def SLURM_get_last_job():
    """Return the first job id ``squeue`` lists for the current user.

    ``squeue`` is restricted to ``$USER``, prints job ids only (``-o %i``)
    without a header (``-h``) and is sorted with ``-S -i`` (descending job
    id according to the squeue manual); ``head -n 1`` keeps the first line.

    Returns:
        The job id as a text string with surrounding whitespace removed, or
        ``""`` when the pipeline printed nothing.
    """
    # Imported locally so the function does not rely on a module-level import.
    from subprocess import PIPE, Popen

    command = "squeue --user=$USER -o %i -h -S -i | head -n 1"
    # universal_newlines=True makes communicate() return text on Python 2 and
    # Python 3 alike; without it Python 3 yields bytes, which never equal "".
    output = Popen(
        command, stdout=PIPE, shell=True, universal_newlines=True
    ).communicate()[0]
    return output.strip()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def chunk_genome(chunk_size, reference): | |
""" | |
Parses bwa .ann file to retrieve chromosome sizes | |
for chunking purposes | |
""" | |
ann = open(reference + ".ann").read() | |
# Parsing .ann files | |
contigs = [x.split(" ")[1] for x in ann.split("\n")[1:-1:1]][::2] | |
contig_sizes = map(int,[x.split(" ")[1] for x in ann.split("\n")[1:-1:1]][1::2]) | |
for chrom, size in zip(contigs, contig_sizes): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run this script in a directory containing the zip files produced by fastqc.
# It aggregates the images of each type into individual folders, so looking
# across samples is quick.

# Whitespace-separated list of archives; file names must not contain spaces.
zips=$(ls *.zip)
for i in $zips; do
    unzip -o "$i" &>/dev/null
done

# Names with the ".zip" suffix removed (presumably the folders the archives
# unpack into). `//` strips the suffix from every entry; a single `/` would
# only rewrite the first one.
fastq_folders=${zips//.zip/}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import sys


def split_gff_by_feature(lines, out_dir=""):
    """Append each GFF record to ``<feature>.gff``, keyed on the third column.

    Args:
        lines: Iterable of tab-separated GFF lines, newline included.
        out_dir: Directory that receives the per-feature files; the default
            ``""`` writes into the current working directory.

    Lines starting with ``#`` and lines with fewer than three columns are
    skipped, since they carry no feature type.
    """
    current_feature = None
    out = None
    try:
        for line in lines:
            fields = line.split("\t")
            if line.startswith("#") or len(fields) < 3:
                continue
            feature = fields[2]
            # Switch files only when the feature type changes, and close the
            # previous handle so handles do not pile up.
            if feature != current_feature:
                if out is not None:
                    out.close()
                # Append mode: a feature type can reappear later in the input.
                out = open(os.path.join(out_dir, feature + ".gff"), "a")
                current_feature = feature
            out.write(line)
    finally:
        if out is not None:
            out.close()


if __name__ == "__main__":
    split_gff_by_feature(sys.stdin)