Skip to content

Instantly share code, notes, and snippets.

View mscook's full-sized avatar

Mitchell Stanton-Cook mscook

View GitHub Profile
@mscook
mscook / get_PHRED.py
Created May 12, 2014 06:16
Gives an idea of which PHRED quality encoding a set of gzipped FASTQ files uses
import glob
import os
inf = glob.glob("*.gz")
rlen = 76.0
for f in inf:
tmp = f.split(".fastq.gz")[0]
os.system("zcat "+f+" | head -n 4 | tail -n 1 > "+tmp)
with open(tmp) as fin:
@mscook
mscook / validate_SeqFindR.py
Created April 3, 2014 23:56
Takes a SeqFindR database and does some simple validation
# Takes a SeqFindR database and does some simple validation
#
# usage:
#
# python validate_SeqFindR.py $input.fa
import sys
stored = []
import matplotlib
matplotlib.use('Agg')
import sys
import collections
import pylab as plt
from matplotlib import rcParams
rcParams.update({'figure.subplot.bottom': 0.25})
from Bio.Blast import NCBIXML
# Print the first HSP's subject sequence of every alignment as a FASTA record.
# NOTE(review): `input` shadows the builtin; the name is kept because code
# outside this view may still read it.
input = 'CFT073.B2_comp.fa_blast.xml'
# Header prefix is everything before the first "_" in the file name.
prefix = input.split("_")[0]
# Reuse the `input` variable (the original repeated the literal) and close
# the handle deterministically once parsing is finished.
with open(input) as blast_xml:
    for record in NCBIXML.parse(blast_xml):
        for align in record.alignments:
            # NOTE(review): indentation was lost in the source; this assumes
            # both prints belonged to the `idx == 0` branch, i.e. only the
            # first HSP of each alignment is reported - confirm.
            if not align.hsps:
                continue
            hsp = align.hsps[0]
            # presumably the query title is comma-separated with the wanted
            # label in the second field; verify against the BLAST input.
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(">" + prefix + "_" + record.query.split(',')[1].strip())
            print(hsp.sbjct)
@mscook
mscook / extract_Prokka.py
Created March 17, 2014 07:01
Given a set order, extract out statistics from a Prokka run
import glob

# Read every line of order.dat (trailing newlines are kept, as readlines()
# returns them) and close the file as soon as the read completes instead of
# leaving the handle to the garbage collector.
with open("order.dat") as order_file:
    order = order_file.readlines()
#organism: Escherichia coli HVM1147
#contigs: 93
#bases: 5131204
#rRNA: 4
@mscook
mscook / fix_order.py
Created March 17, 2014 05:02
Tidy up the output of a Mauve ordering run (file names should not be prefixed with "ordered" and the sequences should be in uppercase)
import glob
import os
import subprocess

# Every .fas file exactly one directory below the current one, e.g.
# IR65/IR65_69_Contigs-against-EC958-complete.fas
inf = glob.glob("*/*.fas")
for f in inf:
    # Output path: the input path with "-complete" removed.
    new = f.replace("-complete", "")
    # Pass an argument list instead of a shell string so paths containing
    # spaces or shell metacharacters reach `fab` intact. As with os.system,
    # a non-zero exit status is not raised as an exception.
    subprocess.call(["fab", "bio_util.fasta_to_uppercase:" + f + "," + new])
    # Disabled alternative from the original script:
    #new = f.replace("ordered-", '')
@mscook
mscook / kranken_run_parse.py
Created March 13, 2014 06:23
Parse Kraken analyses of assemblies, flagging those that need further analysis
import glob
import sys
import os
from Bio import Entrez
Entrez.email = "Beatson.Lab@gmail.com"
@mscook
mscook / nesoni_stats.py
Created January 22, 2014 05:46
Want to extract out mapping stats from a nesoni run?
# Mitchell Stanton-Cook
# m.stantoncook@gmail.com
# github.com/mscook
import glob
"""
(> XXXXX 47,065 unmapped pairs
(> XXXXX 19,785 unmapped reads
(> XXXXX 5,098,912 reads/pairs with alignments
@mscook
mscook / chunk_mfa.py
Created November 5, 2013 06:14
Chunk a file into consecutive segments
import sys
import textwrap
"""
Usage:
chunk_mfa.py input.fa 500
"""
@mscook
mscook / example_fabric_class_task.py
Created June 12, 2013 14:02
How to use classes to define fabric tasks
from fabric.api import run, sudo, task
from fabric.tasks import Task


class MyTask(Task):
    """Class-based Fabric task registered under the task name ``deploy``."""

    # Task name; set explicitly rather than derived from the class name.
    name = "deploy"

    def run(self, environment, domain="whatever.com"):
        """Clone the repository, then restart Apache with elevated privileges.

        ``environment`` and ``domain`` are accepted as task arguments but are
        not used by this body.
        """
        # Inside the method the bare names `run` and `sudo` resolve to the
        # fabric.api operations imported at module level (the original never
        # imported them, so calling the task raised NameError); they do not
        # refer to this method, which lives in the class namespace.
        run("git clone foo")
        sudo("service apache2 restart")


# Module-level instance of the task class.
instance = MyTask()