Skip to content

Instantly share code, notes, and snippets.

View mscook's full-sized avatar

Mitchell Stanton-Cook mscook

View GitHub Profile
diff --git a/bin/bg7 b/bin/bg7
old mode 100644
new mode 100755
index f5173ba..1b84cab
--- a/bin/bg7
+++ b/bin/bg7
@@ -29,7 +29,8 @@ runs bg7 version 0.9. Params are:
-G | --genbank_data: xml with extra data for your project needed for .gbk output (things like
locus name, Gen bank division tag, etc). There's a template for this at
${BG7_HOME}/resources/
@mscook
mscook / example_fabric_class_task.py
Created June 12, 2013 14:02
How to use classes to define fabric tasks
from fabric.api import task
from fabric.tasks import Task
class MyTask(Task):
    """Class-based Fabric task registered under the name ``deploy``."""

    # Name the task is exposed under on the ``fab`` command line.
    name = "deploy"

    def run(self, environment, domain="whatever.com"):
        """Clone the repository and restart Apache.

        Args:
            environment: accepted as a task argument; not used by the body.
            domain: accepted as a task argument; not used by the body.
        """
        # The module only imports ``task`` from fabric.api, so the two
        # operations used below were undefined (NameError at call time).
        # Inside this function the local name ``run`` is the Fabric
        # operation, not this method.
        from fabric.api import run, sudo

        run("git clone foo")
        sudo("service apache2 restart")


# A Task subclass has to be instantiated at module level to be usable.
instance = MyTask()
@mscook
mscook / chunk_mfa.py
Created November 5, 2013 06:14
Chunk a file into consecutive segments
import sys
import textwrap
"""
Usage:
chunk_mfa.py input.fa 500
"""
@mscook
mscook / nesoni_stats.py
Created January 22, 2014 05:46
Want to extract out mapping stats from a nesoni run?
# Mitchell Stanton-Cook
# m.stantoncook@gmail.com
# github.com/mscook
import glob
"""
(> XXXXX 47,065 unmapped pairs
(> XXXXX 19,785 unmapped reads
(> XXXXX 5,098,912 reads/pairs with alignments
@mscook
mscook / kranken_run_parse.py
Created March 13, 2014 06:23
Parse kraken analysis of assemblies, flagging those for further analysis
import glob
import sys
import os
from Bio import Entrez
Entrez.email = "Beatson.Lab@gmail.com"
@mscook
mscook / fix_order.py
Created March 17, 2014 05:02
Tidy up the output of a Mauve ordering run (files should not be prefixed with "ordered" and should be in uppercase)
import glob
import os
# Imported here because the script's import block only provides glob and os.
import subprocess

# Every .fas file exactly one directory below the working directory.
inf = glob.glob("*/*.fas")
for f in inf:
    # Example input: IR65/IR65_69_Contigs-against-EC958-complete.fas
    # The output name is the input name with "-complete" removed.
    new = f.replace("-complete", "")
    # Run the fab task with the input and output paths as its arguments.
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from being split or interpreted.
    subprocess.call(["fab", "bio_util.fasta_to_uppercase:" + f + "," + new])
    # Disabled alternative rename, kept for reference:
    #new = f.replace("ordered-", '')
@mscook
mscook / extract_Prokka.py
Created March 17, 2014 07:01
Given a set order, extract out statistics from a Prokka run
import glob
# Read the ordering file into a list of lines (line endings are kept).
# The context manager closes the handle as soon as the lines are read.
with open("order.dat") as order_file:
    order = order_file.readlines()
#organism: Escherichia coli HVM1147
#contigs: 93
#bases: 5131204
#rRNA: 4
from Bio.Blast import NCBIXML
# Print, for every alignment of every BLAST record, a FASTA-style header
# followed by the subject sequence of the alignment's first HSP.
#
# NOTE(review): ``input`` shadows the builtin of the same name; the name is
# kept in case code outside this span refers to it.
input = 'CFT073.B2_comp.fa_blast.xml'

# Header prefix: the file name up to its first underscore.
prefix = input.split("_")[0]

# Reuse the variable instead of repeating the file name, and close the
# handle once parsing is finished.
with open(input) as blast_xml:
    for record in NCBIXML.parse(blast_xml):
        for align in record.alignments:
            # Alignments without any HSP produce no output.
            if not align.hsps:
                continue
            # The label is the second comma-separated field of the query.
            # It is computed only when something is printed, so records
            # without alignments never need a comma in their query text.
            print(">" + prefix + "_" + record.query.split(',')[1].strip())
            print(align.hsps[0].sbjct)
import matplotlib
# Select the Agg backend before pylab is imported further down.
# NOTE(review): presumably so figures can be rendered without a display
# (e.g. on a cluster node) -- confirm.
matplotlib.use('Agg')
import sys
import collections
import pylab as plt
from matplotlib import rcParams
# Override the default value of the figure.subplot.bottom setting with 0.25.
# NOTE(review): presumably to leave room for long x-axis labels -- confirm.
rcParams.update({'figure.subplot.bottom': 0.25})
@mscook
mscook / validate_SeqFindR.py
Created April 3, 2014 23:56
Takes a SeqFindR database and does some simple validation
# Takes a SeqFindR database and does some simple validation
#
# usage:
#
# python validate_SeqFindR.py $input.fa
import sys
stored = []