Skip to content

Instantly share code, notes, and snippets.

View mdshw5's full-sized avatar

Matt Shirley mdshw5

View GitHub Profile
@mdshw5
mdshw5 / py27_demo.py
Last active August 29, 2015 13:55
portable replacement for random.choice()
Python 2.7.5 (default, Oct 9 2013, 20:09:30)
[GCC 4.2.1 Compatible Apple LLVM 5.0 (clang-500.2.78)] on darwin
>>> x = range(100)
>>> random.seed(1)
>>> print([random.choice(x) for i in range(10)])
[13, 84, 76, 25, 49, 44, 65, 78, 9, 2]
>>> random.seed(1)
>>> print([choice(x) for i in range(10)])
[13, 84, 76, 25, 49, 44, 65, 78, 9, 2]
@mdshw5
mdshw5 / py27.py
Last active August 29, 2015 13:55
random.choice in Python 2.X vs 3.X
def choice(self, seq):
    """Choose a random element from a non-empty sequence."""
    # self.random() is expected to yield a float in [0.0, 1.0); scaling it by
    # len(seq) and truncating with int() gives an index into seq.
    return seq[int(self.random() * len(seq))]  # raises IndexError if seq is empty
@mdshw5
mdshw5 / choice.py
Created February 2, 2014 20:20
portable choice
import random
def choice(seq):
    """Choose a random element from a non-empty sequence.

    This function produces consistent results from Python2.7 to 3.3
    in contrast to random.choice().

    Raises IndexError if seq is empty (index 0 of an empty sequence).
    """
    # Index with a scaled random.random() draw so that a given seed maps to
    # the same element regardless of how random.choice() is implemented.
    return seq[int(random.random() * len(seq))]
@mdshw5
mdshw5 / sam_rules
Created February 20, 2014 16:37
snakemake include issue
# Convert a SAM file to BAM with `samtools view`, for any {dataset} wildcard.
rule sam_to_bam:
    input: "{dataset}.sam"
    output: "{dataset}.bam"
    # samtools is brought onto PATH via environment modules before the call;
    # "-@ 8" requests 8 additional samtools threads.
    shell: "module load sharedapps samtools; samtools view -@ 8 -bSo {output} {input}"
# Build the .bai index for a BAM file with `samtools index`.
rule bam_index:
    input: "{dataset}.bam"
    output: "{dataset}.bam.bai"
    # samtools is brought onto PATH via environment modules before the call.
    shell: "module load sharedapps samtools; samtools index {input} {output}"
import urllib2
from bs4 import BeautifulSoup
from icalendar import Calendar, Event
import pytz
from datetime import datetime, timedelta
def scrape_scical():
data = urllib2.urlopen('http://www.hopkinsmedicine.org/scical/').read()
soup = BeautifulSoup(data)
cal = Calendar()
cal.add('prodid', '-//Hopkins Science Calendar//mattshirley.com/scical//')
@mdshw5
mdshw5 / freetype2.pc
Created March 27, 2014 16:43
pkg-config files for custom matplotlib installation
prefix=/home/matt/.local
exec_prefix=${prefix}
includedir=${prefix}/include
libdir=${exec_prefix}/lib
Name: freetype2
Description: The freetype2 library
Version: 2.5.3
Cflags: -I${includedir}/freetype2
Libs: -L${libdir} -lfreetype
@mdshw5
mdshw5 / extract_repeats.py
Created March 31, 2014 14:23
Biostars 96573 solution
from pyfaidx import Fasta
# For every line of regions.txt, slice the named sequence out of
# sequence.fasta and print a label line followed by the sequence.
with open("regions.txt") as regions, Fasta("sequence.fasta") as fasta:
    for line in regions:
        fields = line.rstrip().split()
        # Whitespace-separated columns 5-7 hold the sequence name, start, end.
        rname, start, end = fields[4:7]
        # Columns 10-11, joined by a single space, are appended to the label.
        repeat = ' '.join(fields[9:11])
        # NOTE(review): if start/end are 1-based inclusive coordinates, the
        # end bound should be int(end) -- as written the slice stops one base
        # short of `end`. Confirm against the regions.txt format.
        seq = fasta[rname][int(start)-1:int(end)-1]
        print(seq.name + repeat)
        print(seq.seq)
@mdshw5
mdshw5 / answer.py
Last active August 29, 2015 13:58
biostars 97452
from json import dumps
# Map each recognition-site sequence to its enzyme name.
enzyme_sites = dict()
# The context manager closes the file even if a line fails to parse.
with open("restriction_enzymes.txt", "r") as fh:
    for line in fh:
        # Skip blank lines (e.g. a trailing newline at end of file) instead of
        # failing the two-field unpack below.
        if not line.strip():
            continue
        # Each record is exactly two whitespace-separated fields.
        seq, name = line.rstrip().split()
        enzyme_sites[seq] = name
# here is a nice way to print our dictionary
@mdshw5
mdshw5 / readcount.py
Created June 5, 2014 23:38
python bam read counts
from subprocess import Popen, PIPE
def bam_read_count(bamfile):
""" Return a tuple of the number of mapped and unmapped reads in a bam file """
p = Popen(['samtools', 'idxstats', bamfile], stdout=PIPE)
mapped = 0
unmapped = 0
for line in p.stdout:
rname, rlen, nm, nu = line.rstrip().split()
mapped += int(nm)
@mdshw5
mdshw5 / rename_multifasta.py
Created July 3, 2014 13:53
biostars 105338
from pyfaidx import Fasta, wrap_sequence
def key_fn(x):
    """Return the first two whitespace-separated tokens of x, joined by a space.

    Every occurrence of 'len=' is removed from x before it is split.
    """
    without_len = x.replace('len=', '')
    tokens = without_len.split()
    return ' '.join(tokens[:2])
# Open the multi-FASTA, passing key_fn as pyfaidx's key_function callback.
fa = Fasta('multi.fasta', key_function=key_fn)
with open('out.fasta', 'w') as out:
    # Iterate the opened Fasta instance; the Fasta class itself is not iterable.
    for seq in fa:
        # {name} is a named replacement field, so it must be supplied by
        # keyword -- a positional argument raises KeyError.
        out.write('>{name}\n'.format(name=seq.name))
        # Write the sequence wrapped at 70 characters.
        for line in wrap_sequence(70, str(seq)):
            out.write(line)