Matt Shirley mdshw5

## py27_demo.py
Python 2.7.5 (default, Oct  9 2013, 20:09:30)
[GCC 4.2.1 Compatible Apple LLVM 5.0 (clang-500.2.78)] on darwin
>>> x = range(100)
>>> random.seed(1)
>>> print([random.choice(x) for i in range(10)])
[13, 84, 76, 25, 49, 44, 65, 78, 9, 2]

>>> random.seed(1)
>>> print([choice(x) for i in range(10)])
[13, 84, 76, 25, 49, 44, 65, 78, 9, 2]

## py27.py
def choice(self, seq):
    """Return one randomly selected item of the non-empty sequence *seq*."""
    # Scale the value from self.random() by the sequence length and truncate
    # it to an index; indexing an empty sequence then raises IndexError.
    position = int(self.random() * len(seq))
    return seq[position]

## choice.py
import random

def choice(seq):
    """Pick one element at random from the non-empty sequence *seq*.

    The index is derived directly from random.random(), which, in contrast
    to random.choice(), gives consistent results from Python 2.7 to 3.3.
    """
    offset = int(random.random() * len(seq))
    return seq[offset]

## sam_rules
# Convert a SAM alignment file into BAM with `samtools view`.
rule sam_to_bam:
    input: "{dataset}.sam"
    output: "{dataset}.bam"
    # Declare the worker threads samtools is asked for, so the Snakemake
    # scheduler accounts for them instead of treating the job as single-core.
    threads: 8
    shell: "module load sharedapps samtools; samtools view -@ {threads} -bSo {output} {input}"

# Build the index file for a BAM file with `samtools index`.
rule bam_index:
    input: "{dataset}.bam"
    # The index is written next to the BAM file with a .bai suffix appended.
    output: "{dataset}.bam.bai"
    shell: "module load sharedapps samtools; samtools index {input} {output}"

## scical_scraper.py
    import urllib2
    from bs4 import BeautifulSoup
    from icalendar import Calendar, Event
    import pytz
    from datetime import datetime, timedelta
    def scrape_scical():
        """Fetch the Hopkins science calendar page and start an iCalendar object.

        NOTE(review): the function ends after setting 'prodid' in this excerpt
        and returns nothing; the body looks truncated -- confirm against the
        full file.
        """
        # Download the raw page contents (urllib2 exists only on Python 2).
        data = urllib2.urlopen('http://www.hopkinsmedicine.org/scical/').read()
        # Parse the downloaded markup with BeautifulSoup.
        soup = BeautifulSoup(data)
        # Create an empty calendar and set its product identifier.
        cal = Calendar()
        cal.add('prodid', '-//Hopkins Science Calendar//mattshirley.com/scical//')

## freetype2.pc
prefix=/home/matt/.local
exec_prefix=${prefix}
includedir=${prefix}/include
libdir=${exec_prefix}/lib

Name: freetype2
Description: The freetype2 library
Version: 2.5.3
Cflags: -I${includedir}/freetype2
Libs: -L${libdir} -lfreetype

## extract_repeats.py
from pyfaidx import Fasta

# Print the name, repeat label and sequence of every region in regions.txt.
# Each line is whitespace-delimited: fields 5-7 hold the sequence name and
# the start/end coordinates, fields 10-11 the repeat labels.
with open("regions.txt") as regions, Fasta("sequence.fasta") as fasta:
    for line in regions:
        fields = line.rstrip().split()
        rname, start, end = fields[4:7]
        repeat = ' '.join(fields[9:11])
        # NOTE(review): assumes 1-based, inclusive coordinates (the column
        # layout matches RepeatMasker .out) -- confirm. Only the start is
        # shifted for the half-open slice; subtracting 1 from the end as
        # well dropped the last base of every region.
        seq = fasta[rname][int(start) - 1:int(end)]
        print(seq.name + repeat)
        print(seq.seq)

## answer.py
from json import dumps
# Map each recognition sequence to its enzyme name. Every line of the input
# file must hold exactly two whitespace-separated fields: sequence, then name.
enzyme_sites = dict()

# The context manager closes the file once it has been read; the original
# left the handle open.
with open("restriction_enzymes.txt", "r") as fh:
    for line in fh:
        seq, name = line.rstrip().split()
        enzyme_sites[seq] = name

# here is a nice way to print our dictionary

## readcount.py
from subprocess import Popen, PIPE


def bam_read_count(bamfile):
    """Return a tuple of the number of mapped and unmapped reads in a bam file.

    Runs ``samtools idxstats`` on *bamfile* and sums the third and fourth
    whitespace-separated columns of every output line.

    Returns:
        (mapped, unmapped) as a tuple of two ints.

    Raises:
        ValueError: if an output line does not have exactly four fields or
            its counts are not integers.
    """
    p = Popen(['samtools', 'idxstats', bamfile], stdout=PIPE)
    mapped = 0
    unmapped = 0
    try:
        for line in p.stdout:
            rname, rlen, nm, nu = line.rstrip().split()
            mapped += int(nm)
            unmapped += int(nu)
    finally:
        # Release the pipe and reap the child even if parsing fails.
        p.stdout.close()
        p.wait()
    return (mapped, unmapped)

## rename_multifasta.py
from pyfaidx import Fasta, wrap_sequence

def key_fn(x):
    """Build a record key from the first two header words, with 'len=' removed."""
    return ' '.join(x.replace('len=', '').split()[:2])


fa = Fasta('multi.fasta', key_function=key_fn)

with open('out.fasta', 'w') as out:
    # Iterate the opened Fasta instance; the original looped over the Fasta
    # class itself, which is not iterable.
    for seq in fa:
        # The placeholder is named, so the value must be passed by keyword;
        # a positional argument raised KeyError.
        out.write('>{name}\n'.format(name=seq.name))
        for line in wrap_sequence(70, str(seq)):
            out.write(line)
	Python 2.7.5 (default, Oct 9 2013, 20:09:30)
	[GCC 4.2.1 Compatible Apple LLVM 5.0 (clang-500.2.78)] on darwin
	>>> x = range(100)
	>>> random.seed(1)
	>>> print([random.choice(x) for i in range(10)])
	[13, 84, 76, 25, 49, 44, 65, 78, 9, 2]

	>>> random.seed(1)
	>>> print([choice(x) for i in range(10)])
	[13, 84, 76, 25, 49, 44, 65, 78, 9, 2]
	def choice(self, seq):
	    """Choose a random element from a non-empty sequence."""
	    # Indentation restored: the flattened body was not valid Python.
	    # Indexing an empty sequence raises IndexError.
	    return seq[int(self.random() * len(seq))]
	import random

	def choice(seq):
	    """Choose a random element from a non-empty sequence.

	    This function produces consistent results from Python2.7 to 3.3
	    in contrast to random.choice().
	    """
	    # Indentation restored: the flattened body was not valid Python.
	    return seq[int(random.random() * len(seq))]
	# Convert a SAM alignment file into BAM with `samtools view`.
	rule sam_to_bam:
	    input: "{dataset}.sam"
	    output: "{dataset}.bam"
	    # Declare the worker threads samtools is asked for, so the Snakemake
	    # scheduler accounts for them instead of treating the job as single-core.
	    threads: 8
	    shell: "module load sharedapps samtools; samtools view -@ {threads} -bSo {output} {input}"

	# Build the index file for a BAM file with `samtools index`.
	# Directive indentation restored; the flattened rule could not be parsed.
	rule bam_index:
	    input: "{dataset}.bam"
	    output: "{dataset}.bam.bai"
	    shell: "module load sharedapps samtools; samtools index {input} {output}"
	import urllib2
	from bs4 import BeautifulSoup
	from icalendar import Calendar, Event
	import pytz
	from datetime import datetime, timedelta
	def scrape_scical():
	    """Fetch the Hopkins science calendar page and start an iCalendar object.

	    NOTE(review): the function ends after setting 'prodid' in this excerpt
	    and returns nothing; the body looks truncated -- confirm against the
	    full file.
	    """
	    # Indentation restored: the flattened body was not valid Python.
	    # Download the raw page contents (urllib2 exists only on Python 2).
	    data = urllib2.urlopen('http://www.hopkinsmedicine.org/scical/').read()
	    soup = BeautifulSoup(data)
	    # Create an empty calendar and set its product identifier.
	    cal = Calendar()
	    cal.add('prodid', '-//Hopkins Science Calendar//mattshirley.com/scical//')
	prefix=/home/matt/.local
	exec_prefix=${prefix}
	includedir=${prefix}/include
	libdir=${exec_prefix}/lib

	Name: freetype2
	Description: The freetype2 library
	Version: 2.5.3
	Cflags: -I${includedir}/freetype2
	Libs: -L${libdir} -lfreetype
	from pyfaidx import Fasta

	# Print the name, repeat label and sequence of every region in regions.txt.
	# Indentation restored: the flattened block was not valid Python.
	with open("regions.txt") as regions, Fasta("sequence.fasta") as fasta:
	    for line in regions:
	        fields = line.rstrip().split()
	        rname, start, end = fields[4:7]
	        repeat = ' '.join(fields[9:11])
	        # NOTE(review): assumes 1-based, inclusive coordinates (the column
	        # layout matches RepeatMasker .out) -- confirm. Only the start is
	        # shifted for the half-open slice; subtracting 1 from the end as
	        # well dropped the last base of every region.
	        seq = fasta[rname][int(start) - 1:int(end)]
	        print(seq.name + repeat)
	        print(seq.seq)
	from json import dumps
	# Map each recognition sequence to its enzyme name. Every line of the input
	# file must hold exactly two whitespace-separated fields: sequence, then name.
	enzyme_sites = dict()

	# Indentation restored, and the file is now closed by a context manager
	# once it has been read; the original left the handle open.
	with open("restriction_enzymes.txt", "r") as fh:
	    for line in fh:
	        seq, name = line.rstrip().split()
	        enzyme_sites[seq] = name

	# here is a nice way to print our dictionary
	from subprocess import Popen, PIPE

	def bam_read_count(bamfile):
	    """Return a tuple of the number of mapped and unmapped reads in a bam file.

	    Runs ``samtools idxstats`` on *bamfile* and sums the third and fourth
	    whitespace-separated columns of every output line.

	    Returns:
	        (mapped, unmapped) as a tuple of two ints.
	    """
	    p = Popen(['samtools', 'idxstats', bamfile], stdout=PIPE)
	    mapped = 0
	    unmapped = 0
	    try:
	        for line in p.stdout:
	            rname, rlen, nm, nu = line.rstrip().split()
	            mapped += int(nm)
	            unmapped += int(nu)
	    finally:
	        # Release the pipe and reap the child even if parsing fails.
	        p.stdout.close()
	        p.wait()
	    return (mapped, unmapped)
	from pyfaidx import Fasta, wrap_sequence

	key_fn = lambda x: ' '.join(x.replace('len=', '').split()[:2])
	fa = Fasta('multi.fasta', key_function=key_fn)

	with open('out.fasta', 'w') as out:
	    # Indentation restored. Iterate the opened Fasta instance; the original
	    # looped over the Fasta class itself, which is not iterable.
	    for seq in fa:
	        # The placeholder is named, so the value must be passed by keyword;
	        # a positional argument raised KeyError.
	        out.write('>{name}\n'.format(name=seq.name))
	        for line in wrap_sequence(70, str(seq)):
	            out.write(line)