Matt Shirley mdshw5

## answer.py
from json import dumps
# Build a lookup of recognition sequence -> enzyme name from
# restriction_enzymes.txt. Every non-blank line is expected to hold exactly
# two whitespace-separated fields: the sequence followed by the name.
enzyme_sites = {}

# The context manager closes the file even if a malformed line raises.
with open("restriction_enzymes.txt", "r") as fh:
  for line in fh:
    if not line.strip():
      continue  # tolerate blank lines, e.g. a trailing empty line at EOF
    # split() with no arguments already discards surrounding whitespace.
    seq, name = line.split()
    enzyme_sites[seq] = name

# here is a nice way to print our dictionary

## fasta_to_fastq.py
"""
Convert a FASTA file to a FASTQ file with a static (fixed) quality score

Usage:
$ ./fasta_to_fastq NAME.fasta NAME.fastq
"""

import sys, os
from Bio import SeqIO

## readcount.py
from subprocess import Popen, PIPE


def bam_read_count(bamfile):
    """Return a tuple of the number of mapped and unmapped reads in a bam file.

    Runs ``samtools idxstats`` on *bamfile* and sums, over every output line,
    the third column (mapped) and the fourth column (unmapped).

    Args:
        bamfile: Path of the BAM file handed to ``samtools idxstats``.

    Returns:
        A ``(mapped, unmapped)`` tuple of ints.

    Raises:
        OSError: If the ``samtools`` executable cannot be started.
        ValueError: If an output line does not have exactly four
            whitespace-separated fields or a count is not an integer.
    """
    mapped = 0
    unmapped = 0
    # The context manager closes the pipe and waits for samtools to exit,
    # so no file descriptor or zombie process is left behind.
    with Popen(['samtools', 'idxstats', bamfile], stdout=PIPE) as p:
        for line in p.stdout:
            # Lines arrive as bytes; int() accepts ASCII digit bytes directly.
            _rname, _rlen, nm, nu = line.rstrip().split()
            mapped += int(nm)
            unmapped += int(nu)
    return (mapped, unmapped)

## rename_multifasta.py
from pyfaidx import Fasta, wrap_sequence

def key_fn(x):
  """Return a shortened record key built from the name string *x*.

  Every 'len=' substring is removed, then the first two
  whitespace-separated fields are joined with a single space.
  """
  return ' '.join(x.replace('len=', '').split()[:2])


fa = Fasta('multi.fasta', key_function=key_fn)

# Rewrite multi.fasta to out.fasta using the keys produced by key_fn as
# the record headers.
with open('out.fasta', 'w') as out:
  # Iterate the opened Fasta instance `fa`; the `Fasta` class itself is
  # not iterable.
  for seq in fa:
    # The template uses a named field, so the value must be passed by keyword.
    out.write('>{name}\n'.format(name=seq.name))
    for line in wrap_sequence(70, str(seq)):
      out.write(line)

## split_fasta.py
from pyfaidx import Fasta, wrap_sequence
fa = Fasta('multi.fasta')

# Write every record of multi.fasta to its own "<name>.fa" file: a '>'
# header line followed by the sequence passed through wrap_sequence(70, ...).
for record in fa:
  with open('{}.fa'.format(record.name), 'w') as handle:
    header = '>{}\n'.format(record.name)
    handle.write(header)
    handle.writelines(wrap_sequence(70, str(record)))

## register_codon_table.py
from Bio.Data.CodonTable import register_ncbi_table

# Register a codon table named 'Pterobranchia Mitochondrial' under id 24
# using Biopython's register_ncbi_table; `table` maps codons to one-letter
# amino-acid codes.
# NOTE(review): this snippet is cut off partway through the `table` dict.
# The remaining codon entries, the closing brace and any further arguments
# to register_ncbi_table are not present here and must be restored before
# this call can run.
register_ncbi_table(name = 'Pterobranchia Mitochondrial',
                    alt_name = None, id = 24,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',

## disorder.py
# For every record in input.sequencefile, print its name and sequence id,
# the sequence, the disorder id, and then a "Pos R" table listing the
# 1-based position and residue wherever the `dis` string has an 'X'.
# Each non-blank line must hold exactly five whitespace-separated fields:
# name, seqid, seq, disid, dis.
with open('input.sequencefile') as fh:
  for line in fh:
    if not line.strip():
      continue  # tolerate blank lines instead of failing the unpack below
    name, seqid, seq, disid, dis = line.split()
    print(' '.join([name, seqid]))
    print(seq)
    print(disid)
    print('Pos R')
    # start=1 gives 1-based positions without the manual "+ 1".
    for pos, (residue, flag) in enumerate(zip(seq, dis), start=1):
      if flag == 'X':
        # str.join only accepts strings, so the position is converted first.
        print(' '.join([str(pos), residue]))

## uniprot_urlopen.py
from urllib.request import urlopen

# Fetch every URL listed (one per line) in url_file.txt and iterate over
# the lines of each response.
with open('url_file.txt') as urls:
  for url in urls:
    # Lines read from a file keep their trailing newline; strip it so the
    # URL handed to urlopen is clean, and skip empty lines.
    url = url.strip()
    if not url:
      continue
    with urlopen(url) as page:
      for line in page:
        # do something
        pass  # placeholder so the loop body is syntactically valid

## trim_fasta.py
from pyfaidx import Fasta
# Write the first 500 bases of every record in multi.fasta to
# multi.500bp.fasta as FASTA (a '>' header line, then the sequence).
# NOTE(review): with strict_bounds=True pyfaidx may raise for records
# shorter than 500 bp — confirm this is the intended behaviour.
fa = Fasta('multi.fasta', strict_bounds=True)
# Open the output once: reopening it with 'w' inside the loop would
# truncate the file on every record and keep only the last one.
with open('multi.500bp.fasta', 'w') as out:
  for seq in fa:
    out.write('>{}\n'.format(seq.name))
    # The slice is a pyfaidx object; write() needs a plain string.
    out.write(str(seq[:500]) + '\n')

## freec2bedgraph.py
"""
Chromosome      Start   Ratio   MedianRatio     CopyNumber
1       1       -1      -1      5
1       9854    4.28786 2.74942 5
1       19707   3.38082 2.74942 5
1       29560   2.56094 2.74942 5
1       39413   2.87198 2.74942 5
1       49266   2.47    2.74942 5
1       59119   2.62686 2.74942 5
1       68972   2.0109  1.81246 4
	from json import dumps
	fh = open("restriction_enzymes.txt", "r")

	enzyme_sites = dict()

	for line in fh:
	seq, name = line.rstrip().split()
	enzyme_sites[seq] = name

	# here is a nice way to print our dictionary
	"""
	Convert FASTA to FASTQ file with a static

	Usage:
	$ ./fasta_to_fastq NAME.fasta NAME.fastq
	"""

	import sys, os
	from Bio import SeqIO
	from subprocess import Popen PIPE

	def bam_read_count(bamfile):
	""" Return a tuple of the number of mapped and unmapped reads in a bam file """
	p = Popen(['samtools', 'idxstats', bamfile], stdout=PIPE)
	mapped = 0
	unmapped = 0
	for line in p.stdout:
	rname, rlen, nm, nu = line.rstrip().split()
	mapped += int(nm)
	from pyfaidx import Fasta, wrap_sequence

	key_fn = lambda x: ' '.join(x.replace('len=', '').split()[:2])
	fa = Fasta('multi.fasta', key_function = key_fn)

	with open('out.fasta', 'w') as out:
	for seq in Fasta:
	out.write('>{name}\n'.format(seq.name))
	for line in wrap_sequence(70, str(seq)):
	out.write(line)
	from pyfaidx import Fasta, wrap_sequence
	fa = Fasta('multi.fasta')
	for seq in fa:
	with open('{}.fa'.format(seq.name), 'w') as out:
	out.write('>{}\n'.format(seq.name))
	for line in wrap_sequence(70, str(seq)):
	out.write(line)
	from Bio.Data.CodonTable import register_ncbi_table

	register_ncbi_table(name = 'Pterobranchia Mitochondrial',
	alt_name = None, id = 24,
	table = {
	'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
	'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
	'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
	'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
	'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
	with open('input.sequencefile') as fh:
	for line in fh:
	name, seqid, seq, disid, dis = line.split()
	print(' '.join([name, seqid]))
	print(seq)
	print(disid)
	print('Pos R')
	for i, (s, x) in enumerate(zip(seq, dis)):
	if x == 'X':
	print(' '.join([i + 1, s]))
	from urllib.request import urlopen

	with open('url_file.txt') as urls:
	for url in urls:
	with urlopen(url) as page:
	for line in page:
	# do something
	from pyfaidx import Fasta
	fa = Fasta('multi.fasta', strict_bounds=True)
	for seq in fa:
	with open('multi.500bp.fasta, 'w') as out:
	out.write(seq[:500])
	"""
	Chromosome Start Ratio MedianRatio CopyNumber
	1 1 -1 -1 5
	1 9854 4.28786 2.74942 5
	1 19707 3.38082 2.74942 5
	1 29560 2.56094 2.74942 5
	1 39413 2.87198 2.74942 5
	1 49266 2.47 2.74942 5
	1 59119 2.62686 2.74942 5
	1 68972 2.0109 1.81246 4