Matt Shirley mdshw5

## answer.py
import simplesam
# Copy input.sam to output.sam, converting supplementary alignments
# (SAM flag bit 0x800) into secondary alignments (bit 0x100).
SUPPLEMENTARY = 0x800
SECONDARY = 0x100

with simplesam.Reader(open('input.sam')) as sam, \
        simplesam.Writer(open('output.sam', 'w'), sam.header) as fixed:
    for read in sam:
        if read.flag & SUPPLEMENTARY:
            read.flag &= ~SUPPLEMENTARY  # drop the 0x800 bit
            if not read.secondary:
                read.flag |= SECONDARY  # raise the 0x100 bit
        # every record is written, modified or not
        fixed.write(read)

## answer.py
import pyfaidx

# Write the records of file.fa to file.sorted.fa, ordered by sequence name.
with pyfaidx.Fasta("file.fa") as peg_fasta:
    with open("file.sorted.fa", 'w') as sorted_fasta:
        for name in sorted(peg_fasta.keys()):
            record = peg_fasta[name]
            # Terminate both the header and the sequence with a newline;
            # without them every record runs together on a single line.
            sorted_fasta.write('>' + record.long_name + '\n')
            sorted_fasta.write(str(record) + '\n')

## answer.py
import pyfaidx
import simplesam

with pyfaidx.Fasta('genome.fasta') as fasta, simplesam.Reader(open('orfH9_offtarget.sam')) as sam:
    for read in sam:
        # only mapped, non-secondary alignments are of interest
        if not read.mapped or read.secondary:
            continue
        # grab the reference sequence from the indexed FASTA
        zero_index = read.pos - 1
        window_end = zero_index + 20
        sequence_chunk = fasta[read.rname][zero_index:window_end]
        # or get the reference sequence from the SAM MD tag

## answer.py
from simplesam import Reader
from pyfaidx import Fasta

# For every mapped read, look up the reference base immediately before the
# read's start position.
with Reader(open('library.bam', 'r')) as sam_file, Fasta('hg38.fa', as_raw=True) as hg38:
    for read in sam_file:
        if not read.mapped:
            continue
        # might also want to handle read.reverse here
        prior_pos = read.pos - 2  # read.pos is 1-based
        # A read starting on the first reference base has no preceding base;
        # a negative index would instead count from the end of the sequence.
        prior_base = hg38[read.rname][prior_pos] if prior_pos >= 0 else None

## answer.py
from pyfaidx import Fasta

genes = Fasta('Genome.fasta')

# Extract the slice 146062:148216 of chr02 and save it as a FASTA record.
with open('chr02_18s', 'w') as f:
    seqFile = genes['chr02'][146062:148216]
    # The header and the sequence must sit on separate lines, so each write
    # is newline-terminated.
    f.write('>' + seqFile.name + '\n')
    f.write(seqFile.seq + '\n')  # or str(seqFile)

## answer.py
from pyfaidx import Fasta

# Build a lookup from the first tab-separated column of file1.txt to the
# second column.
with open('file1.txt', 'r') as ids:
    file_1 = dict(row.strip().split('\t') for row in ids)

# Open file2.fa, passing every sequence key through the lookup above.
file_2 = Fasta('file2.fa', key_function=lambda key: file_1[key])

## answer.py
from pyfaidx import Fasta

# Read newnames.txt into a mapping of old name -> new name.
name_map = {}
with open('newnames.txt') as newnames:
    next(newnames)  # skip the header row
    # every remaining row holds two whitespace-separated fields: old, new
    name_map.update(row.rstrip().split() for row in newnames)

# NOTE(review): the body of this `with` block is missing from the snippet, so
# the statement is incomplete as written. Presumably the renamed records were
# written to seqnew.fa here -- confirm against the original answer.
with open('seqnew.fa', 'w') as new_fasta:

## answer.py
import random
import sys
from pyfaidx import Fasta

# Print the name and sequence lines of n randomly chosen records of file.faa.
n = 10
faa = Fasta("file.faa")
# random.sample() only accepts a real sequence, so the records are first
# materialized in a list instead of passing the Fasta object itself.
for sample in random.sample(list(faa), n):
    print(sample.name)
    for line in sample:
        print(line)

## pileup.py
#!/usr/bin/env python

import sys
import argparse
import pkg_resources
from collections import deque
from collections import Counter
try:
    from collections import OrderedDict
except ImportError: #python 2.6

## example.py
from pyfaidx import FastaVariant

# Open reference.fasta together with the variant calls in sample1.vcf.gz.
chrom = 'chr1'
consensus = FastaVariant(
    'reference.fasta',
    'sample1.vcf.gz',
    het=True,
    hom=True,
)

# Take the 0:8 slice of the chosen chromosome and show it.
seq = consensus[chrom][0:8]
print(seq)  # AGTGCG

# If you don't want invariant sites masked, you're good to go. Otherwise:
	import simplesam
	# Copy input.sam to output.sam; records carrying flag bit 0x800 have it
	# removed and bit 0x100 set instead (unless already secondary).
	with simplesam.Reader(open('input.sam')) as sam:
		with simplesam.Writer(open('output.sam', 'w'), sam.header) as fixed:
			for read in sam:
				if read.flag & 0x800:
					read.flag -= 0x800
					if not read.secondary:
						read.flag += 0x100
				# every record is written, modified or not
				fixed.write(read)
	import pyfaidx

	# Write the records of file.fa to file.sorted.fa, ordered by sequence name.
	with pyfaidx.Fasta("file.fa") as peg_fasta:
		with open("file.sorted.fa", 'w') as sorted_fasta:
			for name in sorted(peg_fasta.keys()):
				record = peg_fasta[name]
				# Newline-terminate the header and the sequence so records
				# do not run together on one line.
				sorted_fasta.write('>' + record.long_name + '\n')
				sorted_fasta.write(str(record) + '\n')
	import pyfaidx
	import simplesam

	with pyfaidx.Fasta('genome.fasta') as fasta, simplesam.Reader(open('orfH9_offtarget.sam')) as sam:
		for read in sam:
			if read.mapped and not read.secondary:
				# grab the reference sequence from the indexed FASTA
				zero_index = read.pos - 1
				sequence_chunk = fasta[read.rname][zero_index:zero_index+20]
				# or get the reference sequence from the SAM MD tag
	from simplesam import Reader
	from pyfaidx import Fasta

	# For every mapped read, look up the reference base immediately before
	# the read's start position.
	with Reader(open('library.bam', 'r')) as sam_file, Fasta('hg38.fa', as_raw=True) as hg38:
		for read in sam_file:
			if read.mapped:
				# might also want to handle read.reverse here
				prior_pos = read.pos - 2  # read.pos is 1-based
				# A read starting on the first reference base has no preceding
				# base; a negative index would count from the sequence end.
				prior_base = hg38[read.rname][prior_pos] if prior_pos >= 0 else None
	from pyfaidx import Fasta

	genes = Fasta('Genome.fasta')

	# Extract the slice 146062:148216 of chr02 and save it as a FASTA record.
	with open('chr02_18s', 'w') as f:
		seqFile = genes['chr02'][146062:148216]
		# Header and sequence go on separate, newline-terminated lines.
		f.write('>' + seqFile.name + '\n')
		f.write(seqFile.seq + '\n')  # or str(seqFile)
	from pyfaidx import Fasta

	# Build a lookup from the first tab-separated column of file1.txt to the
	# second column.
	file_1 = {}
	with open('file1.txt', 'r') as ids:
		for line in ids:
			key, value = line.strip().split('\t')
			file_1[key] = value

	# Open file2.fa, passing every sequence key through the lookup above.
	file_2 = Fasta('file2.fa', key_function=lambda key: file_1[key])
	from pyfaidx import Fasta

	# Read newnames.txt into a mapping of old name -> new name.
	name_map = {}
	with open('newnames.txt') as newnames:
		next(newnames)  # skip the header row
		for line in newnames:
			# two whitespace-separated fields per row: old name, new name
			old, new = line.rstrip().split()
			name_map[old] = new

	# NOTE(review): the body of this `with` block is missing from the snippet,
	# so the statement is incomplete as written -- confirm against the original.
	with open('seqnew.fa', 'w') as new_fasta:
	import random
	import sys
	from pyfaidx import Fasta

	# Print the name and sequence lines of n randomly chosen records of file.faa.
	n = 10
	faa = Fasta("file.faa")
	# random.sample() only accepts a real sequence, so the records are first
	# materialized in a list instead of passing the Fasta object itself.
	for sample in random.sample(list(faa), n):
		print(sample.name)
		for line in sample:
			print(line)
	#!/usr/bin/env python

	import sys
	import argparse
	import pkg_resources
	from collections import deque
	from collections import Counter
	try:
	from collections import OrderedDict
	except ImportError: #python 2.6
	from pyfaidx import FastaVariant

	# Open reference.fasta together with the variant calls in sample1.vcf.gz.
	chrom = 'chr1'
	consensus = FastaVariant(
		'reference.fasta',
		'sample1.vcf.gz',
		het=True,
		hom=True,
	)

	# Take the 0:8 slice of the chosen chromosome and show it.
	seq = consensus[chrom][0:8]
	print(seq)  # AGTGCG

	# If you don't want invariant sites masked, you're good to go. Otherwise: