Skip to content

Instantly share code, notes, and snippets.

View mdshw5's full-sized avatar

Matt Shirley mdshw5

View GitHub Profile
@mdshw5
mdshw5 / answer.py
Last active August 29, 2015 13:58
biostars 97452
from json import dumps
# Build a lookup table mapping each recognition sequence (first column) to
# its enzyme name (second column) from a whitespace-separated text file.
enzyme_sites = {}
# The context manager closes the file even if a malformed line raises.
with open("restriction_enzymes.txt", "r") as fh:
    for line in fh:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        seq, name = fields
        enzyme_sites[seq] = name
# here is a nice way to print our dictionary
@mdshw5
mdshw5 / fasta_to_fastq.py
Created May 6, 2014 18:53
Biostars #99889
"""
Convert FASTA to FASTQ file with a static quality score
Usage:
$ ./fasta_to_fastq NAME.fasta NAME.fastq
"""
import sys, os
from Bio import SeqIO
@mdshw5
mdshw5 / readcount.py
Created June 5, 2014 23:38
python bam read counts
from subprocess import Popen, PIPE


def bam_read_count(bamfile):
    """Return a tuple of the number of mapped and unmapped reads in a bam file.

    Runs ``samtools idxstats`` on *bamfile* and sums the third column
    (mapped reads) and fourth column (unmapped reads) of every output line.

    Args:
        bamfile: Path to the BAM file handed to ``samtools idxstats``.

    Returns:
        A ``(mapped, unmapped)`` tuple of integers.

    Raises:
        OSError: If the ``samtools`` executable cannot be started.
        ValueError: If an output line does not have exactly four fields.
    """
    p = Popen(['samtools', 'idxstats', bamfile], stdout=PIPE)
    mapped = 0
    unmapped = 0
    try:
        for line in p.stdout:
            # Fields: reference name, reference length, mapped, unmapped.
            rname, rlen, nm, nu = line.rstrip().split()
            mapped += int(nm)
            unmapped += int(nu)
    finally:
        # Release the pipe and reap the child so no zombie process is left.
        p.stdout.close()
        p.wait()
    return (mapped, unmapped)
@mdshw5
mdshw5 / rename_multifasta.py
Created July 3, 2014 13:53
biostars 105338
from pyfaidx import Fasta, wrap_sequence
def key_fn(x):
    """Shorten a FASTA header to its first two fields, dropping any 'len=' text."""
    return ' '.join(x.replace('len=', '').split()[:2])


# Index the input with the shortened headers as record names.
fa = Fasta('multi.fasta', key_function=key_fn)
with open('out.fasta', 'w') as out:
    # Iterate the opened Fasta instance, not the Fasta class itself.
    for seq in fa:
        # The placeholder is named, so the value must be passed by keyword.
        out.write('>{name}\n'.format(name=seq.name))
        # Write the sequence wrapped at 70 characters per line.
        for line in wrap_sequence(70, str(seq)):
            out.write(line)
@mdshw5
mdshw5 / split_fasta.py
Created July 4, 2014 11:36
biostars 105388
from pyfaidx import Fasta, wrap_sequence
# Write every record of the multi-FASTA input to its own '<name>.fa' file.
fa = Fasta('multi.fasta')
for record in fa:
    record_name = record.name
    with open('{}.fa'.format(record_name), 'w') as handle:
        header = '>{}\n'.format(record_name)
        handle.write(header)
        # Emit the sequence wrapped at 70 characters per line.
        handle.writelines(wrap_sequence(70, str(record)))
@mdshw5
mdshw5 / register_codon_table.py
Last active August 29, 2015 14:03
biostars 105805
from Bio.Data.CodonTable import register_ncbi_table
register_ncbi_table(name = 'Pterobranchia Mitochondrial',
alt_name = None, id = 24,
table = {
'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
@mdshw5
mdshw5 / disorder.py
Last active August 29, 2015 14:03
biostars 105871
# Each input line carries five whitespace-separated fields: a name, a
# sequence id, the sequence, a disorder id and the disorder string.
with open('input.sequencefile') as fh:
    for line in fh:
        name, seqid, seq, disid, dis = line.split()
        print(' '.join([name, seqid]))
        print(seq)
        print(disid)
        print('Pos R')
        # Report the 1-based position and residue wherever the disorder
        # string is marked with 'X'.
        for pos, (s, x) in enumerate(zip(seq, dis), start=1):
            if x == 'X':
                # str.join only accepts strings, so convert the position.
                print(' '.join([str(pos), s]))
@mdshw5
mdshw5 / uniprot_urlopen.py
Created July 23, 2014 13:33
biostars 107358
from urllib.request import urlopen
# Fetch every URL listed (one per line) in url_file.txt and iterate over
# the lines of each response.
with open('url_file.txt') as urls:
    for url in urls:
        # Lines read from the file keep their trailing newline, which is
        # not part of the URL; strip it and skip blank lines.
        url = url.strip()
        if not url:
            continue
        with urlopen(url) as page:
            for line in page:
                # do something
                pass  # placeholder so the loop body is valid Python
@mdshw5
mdshw5 / trim_fasta.py
Created August 8, 2014 16:32
biostars 108872
from pyfaidx import Fasta
# Write the first 500 bases of every record in multi.fasta to one output file.
# NOTE(review): with strict_bounds=True, records shorter than 500 bp
# presumably raise instead of being truncated silently -- confirm intent.
fa = Fasta('multi.fasta', strict_bounds=True)
# Open the output once: reopening it in 'w' mode inside the loop would
# truncate the file on every record and keep only the last one.
with open('multi.500bp.fasta', 'w') as out:
    for seq in fa:
        out.write('>{}\n'.format(seq.name))
        # file.write() needs a string, not the sliced sequence object.
        out.write(str(seq[:500]) + '\n')
@mdshw5
mdshw5 / freec2bedgraph.py
Last active August 29, 2015 14:05
convert FREEC ratio.txt file to bedGraph format
"""
Chromosome Start Ratio MedianRatio CopyNumber
1 1 -1 -1 5
1 9854 4.28786 2.74942 5
1 19707 3.38082 2.74942 5
1 29560 2.56094 2.74942 5
1 39413 2.87198 2.74942 5
1 49266 2.47 2.74942 5
1 59119 2.62686 2.74942 5
1 68972 2.0109 1.81246 4