Skip to content

Instantly share code, notes, and snippets.

@peterjc
peterjc / shed_diff
Last active August 29, 2015 14:10
Galaxy Tool Shed diff command
#!/usr/bin/env python
"""Galaxy Tool Shed diff command."""
import sys
import os
import subprocess
import tempfile
from optparse import OptionParser
VERSION = "v0.0.1"
@peterjc
peterjc / pulldown_shed_yml.py
Created April 29, 2015 10:28
Rough script for populating Galaxy .shed.yml files with metadata already in Tool Shed
# Walks specified folders looking for .shed.yml files,
# with at least owner and name given.
#
# Matches the owner/name with the remote Tool Shed, and
# takes any missing meta-data from the remote Tool Shed.
#
# Pre-existing local data otherwise should be preserved.
#
# Does the yaml dump with some hackery because I couldn't
# work out how to make the library use the layout I wanted.
from Bio import SeqIO
with open("CP008802.txt", "w") as output:
output.write("Seqname\tSource\tfeature\tStart\tEnd\tScore\tStrand\tFrame\tAttributes\n")
for record in SeqIO.parse("CP008802.gbk", "genbank"):
print("Converting %s" % record.name)
for f in record.features:
if f.type != "gene":
continue
locus_tag = f.qualifiers["locus_tag"][0]
if len(f.location.parts) > 1:
@peterjc
peterjc / brad_gff_import.py
Created August 24, 2011 02:31
Hack script to import selected folders from one git repository to another
#Copyright 2011 Peter Cock, released under GPL v3 licence
#
#Hack script to extract files in selected directories in one git
#repository and apply them to another repository using a potentially
#different directory structure.
#
#This is written in Python and needs the git library; I used 0.3.2 RC1
#https://github.com/gitpython-developers/GitPython
#
#I assume when run both repositories are clean (no untracked files,
@peterjc
peterjc / select_fastq.py
Created September 7, 2011 16:27
Selecting FASTQ sequences by ID (Biopython with SeqRecord object)
from Bio import SeqIO
import sys
# Load the wanted record identifiers, one per line, from the file named by
# the first command line argument. Only the line terminator is removed (a
# blind [:-1] slice would eat the last character of the final ID when the
# file lacks a trailing newline), and the handle is closed promptly.
with open(sys.argv[1]) as id_handle:
    ids = set(line.rstrip("\r\n") for line in id_handle)

# Lazily filter the FASTQ records arriving on stdin, keeping those whose
# identifier was requested, and write them to stdout as FASTQ.
wanted = (rec for rec in SeqIO.parse(sys.stdin, "fastq") if rec.id in ids)
SeqIO.write(wanted, sys.stdout, "fastq")
@peterjc
peterjc / select_fastq2.py
Created September 7, 2011 16:29
Selecting FASTQ sequences by ID (Biopython with strings)
from Bio.SeqIO.QualityIO import FastqGeneralIterator
import sys
# Load the wanted record identifiers, one per line, from the file named by
# the first command line argument. Only the line terminator is removed (a
# blind [:-1] slice would eat the last character of the final ID when the
# file lacks a trailing newline), and the handle is closed promptly.
with open(sys.argv[1]) as id_handle:
    ids = set(line.rstrip("\r\n") for line in id_handle)

for title, seq, quals in FastqGeneralIterator(sys.stdin):
    # The record identifier is the first whitespace-delimited word of the
    # title line; anything after it is free-text description.
    if title.split(None, 1)[0] in ids:
        # Use write() rather than print: the format string already ends
        # with a newline, so print would emit an extra blank line after
        # every record. write() also behaves the same on Python 2 and 3.
        sys.stdout.write("@%s\n%s\n+\n%s\n" % (title, seq, quals))
@peterjc
peterjc / green_bottles.py
Created March 20, 2012 14:30
Simple "Green Bottles" Python script to benchmark ZLIB calls under different Python implementations + PyPy target
import zlib
import time
def decompress(comp_data):
    """Inflate a raw deflate stream and checksum the result.

    Returns a two-item tuple: the decompressed data, and the value of
    zlib.crc32 computed over that decompressed data.
    """
    # A negative window size tells zlib the stream carries no header.
    inflater = zlib.decompressobj(-15)
    inflated = inflater.decompress(comp_data)
    # flush() hands back any output still buffered inside the object.
    inflated += inflater.flush()
    checksum = zlib.crc32(inflated)
    return inflated, checksum
def compress(orig_data):
@peterjc
peterjc / SAMv1_padded.fasta
Created May 14, 2012 10:00
SAM/BAM examples from the specification
>ref
AGCATGTTAGATAA**GATAGCTGTGCTAGTAGGCAGTCAGCGCCAT
@peterjc
peterjc / make_shed_yml.py
Created October 13, 2015 16:09
Hack for syncing local .shed.yml files with a Galaxy Tool Shed
#
# A hack, loosely based on Eric Rasche's disgusting.py
# https://gist.github.com/erasche/4ac3448b036f09979e14
#
# Intended as a one-off use script to help with syncing local
# .shed.yml files with a Galaxy Tool Shed. See also:
# https://gist.github.com/peterjc/5ebbf446d799f3aaa639
import yaml
import os
@peterjc
peterjc / ace_to_contig_stats.py
Created February 19, 2013 16:56
Quick Python script to extract contig summary information (lengths and number of reads, as a tabular file) from an ACE assembly file, using the Biopython ACE parser for convenience.
#!/usr/bin/env python
#Example usage:
#
# $ python ace_to_contig_stats.py < example.ace > example_stats.tsv
#
import sys
from Bio.Sequencing import Ace
sys.stdout.write("#Contig\tPadded length\tUnpadded length\tReads\n")
for contig in Ace.parse(sys.stdin):