Skip to content

Instantly share code, notes, and snippets.

View chapmanb's full-sized avatar

Brad Chapman chapmanb

View GitHub Profile
def _index_bfast(ref_file):
"""Indexes bfast in color and nucleotide space for longer reads.
This preps for 40+bp sized reads, which is bfast's strength.
"""
dir_name = "bfast"
window_size = 14
bfast_nt_masks = [
"1111111111111111111111",
"1111101110111010100101011011111",
(use '[clojure.java.io])
(use '[clojure.contrib.str-utils2 :only (join)])
(defn fasta-lengths
  "Generate collection of FASTA record lengths, splitting at '>' delimiters.

  in-file is handed to `reader`. Returns a lazy sequence holding one
  length per run of sequence lines found between header lines, in file
  order. The length is the total number of characters across the lines
  of that run."
  [in-file]
  ;; NOTE(review): the reader opened here is never explicitly closed; the
  ;; result is lazy, so closing it early would break consumers.
  (->> (line-seq (reader in-file))
       ;; Group consecutive header lines and consecutive sequence lines.
       (partition-by #(.startsWith ^String % ">"))
       ;; Keep only the sequence groups.
       (remove #(.startsWith ^String (first %) ">"))
       (map #(join "" %))
       (map #(.length ^String %))))
# Read every line of dna.txt; the second line (index 1) is kept as the
# mRNA string, including any trailing newline it carries.
with open("dna.txt", "r") as myfile:
    data = list(myfile)
mRNA = data[1]
def next_transcript(mRNA, cur_pos):
    """Locate the next AUG...UAG span in ``mRNA`` at or after ``cur_pos``.

    The search finds the first "AUG" starting from ``cur_pos`` and then
    walks forward codon by codon (steps of three) in that reading frame
    until a "UAG" codon is met. Only "UAG" is treated as a stop codon.

    Returns a ``(start, stop)`` tuple with the index of the "AUG" and the
    index of the in-frame "UAG", or ``None`` when no "AUG" exists at or
    after ``cur_pos`` or no in-frame "UAG" follows it.
    """
    initial = mRNA.find("AUG", cur_pos)
    # str.find signals "not found" with -1; scanning from -1 would read the
    # wrong frame and could report a bogus (-1, i) result.
    if initial == -1:
        return None
    for i in range(initial, len(mRNA), 3):
        if mRNA[i:i + 3] == "UAG":
            return initial, i
    return None
(comment "
Retrieve UniProt FASTA files for provided input identifiers using REST API.
http://www.uniprot.org/faq/28
Usage:
cljr run fetch_uniprot_fasta.clj [Any number of Uniprot IDs]
")
(ns biostar.uniprot
(:import [java.net URLEncoder])
"""Retrieve sequences from a MAF file for a genomic coordinate.
Usage:
maf_retrieve_region.py <maf file> <organism> <chromosome> <start> <end>
"""
import os
import sys
import subprocess
from bx.align import maf
@chapmanb
chapmanb / gist:727625
Created December 3, 2010 21:57
retrieve_gene.py
from Bio import Entrez
def fetch_gene_coordinates(search_term):
handle = Entrez.esearch(db="gene", term=search_term)
rec = Entrez.read(handle)
gene_id = rec["IdList"][0] # assuming best match works
handle = Entrez.efetch(db="gene", id=gene_id, retmode="xml")
rec = Entrez.read(handle)[0]
gene_locus = rec["Entrezgene_locus"][0]
class ORFFinder:
    """Find the longest ORF in a given sequence.

    "seq" is a string. If "start" is not provided, any codon can be the
    start of an ORF. If multiple ORFs have the longest length, the first
    one encountered is printed.
    """

    def __init__(self, seq, start=None, stop=None):
        """Store the sequence and the start/stop codon lists.

        seq   -- the sequence string to search.
        start -- list of start codons; defaults to an empty list.
        stop  -- list of stop codons; defaults to ["TAG", "TAA", "TGA"].
        """
        self.seq = seq
        # Build the defaults per instance: a list literal in the signature
        # is created once and would be shared (and mutated) across instances.
        self.start = [] if start is None else start
        self.stop = ["TAG", "TAA", "TGA"] if stop is None else stop
--- src/main/java/org/biojava3/core/sequence/transcription/RNAToAminoAcidTranslator.java.orig 2011-03-09 15:32:39.295875004 -0500
+++ src/main/java/org/biojava3/core/sequence/transcription/RNAToAminoAcidTranslator.java 2011-03-10 08:18:42.525875009 -0500
@@ -85,7 +85,6 @@
codonL.add(codon);
}
-
unknownAminoAcidCompound = aminoAcids.getCompoundForString("X");
}
@chapmanb
chapmanb / vagrant_pallet.clj
Created June 20, 2011 11:26
Running pallet with manual specification of a server; example with vagrant virtualbox
(ns distblast-cluster.vagrant
(:require [pallet.compute :as compute]
[pallet.phase :as phase]
[pallet.core :as core]
[pallet.utils :as utils]
[pallet.compute.node-list :as node-list]
[pallet.action.exec-script :as exec-script]))
(defn test-script [session]
(-> session
@chapmanb
chapmanb / galaxy-init-example
Created December 5, 2011 19:29
Example Galaxy start script for CentOS: edit and put in /etc/init.d/galaxy
#!/bin/bash
#--- config
SERVICE_NAME=galaxy
RUN_AS=galaxy
RUN_IN=/your/galaxy/directory
#--- main actions