Created
June 25, 2010 21:46
-
-
Save ray1729/453499 to your computer and use it in GitHub Desktop.
Process Jan's SNP data with Clojure
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; This is a suggested improvement for processing SNP data, in response | |
;; to Jan Aerts' blog post <http://saaientist.blogspot.com/2010/06/encounter-with-incanter-about-clojure.html>. | |
;; Note that this example is for Clojure 1.1.0; if you're using Clojure 1.2, change the ns definition | |
;; to use clojure.contrib.io instead of duck-streams. | |
(ns ray1729.clojure.bio.snps | |
(:use [clojure.contrib.duck-streams :only (read-lines)])) | |
(defn parse-line | |
[s] | |
(let [m (apply hash-map (interleave [:chr :pos :quality :ref :alt] | |
(re-seq #"\S+" s)))] | |
(assoc m :quality (Integer. (get m :quality "0"))))) | |
(defn ti? | |
[r a] | |
(or (every? #{"A" "G"} [r a]) | |
(every? #{"C" "T"} [r a]))) | |
(defn tv? | |
[r a] | |
(or (and (#{"A" "G"} r) (#{"C" "T"} a)) | |
(and (#{"C" "T"} r) (#{"A" "G"} a)))) | |
(defn make-bucket-sorter [cutoffs] | |
(let [cutoffs (sort cutoffs)] | |
(fn [quality] | |
(loop [this_cutoff (first cutoffs) cutoffs (rest cutoffs)] | |
(if (or (empty? cutoffs) (<= quality this_cutoff)) | |
this_cutoff | |
(recur (first cutoffs) (rest cutoffs))))))) | |
(defn compute-ratios | |
[cutoffs tis tvs] | |
(for [c (sort cutoffs) | |
:let [ti (get tis c 0) | |
tv (get tvs c 0) | |
s (+ ti tv) | |
pct (if (zero? s) nil (int (* 100 (/ ti s))))]] | |
[c pct])) | |
(defn process-snps | |
[datafile cutoffs] | |
(let [bucket-for (make-bucket-sorter cutoffs)] | |
(loop [ti {} tv {} data (map parse-line (read-lines datafile))] | |
(if (empty? data) | |
(compute-ratios cutoffs ti tv) | |
(let [d (first data) | |
ds (rest data) | |
b (bucket-for (:quality d)) | |
r (:ref d) | |
a (:alt d)] | |
(cond | |
(ti? r a) (recur (assoc ti b (inc (get ti b 0))) tv ds) | |
(tv? r a) (recur ti (assoc tv b (inc (get tv b 0))) ds) | |
:else (recur ti tv ds))))))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment