Created
March 30, 2010 15:25
-
-
Save ksf/349196 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# OPTIONS_GHC -fvia-C -O2 -optc-O2 -optc-ffast-math -fexcess-precision -funbox-strict-fields #-} | |
module Data where | |
iubs, homs :: [(Char,Float)] | |
iubs = [('a',0.27),('c',0.12),('g',0.12),('t',0.27),('B',0.02) | |
,('D',0.02),('H',0.02),('K',0.02),('M',0.02),('N',0.02) | |
,('R',0.02),('S',0.02),('V',0.02),('W',0.02),('Y',0.02)] | |
homs = [('a',0.3029549426680),('c',0.1979883004921) | |
,('g',0.1975473066391),('t',0.3015094502008)] | |
alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\ | |
\GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\ | |
\CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\ | |
\ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\ | |
\GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\ | |
\AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\ | |
\AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# OPTIONS_GHC -fvia-C -O2 -optc-O2 -optc-ffast-math -fexcess-precision -funbox-strict-fields #-} | |
{-# LANGUAGE TemplateHaskell, MagicHash, BangPatterns #-} | |
module Fasta where | |
import qualified Data.ByteString as S | |
import Data.ByteString.Internal | |
import Language.Haskell.TH | |
import GHC.Word | |
import GHC.Base | |
import Foreign | |
import System.IO | |
import Control.Arrow | |
import Debug.Trace | |
import Unsafe.Coerce | |
bufSize = 4096 | |
printSeq dat = [| | |
let go seed n buf = {-# SCC "bufs" #-} bufs buf n seed | |
bufs buf 0 seed = return seed | |
bufs buf i seed | |
| seqT seed = do | |
seed' <- {-# SCC "lines" #-} lines buf m seed | |
hPutBuf stdout buf m' | |
bufs buf (i-m) seed' | |
where | |
m = min i ((bufSize `div` (lineL+1)) * lineL) | |
m' = (m+(m `quot` lineL)+if m `rem` lineL > 0 then 1 else 0) | |
lines buf 0 seed = return seed | |
lines buf i seed | |
| seqT seed = {-# SCC "bytes" #-} bytes eol buf seed >>= lines (incPtr eol) (i-m) | |
where | |
m = min i lineL | |
eol = buf `plusPtr` m | |
bytes eol rp seed | |
| rp == eol = pokeNl eol >> return seed | |
| seqT seed = poke rp w >> bytes eol (incPtr rp) seed' | |
where (w, seed') = {-# SCC "choose" #-} choose seed | |
lineL = 60 | |
seqT = flip seq True | |
incPtr = flip plusPtr 1 | |
pokeNl ptr = poke ptr (c2w '\n') | |
choose = first (\p -> ($(lookupNuc dat) p)) . rand | |
in \n seed -> allocaBytes bufSize (go seed n) |] | |
writeFasta label title n s = do | |
putStrLn $ ">" ++ label ++ " " ++ title | |
let (t:ts) = cycle . return . S.pack . map c2w $ s | |
go ts t n | |
where | |
go ss s n | |
| l60 && n60 = S.putStrLn l >> go ss r (n-60) | |
| n60 = S.putStr s >> S.putStrLn a >> go (tail ss) b (n-60) | |
| n <= ln = S.putStrLn (S.take n s) | |
| otherwise = S.putStr s >> S.putStrLn (S.take (n-ln) (head ss)) | |
where | |
ln = S.length s | |
l60 = ln >= 60 | |
n60 = n >= 60 | |
(l,r) = S.splitAt 60 s | |
(a,b) = S.splitAt (60-ln) (head ss) | |
rand :: Int -> (Float, Int) | |
rand seed = (rand, seed') where | |
im = 139968 | |
ia = 3877 | |
ic = 29573 | |
seed' = (seed * ia + ic) `rem` im | |
rand = fromIntegral seed' / fromIntegral im | |
data T b l | |
= B b (T b l) (T b l) | |
| L l | |
lookupNuc :: [(Char, Float)] -> ExpQ | |
lookupNuc = lam1E pP . go . snd . ann . build . acc 0 where | |
acc a ((b, f):xs) = L ((c2w b), f') : acc f' xs where f' = a + f | |
acc a [] = [] | |
build [x] = x | |
build l@(x:_) = build (f l) where | |
f (l:h:xs) = B () l h: f xs | |
f xs = xs | |
ann (B () (L (lw, lf)) (L (hw, hf))) = (hf, B lf (L lw) (L hw)) | |
ann (B () l (L (hw, hf))) = (hf, B lf l' (L hw)) | |
where (lf, l') = ann l | |
ann (B () l h ) = (hf, B lf l' h') | |
where (lf, l') = ann l | |
(hf, h') = ann h | |
pP = varP . mkName $ "p" | |
pE = varE . mkName $ "p" | |
ltE = varE . mkName $ "<" | |
flE = litE . rationalL . toRational | |
w8E = litE . integerL . fromIntegral | |
go (L w) = w8E w | |
go (B f l h) = condE (appE (appE ltE pE) (flE f)) (go l) (go h) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--{-# OPTIONS_GHC -O2 -fexcess-precision #-} | |
--{-# OPTIONS_GHC -fvia-C -O2 -optc-O2 -optc-ffast-math -fexcess-precision #-} | |
{-# OPTIONS_GHC -fvia-C -O2 -optc-O2 -optc-ffast-math -optc-march=native -fexcess-precision -funbox-strict-fields #-} | |
{-# LANGUAGE BangPatterns, TemplateHaskell, MagicHash #-} | |
-- | |
-- The Computer Language Benchmarks Game | |
-- http://shootout.alioth.debian.org/ | |
-- | |
-- The Noble League of Haskell Performance Knights | |
-- http://www.haskell.org/haskellwiki/Shootout | |
-- | |
-- Contributed by Don Stewart | |
-- A lazy bytestring solution. | |
-- Unnecessary strictness annotations removed by Sterling Clover 2/08 | |
-- Deburring and optimized lookup by Achim Schneider 11/09 | |
-- | |
-- Add: | |
-- -funfolding-use-threshold=40 | |
-- | |
import System | |
import Data.Word | |
import Control.Arrow | |
import qualified Data.ByteString.Lazy as L | |
import qualified Data.ByteString.Lazy.Char8 as C (pack,unfoldr) | |
import qualified Data.ByteString as S | |
import Data.ByteString.Internal (c2w) | |
import Data.ByteString.Unsafe | |
import Fasta | |
import Data | |
import GHC.Word | |
import GHC.Base | |
import System.IO | |
import Foreign | |
main = getArgs >>= readIO . head >>= fasta | |
fasta n = writeFasta "ONE" "Homo sapiens alu" (n*2) alu | |
>> randFasta "TWO" "IUB ambiguity codes" (n*3) $(printSeq iubs) 42 | |
>>= randFasta "THREE" "Homo sapiens frequency" (n*5) $(printSeq homs) | |
>> return () | |
------------------------------------------------------------------------- | |
-- | |
-- Lazily unfold the randomised dna sequences | |
-- | |
randFasta l t n f g = putStrLn (">" ++ l ++ " " ++ t) >> f n g | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment