Skip to content

Instantly share code, notes, and snippets.

@gnp
Last active August 29, 2015 14:14
Show Gist options
  • Save gnp/251edb42ace55c4103c6 to your computer and use it in GitHub Desktop.
Save gnp/251edb42ace55c4103c6 to your computer and use it in GitHub Desktop.
> Fake Line
AGCTACGACTAGCCGCGCGCTATATACTAGCATCGACATTTTTATATTAAGACGAGACTATCATATACTAGCGAGCGCGGCACTATATTTGCTCGACTACACAGCCATCAAGATCAACACATATATACTTCCCCTATACACCAACACAGCGGGGACGAATACTATCATCATCATCATCAGCGCGCGCGCAGCAGAGGAAGGAAGGAATTCCTCTACTCTATTTATAGACGCGASAGCAG
> New Line
AGTAGAT
> Cat
> Doghead
AGTCG
GAT
GGG
C
GAGTCAG
> Noodles
G
import scala.io.Source
/**
 * Small demo that reads a FASTA-style text file and prints one [[FastaTestMain.FastaItem]]
 * per sequence line, each tagged with the most recent header line seen before it.
 */
object FastaTestMain {

  /** Matches a header line (`>` followed by the name); the capture group is the name with
    * surrounding whitespace removed. Used as an extractor in the pattern match in [[parse]].
    */
  val header = """^>\s*(.*?)\s*$""".r

  /** One sequence line paired with the header it appeared under. */
  case class FastaItem(header: String, sequence: String)

  /**
   * Turns raw text lines into FASTA items.
   *
   * Lines are trimmed and blank lines are skipped. A header line updates the remembered
   * header; every other line is emitted as a sequence under that header. Note that each
   * sequence line produces its own item: consecutive sequence lines under the same header
   * are NOT concatenated. Sequence lines that appear before the first header, and headers
   * that are not followed by any sequence line, produce no item.
   *
   * @param lines the raw lines of the input, in order
   * @return a lazy iterator over the parsed items
   */
  def parse(lines: Iterator[String]): Iterator[FastaItem] = {
    val conditioned = lines.map(_.trim).filter(_.nonEmpty)

    // State carried through the scan: (current header if any, sequence on this line if any).
    val start: (Option[String], Option[String]) = (None, None)
    val states = conditioned.scanLeft(start) {
      // A header line: remember it and clear the sequence slot.
      case (_, header(h)) => (Some(h), None)
      // Anything else is a sequence line: attach it to the remembered header.
      case ((ho, _), s) => (ho, Some(s))
    }

    // Keep only the states that carry both a header and a sequence.
    states.collect { case (Some(h), Some(s)) => FastaItem(h, s) }
  }

  /**
   * Reads and parses the FASTA file at `filename`.
   *
   * The result is lazy: lines are read as the iterator is consumed, and the underlying
   * file is closed once the iterator has been fully drained. If the caller abandons the
   * iterator early (or reading fails part-way), the file is not closed by this method.
   *
   * @param filename path of the file to read
   * @return a lazy iterator over the parsed items
   */
  def readFile(filename: String): Iterator[FastaItem] = {
    val source = Source.fromFile(filename)
    // `++` takes its right-hand side by name, so the block below (and thus `close()`)
    // only runs after the parsed items have been exhausted.
    parse(source.getLines()) ++ {
      source.close()
      Iterator.empty[FastaItem]
    }
  }

  /**
   * Prints every item parsed from the input file.
   *
   * @param args optional first argument is the file path; defaults to "fasta.txt"
   */
  def main(args: Array[String]): Unit = {
    val path = args.headOption.getOrElse("fasta.txt")
    readFile(path).foreach(println)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment