Skip to content

Instantly share code, notes, and snippets.

@paradigmatic
Created August 23, 2012 14:48
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save paradigmatic/3437345 to your computer and use it in GitHub Desktop.
Save paradigmatic/3437345 to your computer and use it in GitHub Desktop.
FASTA parser in Scala
import scala.util.parsing.combinator._
/** Minimal FASTA reader built on Scala's parser combinators.
  *
  * A record is a header line starting with `>` followed by one or more
  * sequence lines; the input must contain at least one record.
  */
object FASTA {

  /** One FASTA record.
    *
    * @param description the header line with the leading `>` dropped and
    *                    surrounding whitespace trimmed
    * @param sequence    the record's sequence lines, each trimmed and then
    *                    concatenated without any separator
    */
  case class Entry( description: String, sequence: String )

  /** Reads the file at path `fn` and parses its content as FASTA.
    *
    * The underlying source is closed before parsing starts, whether or not
    * reading succeeded, so no file handle is leaked.
    *
    * @param fn path of the file to read
    * @return the parsed records, in file order
    * @throws Exception if the content is not valid FASTA (see [[fromString]]);
    *                   errors raised while opening or reading the file propagate unchanged
    */
  def fromFile( fn: String ): List[Entry] = {
    val source = scala.io.Source.fromFile(fn)
    // getLines() is lazy: the text must be fully materialised before close().
    val text =
      try source.getLines().mkString("\n")
      finally source.close()
    fromString( text )
  }

  /** Parses `input` as FASTA text.
    *
    * @param input the complete FASTA content
    * @return the parsed records, in input order
    * @throws Exception if `input` cannot be parsed in full as one or more records;
    *                   the message is the parser's failure description
    */
  def fromString( input: String ): List[Entry] =
    Parser.parse(input)

  /** Grammar: entries ::= entry+ ; entry ::= header seqLine+ */
  private object Parser extends RegexParsers {

    // Header: '>' then the rest of the line; `tail` drops the '>' marker.
    lazy val header = """>.*""".r ^^ { _.tail.trim }

    // Sequence line: any line whose first character is not '>'.
    lazy val seqLine = """[^>].*""".r ^^ { _.trim }

    // One or more sequence lines glued together into a single string.
    lazy val sequence = rep1( seqLine ) ^^ { _.mkString }

    lazy val entry = header ~ sequence ^^ {
      case h ~ s => Entry(h,s)
    }

    // rep1: at least one record is required, so empty input is a parse failure.
    lazy val entries = rep1( entry )

    /** Runs the grammar over the whole of `input`, throwing on any parse failure. */
    def parse( input: String ): List[Entry] = {
      parseAll( entries, input ) match {
        case Success( es , _ ) => es
        case x: NoSuccess => throw new Exception(x.toString)
      }
    }
  }
}
/** Command-line demo: parses the FASTA file named by the first argument and
  * prints one `description -> sequence` line per record.
  *
  * When no argument is given, a usage message is written to stderr and the
  * program exits with status 1 instead of failing on an out-of-bounds index.
  */
object FASTADemo extends App {
  args.headOption match {
    case Some(fn) =>
      for( e <- FASTA.fromFile( fn ) ) {
        println( s"${e.description} -> ${e.sequence}" )
      }
    case None =>
      Console.err.println( "usage: FASTADemo <fasta-file>" )
      sys.exit(1)
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment