Skip to content

Instantly share code, notes, and snippets.

@DougC
Created May 31, 2011 17:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DougC/1000894 to your computer and use it in GitHub Desktop.
Save DougC/1000894 to your computer and use it in GitHub Desktop.
Read a lisp-like syntax
package cmn
import java.io.StringBufferInputStream
/** Common supertype of the tokens declared in this file; each token carries a string `value`. */
trait Token {
  /** The string associated with this token. */
  val value: String
}
/** Token whose `value` is the opening parenthesis character. */
case object OPEN_PAREN extends Token {
  override val value: String = "("
}
/** Token whose `value` is the closing parenthesis character. */
case object CLOSE_PAREN extends Token {
  override val value: String = ")"
}
/** Whitespace token; its `value` is a single space. */
case object WS extends Token {
  override val value: String = " "
}
/** Token wrapping an arbitrary string `value`. */
case class Word(value: String) extends Token
/** Splits parenthesised, whitespace-separated text into a flat list of [[Token]]s. */
object CMNReader {
  import scala.annotation.tailrec

  /** Sample input that `main` tokenizes and prints. */
  val data = """(cmn (size 24)
(system brace
(staff treble (meter 6 8)
(c4 e. tenuto) (d4 s) (ef4 e sf)
(c4 e) (d4 s) (en4 s) (fs4 e (fingering 3)))
(staff treble (meter 3 4)
(c5 e. marcato) (d5 s bartok-pizzicato) (ef5 e)
(c5 e staccato tenuto) (d5 s down-bow) (en5 s) (fs5 e)))
(system bracket
(staff bar bass (meter 6 16)
(c4 e. wedge) (d4 s staccato) (ef4 e left-hand-pizzicato)
(c4 e tenuto accent rfz) (d4 s mordent) (en4 s pp) (fs4 e fermata))))"""

  /** Tokenizes `data` and prints the resulting tokens with every `WS` removed. */
  def main(args: Array[String]): Unit = {
    // Source.fromString reads the characters directly; no byte stream or codec is involved.
    val tokens = tokenize(scala.io.Source.fromString(data))
    println(tokens.filterNot(_ == WS))
  }

  /**
   * Reads `in` to exhaustion and returns its tokens in input order.
   *
   * - `(` and `)` become `OPEN_PAREN` and `CLOSE_PAREN`.
   * - A run of one or more whitespace characters becomes a single `WS`.
   * - Every maximal run of other characters becomes one `Word`.
   *
   * The scan is a tail-recursive loop, so stack usage does not grow with the
   * length of the input.
   *
   * @param in character source to consume; it is fully drained by this call
   * @return the tokens found, or an empty list when `in` has no characters
   */
  def tokenize(in: scala.io.Source): List[Token] = {
    // Moves the word collected so far (if any) onto the front of the reversed
    // token list and resets the buffer for the next word.
    def flush(word: StringBuilder, acc: List[Token]): List[Token] =
      if (word.isEmpty) acc
      else {
        val completed = Word(word.toString)
        word.clear()
        completed :: acc
      }

    // `acc` holds the tokens emitted so far, most recent first.
    @tailrec
    def loop(word: StringBuilder, acc: List[Token]): List[Token] =
      if (!in.hasNext) flush(word, acc).reverse
      else in.next() match {
        case '(' => loop(word, OPEN_PAREN :: flush(word, acc))
        case ')' => loop(word, CLOSE_PAREN :: flush(word, acc))
        case c if c.isWhitespace =>
          flush(word, acc) match {
            // The previous token is already WS: collapse the whitespace run.
            case emitted @ (WS :: _) => loop(word, emitted)
            case emitted => loop(word, WS :: emitted)
          }
        case c =>
          word += c
          loop(word, acc)
      }

    loop(new StringBuilder, Nil)
  }
}
@DougC
Copy link
Author

DougC commented May 31, 2011

Output:

List(OPEN_PAREN, Word(cmn), OPEN_PAREN, Word(size), Word(24), CLOSE_PAREN, OPEN_PAREN, Word(system), Word(brace), OPEN_PAREN, Word(staff), Word(treble), OPEN_PAREN, Word(meter), Word(6), Word(8), CLOSE_PAREN, OPEN_PAREN, Word(c4), Word(e.), Word(tenuto), CLOSE_PAREN, OPEN_PAREN, Word(d4), Word(s), CLOSE_PAREN, OPEN_PAREN, Word(ef4), Word(e), Word(sf), CLOSE_PAREN, OPEN_PAREN, Word(c4), Word(e), CLOSE_PAREN, OPEN_PAREN, Word(d4), Word(s), CLOSE_PAREN, OPEN_PAREN, Word(en4), Word(s), CLOSE_PAREN, OPEN_PAREN, Word(fs4), Word(e), OPEN_PAREN, Word(fingering), Word(3), CLOSE_PAREN, CLOSE_PAREN, CLOSE_PAREN, OPEN_PAREN, Word(staff), Word(treble), OPEN_PAREN, Word(meter), Word(3), Word(4), CLOSE_PAREN, OPEN_PAREN, Word(c5), Word(e.), Word(marcato), CLOSE_PAREN, OPEN_PAREN, Word(d5), Word(s), Word(bartok-pizzicato), CLOSE_PAREN, OPEN_PAREN, Word(ef5), Word(e), CLOSE_PAREN, OPEN_PAREN, Word(c5), Word(e), Word(staccato), Word(tenuto), CLOSE_PAREN, OPEN_PAREN, Word(d5), Word(s), Word(down-bow), CLOSE_PAREN, OPEN_PAREN, Word(en5), Word(s), CLOSE_PAREN, OPEN_PAREN, Word(fs5), Word(e), CLOSE_PAREN, CLOSE_PAREN, CLOSE_PAREN, OPEN_PAREN, Word(system), Word(bracket), OPEN_PAREN, Word(staff), Word(bar), Word(bass), OPEN_PAREN, Word(meter), Word(6), Word(16), CLOSE_PAREN, OPEN_PAREN, Word(c4), Word(e.), Word(wedge), CLOSE_PAREN, OPEN_PAREN, Word(d4), Word(s), Word(staccato), CLOSE_PAREN, OPEN_PAREN, Word(ef4), Word(e), Word(left-hand-pizzicato), CLOSE_PAREN, OPEN_PAREN, Word(c4), Word(e), Word(tenuto), Word(accent), Word(rfz), CLOSE_PAREN, OPEN_PAREN, Word(d4), Word(s), Word(mordent), CLOSE_PAREN, OPEN_PAREN, Word(en4), Word(s), Word(pp), CLOSE_PAREN, OPEN_PAREN, Word(fs4), Word(e), Word(fermata), CLOSE_PAREN, CLOSE_PAREN, CLOSE_PAREN, CLOSE_PAREN)

@DougC
Copy link
Author

DougC commented Jun 1, 2011

Of course, because this approach is recursive (and not tail-recursive), it will crash with a stack overflow once the input is long enough to exceed the maximum stack depth; the recursion goes one level deeper for every character read. I think it should be possible to rework it so that the maximum stack depth equals the number of characters in the longest word, and so that all other cases are tail-recursive.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment