Created
May 31, 2011 17:09
-
-
Save DougC/1000894 to your computer and use it in GitHub Desktop.
Read a lisp-like syntax
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package cmn | |
import java.io.StringBufferInputStream | |
/** A lexical token produced by the lisp-like reader.
  * `value` is the token's textual representation in the input.
  */
trait Token {
  val value: String
}
/** The "(" token that opens an s-expression. */
case object OPEN_PAREN extends Token {
  override val value: String = "("
}
/** The ")" token that closes an s-expression. */
case object CLOSE_PAREN extends Token {
  override val value: String = ")"
}
/** A single token standing for a run of one or more whitespace characters. */
case object WS extends Token {
  override val value: String = " "
}
case class Word(val value: String) extends Token | |
object CMNReader {
  // Sample CMN (Common Music Notation) score in the lisp-like input syntax.
  val data = """(cmn (size 24)
(system brace
(staff treble (meter 6 8)
(c4 e. tenuto) (d4 s) (ef4 e sf)
(c4 e) (d4 s) (en4 s) (fs4 e (fingering 3)))
(staff treble (meter 3 4)
(c5 e. marcato) (d5 s bartok-pizzicato) (ef5 e)
(c5 e staccato tenuto) (d5 s down-bow) (en5 s) (fs5 e)))
(system bracket
(staff bar bass (meter 6 16)
(c4 e. wedge) (d4 s staccato) (ef4 e left-hand-pizzicato)
(c4 e tenuto accent rfz) (d4 s mordent) (en4 s pp) (fs4 e fermata))))"""

  /** Tokenizes the sample data and prints the token list with whitespace
    * tokens removed.
    *
    * `Source.fromString` replaces the original
    * `new BufferedSource(new StringBufferInputStream(data))`:
    * `java.io.StringBufferInputStream` has been deprecated since JDK 1.1
    * because it drops the high byte of each character.
    */
  def main(args: Array[String]): Unit = {
    println(tokenize(scala.io.Source.fromString(data)) filterNot (_ == WS))
  }

  /** Reads the whole source and returns its token stream.
    *
    * Behavior matches the original recursive version:
    *   - '(' and ')' become OPEN_PAREN / CLOSE_PAREN;
    *   - a run of consecutive whitespace collapses into a single WS token;
    *   - any other run of characters becomes one Word token.
    *
    * Unlike the original, this implementation is iterative and runs in
    * constant stack space, so it cannot overflow on large inputs (the
    * recursive version crashed once the token count exceeded the maximum
    * stack depth).
    *
    * @param in character source to tokenize; consumed fully
    * @return tokens in input order
    */
  def tokenize(in: scala.io.Source): List[Token] = {
    val tokens = scala.collection.mutable.ListBuffer.empty[Token]
    val word = new StringBuilder

    // Emit the Word accumulated so far, if any.
    def flushWord(): Unit =
      if (word.nonEmpty) {
        tokens += Word(word.toString)
        word.clear()
      }

    while (in.hasNext) {
      in.next() match {
        case '(' =>
          flushWord()
          tokens += OPEN_PAREN
        case ')' =>
          flushWord()
          tokens += CLOSE_PAREN
        case c if c.isWhitespace =>
          flushWord()
          // Collapse whitespace runs into one WS, as the recursive
          // original did by de-duplicating adjacent WS tokens.
          if (tokens.isEmpty || tokens.last != WS) tokens += WS
        case c =>
          word += c
      }
    }
    flushWord()
    tokens.toList
  }
}
Of course, because this approach recurses once per input character, it will crash with a stack overflow when the input is long enough to exceed the maximum stack depth. I think it should be possible to rework it so that the maximum stack depth equals the number of characters in the longest word, with every other call being tail-recursive.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output:
List(OPEN_PAREN, Word(cmn), OPEN_PAREN, Word(size), Word(24), CLOSE_PAREN, OPEN_PAREN, Word(system), Word(brace), OPEN_PAREN, Word(staff), Word(treble), OPEN_PAREN, Word(meter), Word(6), Word(8), CLOSE_PAREN, OPEN_PAREN, Word(c4), Word(e.), Word(tenuto), CLOSE_PAREN, OPEN_PAREN, Word(d4), Word(s), CLOSE_PAREN, OPEN_PAREN, Word(ef4), Word(e), Word(sf), CLOSE_PAREN, OPEN_PAREN, Word(c4), Word(e), CLOSE_PAREN, OPEN_PAREN, Word(d4), Word(s), CLOSE_PAREN, OPEN_PAREN, Word(en4), Word(s), CLOSE_PAREN, OPEN_PAREN, Word(fs4), Word(e), OPEN_PAREN, Word(fingering), Word(3), CLOSE_PAREN, CLOSE_PAREN, CLOSE_PAREN, OPEN_PAREN, Word(staff), Word(treble), OPEN_PAREN, Word(meter), Word(3), Word(4), CLOSE_PAREN, OPEN_PAREN, Word(c5), Word(e.), Word(marcato), CLOSE_PAREN, OPEN_PAREN, Word(d5), Word(s), Word(bartok-pizzicato), CLOSE_PAREN, OPEN_PAREN, Word(ef5), Word(e), CLOSE_PAREN, OPEN_PAREN, Word(c5), Word(e), Word(staccato), Word(tenuto), CLOSE_PAREN, OPEN_PAREN, Word(d5), Word(s), Word(down-bow), CLOSE_PAREN, OPEN_PAREN, Word(en5), Word(s), CLOSE_PAREN, OPEN_PAREN, Word(fs5), Word(e), CLOSE_PAREN, CLOSE_PAREN, CLOSE_PAREN, OPEN_PAREN, Word(system), Word(bracket), OPEN_PAREN, Word(staff), Word(bar), Word(bass), OPEN_PAREN, Word(meter), Word(6), Word(16), CLOSE_PAREN, OPEN_PAREN, Word(c4), Word(e.), Word(wedge), CLOSE_PAREN, OPEN_PAREN, Word(d4), Word(s), Word(staccato), CLOSE_PAREN, OPEN_PAREN, Word(ef4), Word(e), Word(left-hand-pizzicato), CLOSE_PAREN, OPEN_PAREN, Word(c4), Word(e), Word(tenuto), Word(accent), Word(rfz), CLOSE_PAREN, OPEN_PAREN, Word(d4), Word(s), Word(mordent), CLOSE_PAREN, OPEN_PAREN, Word(en4), Word(s), Word(pp), CLOSE_PAREN, OPEN_PAREN, Word(fs4), Word(e), Word(fermata), CLOSE_PAREN, CLOSE_PAREN, CLOSE_PAREN, CLOSE_PAREN)