Skip to content

Instantly share code, notes, and snippets.

@gravelld
Last active January 20, 2016 11:15
Show Gist options
  • Save gravelld/6f068e53a44cdb4738a8 to your computer and use it in GitHub Desktop.
Save gravelld/6f068e53a44cdb4738a8 to your computer and use it in GitHub Desktop.
A parboiled2 parser for Discogs track positions
package com.elsten.adb.extract.discogs.release
import org.parboiled2._
/**
 * Parses Discogs track position strings according to
 * http://www.discogs.com/help/doc/submission-guidelines-release-trk#Position
 *
 * The entry point is [[mediumAndTrack]], which yields a [[ParsedTrackPosition]].
 * No rule in this class anchors on end of input, so a successful run may have
 * consumed only a prefix of `input`.
 *
 * @param input         the position string to parse
 * @param isMultiDisc   when true, a bare `<digits>.<track>` is read as medium and track
 *                      (e.g. "1.2" is medium 1, track 2); when false the same text falls
 *                      through to the track/sub-track alternatives
 * @param isAutocoupled when true, side letters A-D are mapped to media through
 *                      [[autocoupledSeq]] instead of sequentially
 */
class DiscogsTrackPositionParser(val input: ParserInput, val isMultiDisc: Boolean = false, val isAutocoupled: Boolean = false) extends Parser {

  /** Medium format prefixes that are recognised literally. */
  def mediumPrefixKnownFormat = rule { "DVD" | "CD" }

  /** Optional format prefix followed by a medium number; yields `(format, Some(number))`. */
  def mediumPos = rule {
    optional(capture(mediumPrefixKnownFormat)) ~ capture(oneOrMore(CharPredicate.Digit)) ~> ((format, pos) => (format, Some(pos)))
  }

  /** Mandatory format prefix followed by a medium number; yields `(format, number)`. */
  def mediumPosMandatoryFormat = rule {
    capture(mediumPrefixKnownFormat) ~ capture(oneOrMore(CharPredicate.Digit)) ~> ((format, pos) => (format, pos))
  }

  /** A numeric sub-track: a mandatory dot followed by digits (the digits are captured). */
  def subtrackDigit = rule { "." ~ capture(oneOrMore(CharPredicate.Digit)) }

  /** An alphabetic sub-track: an optional dot followed by letters (the letters are captured). */
  def subtrackAlpha = rule { optional(".") ~ capture(oneOrMore(CharPredicate.Alpha)) }

  /** A single side letter. */
  def side = rule { CharPredicate.Alpha }

  /**
   * A track (optional side letter plus digits) followed by a sub-track; pushes the
   * track text and then the sub-track text.
   *
   * The alphabetic sub-track is tried before the numeric one, as before. A former
   * third alternative (digits without a side letter, then an alphabetic sub-track)
   * was dropped: `optional(side)` also matches nothing, so the first alternative
   * already accepted exactly that input and the third could never be reached.
   */
  def trackWithSubTrackPos = rule {
    capture(optional(side) ~ oneOrMore(CharPredicate.Digit)) ~ (subtrackAlpha | subtrackDigit)
  }

  // a map of auto coupled sides to medium number
  // Further discs are deliberately left out. See Songs In The Key Of Life which has 3 media, but trying to
  // auto couple them will give four media, 'E' -> 3, 'H' -> 3, 'F' -> 4, 'G' -> 4
  val autocoupledSeq: Map[Char, Int] = Map('A' -> 1, 'D' -> 1, 'B' -> 2, 'C' -> 2)

  /**
   * Converts a side letter to a medium number.
   *
   * Sequential numbering pairs consecutive letters: A and B give 1, C and D give 2,
   * and so on. The letter is upper-cased first.
   *
   * @param mediumChar the side letter
   * @return the [[autocoupledSeq]] entry when `isAutocoupled` is set and the letter has
   *         one, otherwise the sequential medium number
   */
  def convertToMediumNumber(mediumChar: Char): Int = {
    val sideLetter = mediumChar.toUpper
    val sequentialMedium = (sideLetter - 'A') / 2 + 1
    if (isAutocoupled) autocoupledSeq.getOrElse(sideLetter, sequentialMedium) else sequentialMedium
  }

  /**
   * A track position that starts with a side letter; yields
   * `(medium number as a string, full position text)`.
   *
   * The first alternative covers a side letter followed by either digits or further
   * letters (the two cases share one action). The second covers letters on their own.
   */
  def trackPosWithSide = rule {
    capture(side) ~ trackPos ~> ((sideLetter, pos) => (convertToMediumNumber(sideLetter.head).toString, sideLetter + pos)) |
      capture(oneOrMore(CharPredicate.Alpha)) ~> ((pos) => (convertToMediumNumber(pos.head).toString, pos))
  }

  /** A plain track position: all digits or all letters (captured). */
  def trackPos = rule {
    capture(oneOrMore(CharPredicate.Digit)) |
      capture(oneOrMore(CharPredicate.Alpha))
  }

  def dotSeparator = rule { "." }

  def hyphenSeparator = rule { "-" }

  /** `<FORMAT><n>.<track>`, with the sub-track form tried first; the format prefix is required. */
  private def formatDotTrack = rule {
    mediumPosMandatoryFormat ~ dotSeparator ~ trackWithSubTrackPos ~> ((medium, track, sub) => ParsedTrackPosition(Some(medium._1), Some(medium._2), Some(track), Some(sub))) |
      mediumPosMandatoryFormat ~ dotSeparator ~ trackPos ~> ((medium, track) => ParsedTrackPosition(Some(medium._1), Some(medium._2), Some(track), None))
  }

  /** `[FORMAT]<n>.<track>` without a sub-track; only part of the multi-disc grammar. */
  private def plainDotTrack = rule {
    mediumPos ~ dotSeparator ~ trackPos ~> ((medium, track) => ParsedTrackPosition(medium._1, medium._2, Some(track), None))
  }

  /** `[FORMAT]<n>-<track>`, with the sub-track form tried first. */
  private def hyphenTrack = rule {
    mediumPos ~ hyphenSeparator ~ trackWithSubTrackPos ~> ((medium, track, sub) => ParsedTrackPosition(medium._1, medium._2, Some(track), Some(sub))) |
      mediumPos ~ hyphenSeparator ~ trackPos ~> ((medium, track) => ParsedTrackPosition(medium._1, medium._2, Some(track), None))
  }

  /** A position with no explicit medium: track plus sub-track, side-lettered track, or plain track. */
  private def bareTrack = rule {
    trackWithSubTrackPos ~> ((track, sub) => ParsedTrackPosition(None, None, Some(track), Some(sub))) |
      trackPosWithSide ~> ((mediumAndTrackPos) => ParsedTrackPosition(None, Some(mediumAndTrackPos._1), Some(mediumAndTrackPos._2), None)) |
      trackPos ~> (track => ParsedTrackPosition(None, None, Some(track), None))
  }

  /**
   * Top-level rule: parses a complete position into a [[ParsedTrackPosition]].
   *
   * The alternatives are tried in order and the first match wins. The multi-disc
   * grammar differs from the single-disc one only by the extra [[plainDotTrack]]
   * alternative, which sits between the format-prefixed and the hyphenated forms.
   */
  def mediumAndTrack =
    if (isMultiDisc) rule { formatDotTrack | plainDotTrack | hyphenTrack | bareTrack }
    else rule { formatDotTrack | hyphenTrack | bareTrack }
}
/**
 * The outcome of parsing a Discogs track position. Every component is optional
 * because a position string may omit it.
 *
 * @param format         optional format for the encompassing medium
 * @param mediumPosition position of the medium, kept as text
 * @param trackPosition  position of the track, kept as text
 * @param subTrack       sub-track within the track, kept as text
 */
case class ParsedTrackPosition(
  format: Option[String],
  mediumPosition: Option[String],
  trackPosition: Option[String],
  subTrack: Option[String]
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment