Last active
January 20, 2016 11:15
-
-
Save gravelld/6f068e53a44cdb4738a8 to your computer and use it in GitHub Desktop.
A parboiled2 parser for Discogs track positions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.elsten.adb.extract.discogs.release | |
import org.parboiled2._ | |
/**
 * Parses Discogs track position strings according to
 * http://www.discogs.com/help/doc/submission-guidelines-release-trk#Position
 *
 * The entry point is [[mediumAndTrack]], which produces a [[ParsedTrackPosition]].
 *
 * @param input         the position string to parse
 * @param isMultiDisc   when true, a bare `<medium>.<track>` position (no "CD"/"DVD" prefix) is
 *                      also accepted as a medium/track pair
 * @param isAutocoupled when true, side letters are mapped to media via [[autocoupledSeq]] before
 *                      falling back to sequential pairing
 */
class DiscogsTrackPositionParser(val input: ParserInput, val isMultiDisc: Boolean = false, val isAutocoupled: Boolean = false) extends Parser {

  /** The medium format prefixes this parser recognises. */
  def mediumPrefixKnownFormat: Rule0 = rule { "DVD" | "CD" }

  /** A medium position with an optional format prefix; yields `(format, Some(position))`. */
  def mediumPos: Rule1[(Option[String], Option[String])] = rule {
    optional(capture(mediumPrefixKnownFormat)) ~ capture(oneOrMore(CharPredicate.Digit)) ~> ((format, pos) => (format, Some(pos)))
  }

  /** A medium position whose format prefix is required; yields `(format, position)`. */
  def mediumPosMandatoryFormat: Rule1[(String, String)] = rule {
    capture(mediumPrefixKnownFormat) ~ capture(oneOrMore(CharPredicate.Digit)) ~> ((format, pos) => (format, pos))
  }

  /** A numeric sub-track: a mandatory dot followed by digits. Only the digits are captured. */
  def subtrackDigit: Rule1[String] = rule { "." ~ capture(oneOrMore(CharPredicate.Digit)) }

  /** An alphabetic sub-track: an optional dot followed by letters. Only the letters are captured. */
  def subtrackAlpha: Rule1[String] = rule { optional(".") ~ capture(oneOrMore(CharPredicate.Alpha)) }

  /** A single alphabetic side character. */
  def side: Rule0 = rule { CharPredicate.Alpha }

  /**
   * A track position (optional side letter plus digits) followed by a sub-track.
   * Yields the track position and then the sub-track.
   */
  def trackWithSubTrackPos: Rule2[String, String] = rule {
    // The alphabetic sub-track is tried first, as before. The former third alternative
    // (digits without a side, then an alphabetic sub-track) was unreachable because
    // optional(side) already covers the side-less case, so it has been dropped.
    capture(optional(side) ~ oneOrMore(CharPredicate.Digit)) ~ (subtrackAlpha | subtrackDigit)
  }

  // A map of auto coupled sides to medium number.
  // Further discs are deliberately left out. See Songs In The Key Of Life, which has 3 media, but
  // trying to auto couple them would give four media: 'E' -> 3, 'H' -> 3, 'F' -> 4, 'G' -> 4
  val autocoupledSeq: Map[Char, Int] = Map('A' -> 1, 'D' -> 1, 'B' -> 2, 'C' -> 2)

  /**
   * Maps a side letter to a 1-based medium number.
   *
   * Sequential pairing puts two consecutive sides on each medium (A/B -> 1, C/D -> 2, ...).
   * When `isAutocoupled` is set, letters present in [[autocoupledSeq]] use that mapping instead
   * and all other letters fall back to sequential pairing.
   *
   * @param mediumChar the side letter; case is ignored
   * @return the medium number for that side
   */
  def convertToMediumNumber(mediumChar: Char): Int = {
    val sideLetter = mediumChar.toUpper
    val sequentialMedium = ((sideLetter - 'A') / 2) + 1
    if (isAutocoupled) autocoupledSeq.getOrElse(sideLetter, sequentialMedium) else sequentialMedium
  }

  /**
   * A track position that starts with a side letter. Yields `(medium number, full position)`,
   * where the medium number is derived from the first letter via [[convertToMediumNumber]].
   */
  def trackPosWithSide: Rule1[(String, String)] = rule {
    // `.head` is safe in both actions: each captured string holds at least one character.
    capture(side) ~ capture(oneOrMore(CharPredicate.Alpha) | oneOrMore(CharPredicate.Digit)) ~> ((sideLetter, pos) => (convertToMediumNumber(sideLetter.head).toString, sideLetter + pos)) |
    capture(oneOrMore(CharPredicate.Alpha)) ~> ((pos) => (convertToMediumNumber(pos.head).toString, pos))
  }

  /** A plain track position: either all digits or all letters. */
  def trackPos: Rule1[String] = rule {
    capture(oneOrMore(CharPredicate.Digit)) |
    capture(oneOrMore(CharPredicate.Alpha))
  }

  def dotSeparator: Rule0 = rule { "." }

  def hyphenSeparator: Rule0 = rule { "-" }

  /**
   * Entry point: parses a complete position into a [[ParsedTrackPosition]].
   *
   * Alternatives are tried in order and the first match wins. The bare `<medium>.<track>`
   * alternative is only enabled when `isMultiDisc` is true; every other alternative applies
   * regardless of that flag.
   *
   * Note that this rule does not itself require the end of the input to be reached.
   */
  def mediumAndTrack: Rule1[ParsedTrackPosition] = rule {
    mediumPosMandatoryFormat ~ dotSeparator ~ trackWithSubTrackPos ~> ((medium, track, sub) => ParsedTrackPosition(Some(medium._1), Some(medium._2), Some(track), Some(sub))) |
    mediumPosMandatoryFormat ~ dotSeparator ~ trackPos ~> ((medium, track) => ParsedTrackPosition(Some(medium._1), Some(medium._2), Some(track), None)) |
    test(isMultiDisc) ~ mediumPos ~ dotSeparator ~ trackPos ~> ((medium, track) => ParsedTrackPosition(medium._1, medium._2, Some(track), None)) |
    mediumPos ~ hyphenSeparator ~ trackWithSubTrackPos ~> ((medium, track, sub) => ParsedTrackPosition(medium._1, medium._2, Some(track), Some(sub))) |
    mediumPos ~ hyphenSeparator ~ trackPos ~> ((medium, track) => ParsedTrackPosition(medium._1, medium._2, Some(track), None)) |
    trackWithSubTrackPos ~> ((track, sub) => ParsedTrackPosition(None, None, Some(track), Some(sub))) |
    trackPosWithSide ~> ((mediumAndPos) => ParsedTrackPosition(None, Some(mediumAndPos._1), Some(mediumAndPos._2), None)) |
    trackPos ~> (track => ParsedTrackPosition(None, None, Some(track), None))
  }
}
/**
 * The outcome of parsing one track position.
 *
 * @param format         optional format of the encompassing medium
 * @param mediumPosition optional position of the medium the track belongs to
 * @param trackPosition  optional position of the track itself
 * @param subTrack       optional sub-track component of the position
 */
case class ParsedTrackPosition(
  format: Option[String],
  mediumPosition: Option[String],
  trackPosition: Option[String],
  subTrack: Option[String]
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment