/srt_parser_with_validation.swift Secret
Last active
January 26, 2021 18:46
SRT parser with validation (!).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Parses one SRT timing line of the form `<start> --> <end>`.
///
/// Both timecodes are read with `timecodeParser`; the literal `" --> "` between them is
/// matched and discarded. The line is rejected unless `start < end`, i.e. the start and end
/// timecodes of a single group must strictly increase.
let timecodeLineParser = timecodeParser
  .skip(StartsWith(" --> ".utf8))
  .take(timecodeParser)
  .filter { start, end in start < end }
// … | |
/// Parses a complete SRT document into its subtitle groups.
///
/// Groups are parsed with `srtGroupParser` and must be separated by two consecutive
/// newlines (i.e. one blank line). `End()` requires the whole input to be consumed, and the
/// trailing `filter` fails the parse unless
/// `sequenceNumbersAndTimecodesAreInIncreasingOrder()` holds for the parsed groups.
let srtParser = Many(srtGroupParser, separator: Newline().skip(Newline()))
  .skip(End())
  // A predicate check is all that is needed here, so use `filter` (as `timecodeLineParser`
  // does) instead of spelling out the succeed/fail branches via `flatMap` + `Conditional`.
  .filter { $0.sequenceNumbersAndTimecodesAreInIncreasingOrder() }
private extension Collection where Element == SubtitleGroup {
  /// Checks the cross-group ordering rules of a parsed SRT document.
  ///
  /// - Returns: `true` only when the first group has sequence number `1`, every following
  ///   group's sequence number is exactly one more than its predecessor's, and no group
  ///   starts before the previous group has ended. An empty collection yields `false`
  ///   because there is no first group numbered `1`.
  func sequenceNumbersAndTimecodesAreInIncreasingOrder() -> Bool {
    guard let head = first, head.sequenceNumber == 1 else { return false }

    var previous = head
    for current in dropFirst() {
      // Sequence numbers must be consecutive.
      guard previous.sequenceNumber + 1 == current.sequenceNumber else { return false }
      // Adjacent groups must have _non-decreasing_ timecodes (touching is allowed).
      guard previous.endTimecode <= current.startTimecode else { return false }
      previous = current
    }
    return true
  }
}
/// Sample SRT document with seven subtitle groups.
///
/// Each group is a sequence number, a `start --> end` timing line and the subtitle text.
/// Groups are separated by a blank line, which is what `srtParser`'s
/// `Newline().skip(Newline())` separator expects.
let sampleSRTString =
"""
1
00:00:00,540 --> 00:00:00,960
Yo-yo

2
00:00:00,960 --> 00:00:01,490
yo

3
00:00:01,520 --> 00:00:01,830
this

4
00:00:01,830 --> 00:00:02,010
is

5
00:00:02,010 --> 00:00:02,460
an

6
00:00:02,460 --> 00:00:02,760
audio

7
00:00:02,760 --> 00:00:03,240
recording
"""

// `parse` returns `nil` when the input is malformed or fails validation, so report that
// instead of force-unwrapping and crashing.
if let groups = srtParser.parse(sampleSRTString.utf8) {
  dump(groups)
} else {
  print("Failed to parse the sample SRT string.")
}
// Outputs:
// ```
// ▿ 7 elements
// ▿ SubtitleGroup
// - sequenceNumber: 1
// - startTimecode: 0.54
// - endTimecode: 0.96
// - substring: "Yo-yo"
// ▿ SubtitleGroup
// - sequenceNumber: 2
// - startTimecode: 0.96
// - endTimecode: 1.49
// - substring: "yo"
// ▿ SubtitleGroup
// - sequenceNumber: 3
// - startTimecode: 1.52
// - endTimecode: 1.83
// - substring: "this"
// ▿ SubtitleGroup
// - sequenceNumber: 4
// - startTimecode: 1.83
// - endTimecode: 2.01
// - substring: "is"
// ▿ SubtitleGroup
// - sequenceNumber: 5
// - startTimecode: 2.01
// - endTimecode: 2.46
// - substring: "an"
// ▿ SubtitleGroup
// - sequenceNumber: 6
// - startTimecode: 2.46
// - endTimecode: 2.76
// - substring: "audio"
// ▿ SubtitleGroup
// - sequenceNumber: 7
// - startTimecode: 2.76
// - endTimecode: 3.24
// - substring: "recording"
// ```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment