-
-
Save jasdev/4f5f53d3f6f9ca977bcead97ae7b34c6 to your computer and use it in GitHub Desktop.
SRT parser with validation (!).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Parses one SRT timing line of the form `<start> --> <end>`, producing the
/// two `timecodeParser` outputs as a `(start, end)` pair.
///
/// The parse fails unless `start` is strictly less than `end`.
let timecodeLineParser = timecodeParser
  .skip(StartsWith(" --> ".utf8))
  .take(timecodeParser)
  .filter { start, end in start < end } // Within a group, `end` must come strictly after `start`.
// … (intermediate parsers such as `srtGroupParser` are elided here)
/// Parses a whole SRT document into its subtitle groups.
///
/// Groups are separated by a blank line (two consecutive newlines) and the
/// input must be consumed to its end. The parse fails unless
/// `sequenceNumbersAndTimecodesAreInIncreasingOrder()` accepts the parsed groups.
let srtParser = Many(srtGroupParser, separator: Newline().skip(Newline()))
  .skip(End())
  // Cross-group validation: reject the whole parse when numbering or timing is out of order.
  .filter { $0.sequenceNumbersAndTimecodesAreInIncreasingOrder() }
private extension Collection where Element == SubtitleGroup {
  /// Reports whether the groups are numbered consecutively starting at 1 and
  /// no group starts before the previous one has ended.
  ///
  /// - Returns: `false` for an empty collection or when the first sequence
  ///   number is not 1; otherwise `true` only if every adjacent pair has
  ///   consecutive sequence numbers and non-decreasing timecodes.
  func sequenceNumbersAndTimecodesAreInIncreasingOrder() -> Bool {
    guard let head = first, head.sequenceNumber == 1 else { return false }

    // Pair each group with its successor by zipping the collection with its
    // own tail (a trick credited to [Nate Cook](https://twitter.com/nnnnnnnn)).
    for (previous, next) in zip(self, dropFirst()) {
      // Sequence numbers must advance by exactly one.
      guard previous.sequenceNumber + 1 == next.sequenceNumber else { return false }
      // Adjacent groups may touch, but must not overlap in time.
      guard previous.endTimecode <= next.startTimecode else { return false }
    }
    return true
  }
}
/// Sample SRT input with seven subtitle groups.
///
/// Groups are separated by a blank line, which is what `srtParser`'s
/// `Newline().skip(Newline())` separator requires; without the blank lines
/// the sample cannot be parsed.
let sampleSRTString =
"""
1
00:00:00,540 --> 00:00:00,960
Yo-yo

2
00:00:00,960 --> 00:00:01,490
yo

3
00:00:01,520 --> 00:00:01,830
this

4
00:00:01,830 --> 00:00:02,010
is

5
00:00:02,010 --> 00:00:02,460
an

6
00:00:02,460 --> 00:00:02,760
audio

7
00:00:02,760 --> 00:00:03,240
recording
"""
// Parse the sample and dump the resulting groups. The optional result is
// unwrapped safely so that a failed parse is reported instead of trapping.
if let groups = srtParser.parse(sampleSRTString.utf8) {
  dump(groups)
} else {
  print("Failed to parse the sample SRT string.")
}
// Outputs:
// ```
// ▿ 7 elements
//   ▿ SubtitleGroup
//     - sequenceNumber: 1
//     - startTimecode: 0.54
//     - endTimecode: 0.96
//     - substring: "Yo-yo"
//   ▿ SubtitleGroup
//     - sequenceNumber: 2
//     - startTimecode: 0.96
//     - endTimecode: 1.49
//     - substring: "yo"
//   ▿ SubtitleGroup
//     - sequenceNumber: 3
//     - startTimecode: 1.52
//     - endTimecode: 1.83
//     - substring: "this"
//   ▿ SubtitleGroup
//     - sequenceNumber: 4
//     - startTimecode: 1.83
//     - endTimecode: 2.01
//     - substring: "is"
//   ▿ SubtitleGroup
//     - sequenceNumber: 5
//     - startTimecode: 2.01
//     - endTimecode: 2.46
//     - substring: "an"
//   ▿ SubtitleGroup
//     - sequenceNumber: 6
//     - startTimecode: 2.46
//     - endTimecode: 2.76
//     - substring: "audio"
//   ▿ SubtitleGroup
//     - sequenceNumber: 7
//     - startTimecode: 2.76
//     - endTimecode: 3.24
//     - substring: "recording"
// ```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment