Skip to content

Instantly share code, notes, and snippets.

@jasdev
Last active January 26, 2021 18:46
Show Gist options
  • Save jasdev/4f5f53d3f6f9ca977bcead97ae7b34c6 to your computer and use it in GitHub Desktop.
Save jasdev/4f5f53d3f6f9ca977bcead97ae7b34c6 to your computer and use it in GitHub Desktop.
SRT parser with validation (!).
/// Parses one timecode line of the form `<start> --> <end>` and yields the
/// `(start, end)` pair. The parse is rejected unless `start` is strictly
/// less than `end`.
let timecodeLineParser = timecodeParser
    .skip(StartsWith(" --> ".utf8))
    .take(timecodeParser)
    // Within a single group the end timecode must come strictly after the start.
    .filter { start, end in start < end }
// …
/// Parses a sequence of subtitle groups, each separated from the next by two
/// consecutive newlines (i.e. a blank line), followed by `End()`.
///
/// After the structural parse succeeds, the groups are validated as a whole:
/// if `sequenceNumbersAndTimecodesAreInIncreasingOrder()` returns `false`,
/// the overall parse fails instead of returning the groups.
let srtParser = Many(srtGroupParser, separator: Newline().skip(Newline()))
    .skip(End())
    .flatMap { parsedGroups in
        // Both branches must share one parser type, hence `Conditional`.
        parsedGroups.sequenceNumbersAndTimecodesAreInIncreasingOrder()
            ? Conditional.first(Always(parsedGroups))
            : .second(Fail())
    }
private extension Collection where Element == SubtitleGroup {
    /// Checks that the groups form a well-ordered subtitle sequence.
    ///
    /// - Returns: `true` only when the first group's sequence number is `1`,
    ///   every following group's sequence number is exactly one greater than
    ///   its predecessor's, and each group's end timecode is less than or
    ///   equal to the next group's start timecode. An empty collection
    ///   yields `false`, because there is no first group numbered `1`.
    func sequenceNumbersAndTimecodesAreInIncreasingOrder() -> Bool {
        guard let head = first, head.sequenceNumber == 1 else { return false }

        // Pairing the collection with itself shifted by one walks every
        // adjacent (earlier, later) pair — a trick credited to
        // [Nate Cook](https://twitter.com/nnnnnnnn).
        for (earlier, later) in zip(self, dropFirst()) {
            guard
                earlier.sequenceNumber + 1 == later.sequenceNumber,
                // Adjacent groups must have _non-decreasing_ timecodes.
                earlier.endTimecode <= later.startTimecode
            else { return false }
        }
        return true
    }
}
// Sample SRT document with seven subtitle groups. Each group is a sequence
// number, a timecode line, and the subtitle text. Groups are separated by a
// blank line, which is what `srtParser`'s `Newline().skip(Newline())`
// separator expects between consecutive groups.
let sampleSRTString =
"""
1
00:00:00,540 --> 00:00:00,960
Yo-yo

2
00:00:00,960 --> 00:00:01,490
yo

3
00:00:01,520 --> 00:00:01,830
this

4
00:00:01,830 --> 00:00:02,010
is

5
00:00:02,010 --> 00:00:02,460
an

6
00:00:02,460 --> 00:00:02,760
audio

7
00:00:02,760 --> 00:00:03,240
recording
"""
// Parse the sample and dump the resulting groups. `parse` returns an optional,
// so report a failed parse instead of crashing on a force-unwrap.
if let parsedGroups = srtParser.parse(sampleSRTString.utf8) {
    dump(parsedGroups)
} else {
    print("Failed to parse sampleSRTString as a valid SRT document.")
}
// Outputs:
// ```
// ▿ 7 elements
// ▿ SubtitleGroup
// - sequenceNumber: 1
// - startTimecode: 0.54
// - endTimecode: 0.96
// - substring: "Yo-yo"
// ▿ SubtitleGroup
// - sequenceNumber: 2
// - startTimecode: 0.96
// - endTimecode: 1.49
// - substring: "yo"
// ▿ SubtitleGroup
// - sequenceNumber: 3
// - startTimecode: 1.52
// - endTimecode: 1.83
// - substring: "this"
// ▿ SubtitleGroup
// - sequenceNumber: 4
// - startTimecode: 1.83
// - endTimecode: 2.01
// - substring: "is"
// ▿ SubtitleGroup
// - sequenceNumber: 5
// - startTimecode: 2.01
// - endTimecode: 2.46
// - substring: "an"
// ▿ SubtitleGroup
// - sequenceNumber: 6
// - startTimecode: 2.46
// - endTimecode: 2.76
// - substring: "audio"
// ▿ SubtitleGroup
// - sequenceNumber: 7
// - startTimecode: 2.76
// - endTimecode: 3.24
// - substring: "recording"
// ```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment