Skip to content

Instantly share code, notes, and snippets.

@ihfazhillah
Created May 15, 2016 19:39
Show Gist options
  • Save ihfazhillah/3654de4219d5863a91b4704a6549da1f to your computer and use it in GitHub Desktop.
Save ihfazhillah/3654de4219d5863a91b4704a6549da1f to your computer and use it in GitHub Desktop.
a simple vtt to srt converter
import unittest
"""A simple vtt to srt converter.
penggunaan cukup mudah, gunakan convert(path_vtt_file) dan isi file akan diubah menjadi srt file
dengan nama yang sama dan beda extensi.
Kita juga bisa tambahkan parameter lainnya yaitu path_to, berguna bila kita ingin nama yang berbeda.
misal:
1. sebuah file bernama sub.vtt
2. kita ingin sub.srt
penyelesaian:
convert("sub.vtt") <-- gunakan absolute path supaya berubah disama folder
misal:
1. sebuah file bernama sub_ku.vtt
2. kita ingin jadikan sub_mu.srt
penyelesaian:
convert("sub_ku.vtt", "sub_mu.srt") <-- gunakan absolute path supaya berubah disama folder
"""
# Sample WebVTT input used by the unit tests in this file.
# The empty line after the header block and between cues is part of the
# format; the tests expect the first timing line at line index 4.
vtt_text = """
WEBVTT
Kind: captions
Language: en

00:00:00.460 --> 00:00:02.170
Now imagine that this yellow sheet of

00:00:02.170 --> 00:00:05.640
paper is the Python Standard Library. Imagine that

00:00:05.640 --> 00:00:12.070
this is what we got when we downloaded Python. Inside Python is a file named

00:00:13.220 --> 00:00:17.150
webbrowser. And inside webbrowser is a function

00:00:17.150 --> 00:00:20.470
called open. Now this is the function that

00:00:20.470 --> 00:00:23.088
allowed us to play the YouTube video.

00:00:23.088 --> 00:00:25.780
Much like webbrowser, there is another file or

00:00:25.780 --> 00:00:28.920
module inside the Python Standard Library. It's
""".strip()
# Expected result of converting the sample captions: numbered entries
# separated by exactly one empty line, with no leading or trailing whitespace.
# Timing lines appear exactly as they do in the WebVTT sample.
srt_expected = """
1
00:00:00.460 --> 00:00:02.170
Now imagine that this yellow sheet of

2
00:00:02.170 --> 00:00:05.640
paper is the Python Standard Library. Imagine that

3
00:00:05.640 --> 00:00:12.070
this is what we got when we downloaded Python. Inside Python is a file named

4
00:00:13.220 --> 00:00:17.150
webbrowser. And inside webbrowser is a function

5
00:00:17.150 --> 00:00:20.470
called open. Now this is the function that

6
00:00:20.470 --> 00:00:23.088
allowed us to play the YouTube video.

7
00:00:23.088 --> 00:00:25.780
Much like webbrowser, there is another file or

8
00:00:25.780 --> 00:00:28.920
module inside the Python Standard Library. It's
""".strip()
class VTTtoSRT(object):
    """Turn WebVTT caption text into numbered SRT-style entries.

    A line containing ``-->`` is treated as a cue timing line.  The text of
    a cue is every line that follows its timing line, up to the first empty
    line or the next timing line.  Everything before the first timing line
    (for example the ``WEBVTT`` header block) is ignored.

    NOTE: timing lines are copied to the output verbatim, so the ``.``
    millisecond separator of the input is kept as-is.
    """

    def __init__(self, vtt_text):
        """Store the raw WebVTT text to convert."""
        self.vtt_text = vtt_text

    def _lines(self):
        """Return the stored text as a list of lines without line endings."""
        # splitlines() also copes with CRLF input, which split("\n") would
        # leave carrying a trailing "\r" on every line.
        return self.vtt_text.splitlines()

    def get_times(self):
        """Return ``(timing_line, line_index)`` for every cue timing line.

        ``line_index`` is the zero-based position of the timing line within
        the stored text.
        """
        return [
            (line, index)
            for index, line in enumerate(self._lines())
            if "-->" in line
        ]

    def get_vals(self):
        """Return one list of text lines per cue, in the order of get_times().

        A cue without any text yields an empty list.
        """
        vals = []
        current = None  # text lines of the cue being read; None outside a cue
        for line in self._lines():
            if "-->" in line:
                # A timing line opens a new cue (and closes the previous one).
                current = []
                vals.append(current)
            elif not line:
                # An empty line ends the cue text; whatever follows (cue
                # identifiers, comment blocks) is not part of that cue.
                current = None
            elif current is not None:
                current.append(line)
        return vals

    def convert(self):
        """Return the converted text.

        Each entry is a 1-based counter line, the timing line and all text
        lines of the cue; entries are separated by one empty line and the
        result carries no leading or trailing whitespace.
        """
        entries = []
        cues = zip(self.get_times(), self.get_vals())
        for number, ((timing, _), text_lines) in enumerate(cues, 1):
            entries.append(
                "{number}\n{timing}\n{text}\n".format(
                    number=number,
                    timing=timing,
                    # Keep every line of a multi-line cue, not just the first.
                    text="\n".join(text_lines),
                )
            )
        return "\n".join(entries).strip()
class VTTtoSRTTestCase(unittest.TestCase):
    """Unit tests for VTTtoSRT, run against the module-level sample captions."""

    # Do not truncate assertion diffs; the compared caption strings are long.
    maxDiff = None

    def setUp(self):
        # Every test starts from a fresh converter over the same sample text.
        self.converter = VTTtoSRT(vtt_text)

    def test_get_time_line(self):
        """The sample contains eight timing lines."""
        found = self.converter.get_times()
        self.assertEqual(len(found), 8)

    def test_first_time_line(self):
        """The first timing line is reported together with its line index."""
        first = self.converter.get_times()[0]
        self.assertEqual(first, ('00:00:00.460 --> 00:00:02.170', 4))

    def test_val(self):
        """The fourth cue carries exactly one line of text."""
        fourth = self.converter.get_vals()[3]
        self.assertEqual(
            fourth, ["webbrowser. And inside webbrowser is a function"])

    def test_convert(self):
        """Converting the whole sample yields the expected text."""
        produced = self.converter.convert()
        self.assertEqual(produced, srt_expected)
def convert(path, path_to=None):
    """Convert the WebVTT file at ``path`` and write the result to a file.

    Args:
        path: Path of the WebVTT file to read.
        path_to: Path of the file to write.  When omitted (or empty), the
            output path is ``path`` with its extension replaced by ``.srt``.

    The files are opened in text mode with the platform's default encoding.
    """
    import os  # local import so this function does not rely on file-level imports

    if not path_to:
        # Swap only the file extension.  A plain str.replace("vtt", "srt")
        # would also rewrite directory or file names that contain "vtt", and
        # would leave the path unchanged (overwriting the input) when the
        # extension is not a lowercase "vtt".
        root, _ = os.path.splitext(path)
        path_to = root + ".srt"

    with open(path, "r") as source:
        vtt_source = source.read()

    converted = VTTtoSRT(vtt_source).convert()

    with open(path_to, "w") as target:
        target.write(converted)
# Running this file as a script executes the unit tests defined in it.
if __name__ == "__main__":
    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment