Skip to content

Instantly share code, notes, and snippets.

@tutysara
Created November 22, 2017 22:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tutysara/27b2a06e461e3ebab1b360dadaaa5872 to your computer and use it in GitHub Desktop.
Save tutysara/27b2a06e461e3ebab1b360dadaaa5872 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# -*- coding: utf-8 -*-
__author__ = 'AxelAli'
import os
import sys
#MADE BY AXEL ALI
#https://github.com/AxelAli
#USAGE:
# $ VTT Formatter.py [DIR]
# $ VTT Formatter.py ./subs
import re
# Matches any markup tag such as <c>, </c> or <00:00:01.000>; compiled once
# so the per-line loop does not look the pattern up again for every line.
_TAG_PATTERN = re.compile(r'<[^>]*>')

# A line containing this arrow is treated as a cue timing line.
_CUE_TIMING_MARKER = '-->'


def _extract_cue_text(lines):
    """Yield the subtitle text lines found in an iterable of .vtt lines.

    Everything before the first timing line (a line containing '-->') is
    skipped as header/metadata. Timing lines themselves are never emitted.
    Tags of the form <...> are removed from the remaining lines, and lines
    that are blank after tag removal are dropped.

    Every yielded line ends with a newline, so text from a file whose last
    line has no terminator does not run into the next line written.
    """
    contents_reached = False
    for line in lines:
        if _CUE_TIMING_MARKER in line:
            contents_reached = True
            continue
        if not contents_reached:
            continue
        text = _TAG_PATTERN.sub('', line)
        # strip() also rejects whitespace-only and "\r\n" lines, which a
        # plain length check would let through.
        if not text.strip():
            continue
        if not text.endswith('\n'):
            text += '\n'
        yield text


def convert_directory(source_dir):
    """Convert every .vtt file directly inside source_dir to plain text.

    The output directory is source_dir with "txt" appended to the path
    string (for example ./subs -> ./substxt); it is created when missing.
    Each NAME.vtt produces NAME.txt there, and the text of all files is also
    appended to allcombined.txt in the same directory.

    Files are processed in sorted name order so the combined file is
    reproducible between runs.

    Returns the path of the output directory.
    """
    print("Searching inside : " + source_dir)
    newdirectory = source_dir + "txt"
    print("newdirectory " + newdirectory)
    if not os.path.exists(newdirectory):
        os.makedirs(newdirectory)

    combined_path = os.path.join(newdirectory, "allcombined.txt")
    # Context managers guarantee every handle is flushed and closed, even
    # when a file fails part-way through.
    with open(combined_path, 'w') as combined:
        for file_name in sorted(os.listdir(source_dir)):
            if not file_name.endswith(".vtt"):
                continue
            print("Formating : " + file_name)
            # Swap only the trailing extension; str.replace would also
            # rewrite a ".vtt" that appears in the middle of the name.
            target_name = file_name[:-len(".vtt")] + ".txt"
            source_path = os.path.join(source_dir, file_name)
            target_path = os.path.join(newdirectory, target_name)
            with open(source_path) as source, open(target_path, 'w') as newfile:
                for text in _extract_cue_text(source):
                    newfile.write(text)
                    combined.write(text)
            print("DONE!")
    return newdirectory


def main(argv=None):
    """Command-line entry point: VTT Formatter.py [DIR].

    argv defaults to sys.argv. Exits with a message (non-zero status) when
    the directory argument is missing or does not name a directory, instead
    of failing with a traceback after the output directory was created.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit("USAGE: VTT Formatter.py [DIR]")
    source_dir = argv[1]
    if not os.path.isdir(source_dir):
        sys.exit("Not a directory: " + source_dir)
    convert_directory(source_dir)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment