# Install the following Python modules:
#     pip3 install librosa SpeechRecognition
# Install ffmpeg and make sure the `ffmpeg` executable is on your PATH.
# CLI app
import math
import os
import subprocess

import librosa  # pip3 install librosa
import speech_recognition as sr  # pip3 install SpeechRecognition

# install `ffmpeg`
# Goes over every single file in the input directory (each is expected to be
# a video), converts it to a wav audio file, then generates a subtitle
# (transcript) text file from that audio.
inputDir = 'inputs'  # input videos
subDir = 'subs'  # output subtitle directory (one .txt per input)
outputDir = 'out'  # output audio directory (one .wav per input)
def makeDir(name: str) -> None:
    """Create the directory `name` if nothing exists at that path yet.

    Missing parent directories are created as well. If `name` already exists
    (as a directory or as any other kind of file) it is left untouched and no
    error is raised.

    Args:
        name: Path of the directory to create.
    """
    if not os.path.exists(name):
        # exist_ok=True covers the case where the directory appears between
        # the existence check above and this call.
        os.makedirs(name, exist_ok=True)
# Make sure the three working directories exist before any file is processed.
makeDir(inputDir)
makeDir(subDir)
makeDir(outputDir)
def _convertToWav(inputFile: str, outputFile: str) -> bool:
    """Run ffmpeg to convert `inputFile` into `outputFile`.

    The command is passed as an argument list (no shell involved), so paths
    containing spaces or shell metacharacters are passed through unchanged.
    ffmpeg's stderr output is discarded.

    Args:
        inputFile: Path of the source media file.
        outputFile: Path of the audio file to create.

    Returns:
        True if ffmpeg exited with status 0; False if it exited with a
        non-zero status or could not be started at all.
    """
    try:
        result = subprocess.run(
            ['ffmpeg', '-i', inputFile, outputFile],
            stderr=subprocess.DEVNULL,
        )
    except OSError:
        # Raised when the executable cannot be launched, e.g. ffmpeg is not
        # installed or not on PATH.
        return False
    return result.returncode == 0


# Process every regular file found directly inside the input directory.
for file in os.listdir(inputDir):
    inputFile = os.path.join(inputDir, file)
    # Sub-directories and other non-regular entries are ignored.
    if not os.path.isfile(inputFile):
        continue

    # splitext only strips the last extension, so dots inside the name are
    # kept ("talk.part1.mp4" -> "talk.part1") and such files do not collide.
    filename = os.path.splitext(file)[0]
    print(f'- Processing "{filename}":')

    # Step 1: generate the wav audio file unless it already exists.
    outputFile = os.path.join(outputDir, f'{filename}.wav')
    if not os.path.exists(outputFile):
        print(f'\t* [Process] Converting {filename} to wav...')
        if not _convertToWav(inputFile, outputFile):
            # Without audio there is nothing to transcribe; move on to the
            # next input instead of failing later on a missing wav file.
            print(f'\t* [Fail] Unable to convert {filename} to wav.\n')
            continue
        print(f'\t* [Done] {filename} converted to wav.')
    else:
        print(f'\t* [Skip] {filename} Audio file already exist.')

    # Step 2: generate the subtitle file unless it already exists.
    subFile = os.path.join(subDir, f'{filename}.txt')
    if os.path.exists(subFile):
        print(f'\t* [Skip] Subtitle for {filename} is already exist.\n')
        continue

    print(f'\t* [Process] Generating subtitle for {filename}...')
    recognizer = sr.Recognizer()

    # The audio is transcribed in consecutive chunks of this many seconds.
    durationPerCycle = 120
    # A trailing chunk shorter than this many seconds is skipped.
    # Edit this value if 5 seconds fail for you.
    minTailSeconds = 5
    totalDuration = math.floor(librosa.get_duration(path=outputFile))

    # `offset` is the start of the current chunk, `chunkEnd` its end; both
    # are measured in seconds from the beginning of the audio.
    offset = 0
    while offset < totalDuration:
        chunkEnd = min(offset + durationPerCycle, totalDuration)

        if (totalDuration - offset) < minTailSeconds:
            print(f'\t\t* [Fail] Unable to transcribe from {offset / 60} to {totalDuration / 60} minutes')
            break

        print(f'\t\t* Transcribing from {offset / 60} to {chunkEnd / 60} out of {totalDuration / 60} minutes.')
        with sr.AudioFile(outputFile) as source:
            # `duration` is the LENGTH of audio to read starting at `offset`,
            # not an end timestamp, hence the subtraction.
            audio = recognizer.record(source, offset=offset, duration=chunkEnd - offset)

        try:
            transcribe = recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            # No recognizable speech in this chunk: report it and carry on
            # with the next chunk instead of aborting the whole run.
            print(f'\t\t* [Fail] Unable to transcribe from {offset / 60} to {chunkEnd / 60} minutes')
        else:
            # Append to subtitle file; UTF-8 so any transcript text can be
            # written regardless of the platform's default encoding.
            with open(subFile, 'a', encoding='utf-8') as subOutFile:
                subOutFile.write('\n' + transcribe)
            print(f'\t\t* [Done] Subtitle generated from {offset / 60} to {chunkEnd / 60} for {filename}.')

        offset = chunkEnd

    print(f'\t* [Complete] {filename} is processed!\n')