Skip to content

Instantly share code, notes, and snippets.

@AlanD20
Created July 3, 2023 19:12
Show Gist options
  • Save AlanD20/e846c29dace3f85c45cffd0b6f621e36 to your computer and use it in GitHub Desktop.
Save AlanD20/e846c29dace3f85c45cffd0b6f621e36 to your computer and use it in GitHub Desktop.

Generate Text From Video using Python

Install the following python modules:

pip3 install librosa SpeechRecognition

Install ffmpeg CLI app

import math
import os
import subprocess

import librosa # pip3 install librosa
import speech_recognition as sr # pip3 install SpeechRecognition
# install `ffmpeg`
# Goes over every single video in the input directory, converts it to WAV audio, then generates subtitles from that audio
# Directory scanned for the source videos.
inputDir = "inputs"
# Directory that receives the generated subtitle (.txt) files.
subDir = "subs"
# Directory that receives the audio (.wav) files extracted from the videos.
outputDir = "out"
def makeDir(name: str) -> None:
    """Create the directory *name* if it does not already exist.

    Missing parent directories are created as well, and an already existing
    directory is left untouched, so the call is safe to repeat.

    Args:
        name: Path of the directory to create.

    Raises:
        FileExistsError: If *name* exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(name, exist_ok=True)
# Make sure the working directories exist before anything is read or written.
makeDir(inputDir)
makeDir(subDir)
makeDir(outputDir)

# Length, in seconds, of the audio sent to the recognizer per request.
durationPerCycle = 120
# If fewer than this many seconds are left at the end, skip them.
# Edit this value if 5 seconds fail for you.
minChunkDuration = 5

for file in os.listdir(inputDir):
    inputFile = os.path.join(inputDir, file)
    # Only regular files are processed; anything else in the directory is ignored.
    if not os.path.isfile(inputFile):
        continue

    # splitext drops only the last extension, so "my.video.mp4" stays "my.video".
    filename = os.path.splitext(file)[0]
    print(f'- Processing "{filename}":')

    outputFile = os.path.join(outputDir, f'{filename}.wav')
    # Generate the audio only if it doesn't exist yet.
    if not os.path.exists(outputFile):
        print(f'\t* [Process] Converting {filename} to wav...')
        # An argument list (no shell) keeps file names with spaces or shell
        # metacharacters intact; ffmpeg's log output on stderr is discarded.
        conversion = subprocess.run(
            ['ffmpeg', '-i', inputFile, outputFile],
            stderr=subprocess.DEVNULL,
        )
        if conversion.returncode != 0:
            # Drop a partially written file so the next run converts again
            # instead of treating it as finished audio.
            if os.path.exists(outputFile):
                os.remove(outputFile)
            print(f'\t* [Fail] Unable to convert {filename} to wav.\n')
            continue
        print(f'\t* [Done] {filename} converted to wav.')
    else:
        print(f'\t* [Skip] {filename} Audio file already exist.')

    subFile = os.path.join(subDir, f'{filename}.txt')
    if os.path.exists(subFile):
        print(f'\t* [Skip] Subtitle for {filename} is already exist.\n')
        continue

    print(f'\t* [Process] Generating subtitle for {filename}...')
    r = sr.Recognizer()
    totalDuration = math.floor(librosa.get_duration(path=outputFile))
    completed = True

    # Walk the audio in consecutive, non-overlapping chunks of durationPerCycle seconds.
    for offset in range(0, totalDuration, durationPerCycle):
        end = min(offset + durationPerCycle, totalDuration)

        # Only the last chunk can be shorter than a full cycle; skip it when it is too short.
        if end - offset < minChunkDuration:
            print(f'\t\t* [Fail] Unable to transcribe from {offset / 60} to {end / 60} minutes')
            break

        print(f'\t\t* Transcribing from {offset / 60} to {end / 60} out of {totalDuration / 60} minutes.')
        with sr.AudioFile(outputFile) as source:
            # record() takes the chunk *length* in `duration`, not its end time.
            audio = r.record(source, duration=end - offset, offset=offset)

        try:
            transcribe = r.recognize_google(audio)
        except sr.UnknownValueError:
            # Nothing intelligible in this chunk; keep going with the next one.
            print(f'\t\t* [Fail] Unable to transcribe from {offset / 60} to {end / 60} minutes')
            continue
        except sr.RequestError as error:
            print(f'\t\t* [Fail] Speech recognition request failed: {error}')
            # Remove the partial subtitle file (created by this run) so a later
            # run retries this video instead of skipping it as already done.
            if os.path.exists(subFile):
                os.remove(subFile)
            completed = False
            break

        # Append to subtitle file
        with open(subFile, 'a', encoding='utf-8') as subOutFile:
            subOutFile.write('\n' + transcribe)
        print(f'\t\t* [Done] Subtitle generated from {offset / 60} to {end / 60} for {filename}.')

    if completed:
        print(f'\t* [Complete] {filename} is processed!\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment