Skip to content

Instantly share code, notes, and snippets.

@RyanFleck
Created December 26, 2020 21:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RyanFleck/8d609b992b10398c851434e1cfa9eda8 to your computer and use it in GitHub Desktop.
Save RyanFleck/8d609b992b10398c851434e1cfa9eda8 to your computer and use it in GitHub Desktop.
# transcribe_video.py
# A script to strip the audio from a video, split the audio into chunks, and send
# each chunk through the Google voice-to-text library to get the transcript.
# Requires: SpeechRecognition, pydub, moviepy, numpy==1.19.3
# FFMPEG must also be installed on the system.
# Tested on Windows 10.
import os
import re

import speech_recognition as sr
from moviepy.editor import AudioFileClip
from pydub import AudioSegment
from pydub.silence import split_on_silence
# Sources:
# https://www.thepythoncode.com/article/using-speech-recognition-to-convert-speech-to-text-python
# https://medium.com/python-in-plain-english/how-to-extract-audio-from-video-in-python-51c4dcd5989f
# Module-level recognizer shared by split_and_transcribe() for every chunk.
r = sr.Recognizer()
# Set the recognizer's energy threshold to 300.
# NOTE(review): confirm this setting has any effect on file-based r.record() calls.
r.energy_threshold = 300
def clean_text(text: str):
    """Turn one recognised chunk of speech into a sentence.

    Surrounding whitespace is stripped, the first character is upper-cased
    and ". " is appended so consecutive chunks can be concatenated directly.

    Args:
        text: Raw text returned by the recogniser for a single chunk.

    Returns:
        The formatted sentence, or an empty string when ``text`` is empty or
        contains only whitespace.
    """
    text = text.strip()
    # Guard against empty input: indexing text[0] would raise IndexError.
    if not text:
        return ""
    return text[0].upper() + text[1:] + ". "
def split_and_transcribe(path):
    """Split a WAV file on silence and transcribe each piece.

    The audio at ``path`` is cut wherever at least one second of silence
    (14 dB below the file's average loudness) occurs. Each piece is exported
    to ``audio-chunks/chunk<N>.wav``, read back and sent to the module-level
    recogniser ``r``. Recognised text is passed through ``clean_text`` and
    appended to the transcript; a piece that cannot be recognised inserts a
    blank-line paragraph break instead (unless the transcript is still empty
    or already ends with a newline).

    Args:
        path: Path to the WAV file to transcribe.

    Returns:
        The accumulated transcript as a single string.
    """
    print("Opening audio segment...")
    recording = AudioSegment.from_wav(path)

    print("Splitting audio file...")
    segments = split_on_silence(
        recording,
        min_silence_len=1000,
        silence_thresh=recording.dBFS - 14,
        keep_silence=500,
    )

    chunk_dir = "audio-chunks"
    if not os.path.isdir(chunk_dir):
        print("Creating a new temp directory for audio chunks.")
        os.mkdir(chunk_dir)

    transcript = ""
    for number, segment in enumerate(segments, start=1):
        print(f"Exporting chunk {number}")
        chunk_path = os.path.join(chunk_dir, f"chunk{number}.wav")
        segment.export(chunk_path, format="wav")

        print("Opening chunk...")
        with sr.AudioFile(chunk_path) as chunk_source:
            print(f"Recognizing chunk with name {chunk_path}")
            recorded = r.record(chunk_source)

            print("Converting to text...")
            try:
                recognised = r.recognize_google(recorded)
            except sr.UnknownValueError as err:
                print("Unknown Value Error: ", str(err))
                # Unintelligible audio becomes a paragraph break, but never
                # at the very start and never doubled up.
                needs_break = transcript and not transcript.endswith("\n")
                if needs_break:
                    transcript += "\n\n"
                continue

            sentence = clean_text(recognised)
            print(f"{chunk_path} => '{sentence}'")
            transcript += sentence

    return transcript
def extractname(filepath):
    """Return the file name of ``filepath`` without directory or extension.

    Only the final extension is removed, so ``"a/b/my.clip.webm"`` yields
    ``"my.clip"``. Names without an extension (and dotfiles such as
    ``".hidden"``) are returned unchanged rather than collapsing to an empty
    string.

    Args:
        filepath: Path or bare file name.

    Returns:
        The base name with its last extension stripped.
    """
    base = os.path.basename(filepath)
    stem, _extension = os.path.splitext(base)
    return stem
def transcribe(filename, to_filename):
    """Log the planned transcription and return the transcript of ``filename``.

    ``to_filename`` is only used in the progress message; nothing is written
    to it here. The actual work is delegated to ``split_and_transcribe``.
    """
    print(f"Transcribing {filename} => {to_filename}")
    transcript = split_and_transcribe(filename)
    return transcript
def replace_common_transcription_errors(text: str):
    """Smooth over sentence breaks that fall on "and" / "but".

    Applied in order:

    1. A sentence ending in the word "and" gets an ellipsis ("and." ->
       "and...").
    2. "and... And" collapses to "and...".
    3. ". And " becomes ", and ".
    4. ". But " becomes ", but ".

    The first two rules match "and" only as a whole word, so words that
    merely end in "and" ("understand.", "hand.") are left untouched.

    Args:
        text: Transcript text to clean up.

    Returns:
        The text with the replacements applied.
    """
    print("Replacing transcription errors...")
    # \b keeps the rule from firing inside words such as "understand.".
    text = re.sub(r"\band\.", "and...", text)
    text = re.sub(r"\band\.\.\. And", "and...", text)
    text = text.replace(". And ", ", and ")
    text = text.replace(". But ", ", but ")
    return text
def main():
    """Transcribe every ``.mp4`` / ``.webm`` file in the current directory.

    For each movie without an existing ``<name>.txt`` transcript, the audio
    track is written to ``<name>.wav``, transcribed, post-processed with
    ``replace_common_transcription_errors`` and saved to ``<name>.txt``.
    Movies that already have a transcript are skipped.
    """
    print("Finding all movies...")
    # Match on the real extension (with the dot) so names that merely end in
    # the letters "mp4"/"webm" are not picked up.
    movies = [
        name for name in os.listdir() if name.endswith((".mp4", ".webm"))
    ]

    for movie in movies:
        print(f"Processing Movie '{movie}'")
        filename = extractname(movie)
        transcribedfile = f"{filename}.txt"
        if os.path.exists(transcribedfile):
            print("A transcription already exists: " + str(transcribedfile))
            continue

        print("No text file found. Transcribing...")
        print("Converting to audio...")
        wav_path = f"{filename}.wav"
        audio = AudioFileClip(str(movie))
        try:
            audio.write_audiofile(wav_path)
        finally:
            # Release the clip's underlying reader even if the export fails.
            audio.close()

        text = transcribe(wav_path, transcribedfile)
        text = replace_common_transcription_errors(text)
        print("\nGot Text:\n\n")
        print(text)
        print()
        # Explicit UTF-8 so non-ASCII characters in the transcript do not
        # depend on the platform's default encoding.
        with open(transcribedfile, "w", encoding="utf-8") as out:
            print("Writing to file...")
            out.write(f"Full Transcript for Audio File {filename}.MP3\n\n")
            out.write(text)
# Entry point: runs the transcription as soon as this module is loaded
# (there is no __main__ guard, so importing the file also triggers it).
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment