# RyanFleck/transcribe_mp3.py

## transcribe_mp3.py
import os
import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Module-level recognizer shared by every call to split_and_transcribe().
r = sr.Recognizer()
# NOTE(review): energy_threshold is the loudness cut-off SpeechRecognition
# uses when listening for speech; confirm it has any effect on r.record(),
# which is the only capture call used in this file.
r.energy_threshold = 300


def clean_text(text: str) -> str:
    """Turn one recognized phrase into a sentence fragment for the transcript.

    Surrounding whitespace is stripped, the first character is upper-cased
    and ``". "`` is appended so consecutive chunks can simply be concatenated.

    Args:
        text: Raw text recognized for a single audio chunk.

    Returns:
        The formatted sentence, or an empty string when ``text`` is empty or
        whitespace only.
    """
    text = text.strip()
    if not text:
        # Nothing to capitalise; indexing text[0] would raise IndexError.
        return ""
    return text[0].upper() + text[1:] + ". "

def split_and_transcribe(path):
    """Split the MP3 at ``path`` on silence and transcribe it chunk by chunk.

    Every chunk is exported as ``audio-chunks/chunk<N>.wav`` (the directory is
    created in the current working directory if missing), read back through
    the shared recognizer ``r`` and passed to ``r.recognize_google``.

    Returns:
        The concatenated, cleaned-up text of all recognized chunks. A chunk
        that raises ``sr.UnknownValueError`` contributes no text; a blank-line
        break is inserted in its place unless the transcript is still empty
        or already ends with a newline.
    """
    print("Opening audio segment...")
    audio = AudioSegment.from_mp3(path)
    print("Splitting audio file...")
    # Per pydub's documentation the lengths are milliseconds and the threshold
    # is in dBFS, here taken relative to the loudness of the whole recording.
    segments = split_on_silence(
        audio,
        min_silence_len=1000,
        silence_thresh=audio.dBFS - 14,
        keep_silence=500,
    )

    chunk_dir = "audio-chunks"
    if not os.path.isdir(chunk_dir):
        print("Creating a new temp directory for audio chunks.")
        os.mkdir(chunk_dir)

    transcript = ""
    for index, segment in enumerate(segments, start=1):
        print(f"Exporting chunk {index}")
        wav_path = os.path.join(chunk_dir, f"chunk{index}.wav")
        segment.export(wav_path, format="wav")
        print("Opening chunk...")
        with sr.AudioFile(wav_path) as source:
            print(f"Recognizing chunk with name {wav_path}")
            recorded = r.record(source)
            print("Converting to text...")
            try:
                raw = r.recognize_google(recorded)
            except sr.UnknownValueError as err:
                print("Unknown Value Error: ", str(err))
                # Mark the gap with a paragraph break, but never at the very
                # start and never twice in a row.
                if transcript and not transcript.endswith("\n"):
                    transcript += "\n\n"
                continue
            sentence = clean_text(raw)
            print(f"{wav_path} =>  '{sentence}'")
            transcript += sentence

    return transcript


def extractname(filepath):
    """Return the file name of ``filepath`` without directory or final extension.

    Only the last extension is removed, so ``"a.b.mp3"`` yields ``"a.b"``.
    A name without any extension is returned unchanged; splitting on "." by
    hand turned such names into an empty string.

    Args:
        filepath: Path or bare file name.

    Returns:
        The base name with its last extension stripped.
    """
    stem, _extension = os.path.splitext(os.path.basename(filepath))
    return stem


def transcribe(filename, to_filename):
    """Announce the job and return the transcript of ``filename``.

    ``to_filename`` only appears in the progress message; nothing is written
    to it here.
    """
    print(f"Transcribing {filename} => {to_filename}")
    transcript = split_and_transcribe(filename)
    return transcript


def replace_common_transcription_errors(text: str) -> str:
    """Replace AND and BUT sentence starts/ends, and other errors.

    A sentence that ends on the word "and" becomes a trailing "and...", an
    immediately following "And" is dropped, and sentences that start with
    "And"/"But" are merged into the previous one with a comma.

    Args:
        text: Transcript text to post-process.

    Returns:
        The corrected text.
    """
    import re  # local import: only this function needs it

    print("Replacing transcription errors...")
    # Word boundary so only the word "and" matches, not "hand." or "band.".
    text = re.sub(r"\band\.", "and...", text)
    text = text.replace("and... And", "and...")
    text = text.replace(". And ", ", and ")
    text = text.replace(". But ", ", but ")
    # text = text.replace(" i ", " I ")
    return text


def main():
    """Transcribe every MP3 in the current directory that has no transcript yet.

    For each ``<name>.mp3`` (any letter case) the transcript is written to
    ``<name>.txt`` in the current directory; audio files whose text file
    already exists are skipped.
    """
    print("Finding all MP3s...")
    # Case-insensitive and dot-anchored: "talk.mp3" is picked up, "notMP3" is not.
    mp3s = [name for name in os.listdir() if name.lower().endswith(".mp3")]

    for mp3_name in mp3s:
        print(f"Processing MP3 '{mp3_name}'")
        filename = extractname(mp3_name)
        transcribedfile = f"{filename}.txt"
        if os.path.exists(transcribedfile):
            print("A transcription already exists: "+str(transcribedfile))
            continue

        print("No text file found. Transcribing...")
        text = transcribe(mp3_name, transcribedfile)
        text = replace_common_transcription_errors(text)
        print("\nGot Text:\n\n")
        print(text)
        print()
        # Explicit UTF-8 so non-ASCII transcripts do not depend on the
        # platform's default encoding; the with-block closes the file.
        with open(transcribedfile, "w", encoding="utf-8") as out_file:
            print("Writing to file...")
            # Use the real file name so the header matches its actual case.
            out_file.write(f"Full Transcript for Audio File {mp3_name}\n\n")
            out_file.write(text)


# Script entry point: runs the whole transcription pass as soon as this file
# is executed (there is no __main__ guard, so importing it runs it as well).
main()
	import os
	import speech_recognition as sr
	from pydub import AudioSegment
	from pydub.silence import split_on_silence

	# Module-level recognizer shared by every call to split_and_transcribe().
	r = sr.Recognizer()
	# NOTE(review): energy_threshold is the loudness cut-off SpeechRecognition
	# uses when listening for speech; confirm it has any effect on r.record(),
	# which is the only capture call used in this file.
	r.energy_threshold = 300


	def clean_text(text: str) -> str:
	    """Turn one recognized phrase into a sentence fragment for the transcript.

	    Surrounding whitespace is stripped, the first character is upper-cased
	    and ``". "`` is appended so consecutive chunks can simply be concatenated.

	    Args:
	        text: Raw text recognized for a single audio chunk.

	    Returns:
	        The formatted sentence, or an empty string when ``text`` is empty or
	        whitespace only.
	    """
	    text = text.strip()
	    if not text:
	        # Nothing to capitalise; indexing text[0] would raise IndexError.
	        return ""
	    return text[0].upper() + text[1:] + ". "

	def split_and_transcribe(path):
	    """Split the MP3 at ``path`` on silence and transcribe it chunk by chunk.

	    Every chunk is exported as ``audio-chunks/chunk<N>.wav`` (the directory is
	    created in the current working directory if missing), read back through
	    the shared recognizer ``r`` and passed to ``r.recognize_google``.

	    Returns:
	        The concatenated, cleaned-up text of all recognized chunks. A chunk
	        that raises ``sr.UnknownValueError`` contributes no text; a blank-line
	        break is inserted in its place unless the transcript is still empty
	        or already ends with a newline.
	    """
	    print("Opening audio segment...")
	    audio = AudioSegment.from_mp3(path)
	    print("Splitting audio file...")
	    # Per pydub's documentation the lengths are milliseconds and the threshold
	    # is in dBFS, here taken relative to the loudness of the whole recording.
	    segments = split_on_silence(
	        audio,
	        min_silence_len=1000,
	        silence_thresh=audio.dBFS - 14,
	        keep_silence=500,
	    )

	    chunk_dir = "audio-chunks"
	    if not os.path.isdir(chunk_dir):
	        print("Creating a new temp directory for audio chunks.")
	        os.mkdir(chunk_dir)

	    transcript = ""
	    for index, segment in enumerate(segments, start=1):
	        print(f"Exporting chunk {index}")
	        wav_path = os.path.join(chunk_dir, f"chunk{index}.wav")
	        segment.export(wav_path, format="wav")
	        print("Opening chunk...")
	        with sr.AudioFile(wav_path) as source:
	            print(f"Recognizing chunk with name {wav_path}")
	            recorded = r.record(source)
	            print("Converting to text...")
	            try:
	                raw = r.recognize_google(recorded)
	            except sr.UnknownValueError as err:
	                print("Unknown Value Error: ", str(err))
	                # Mark the gap with a paragraph break, but never at the very
	                # start and never twice in a row.
	                if transcript and not transcript.endswith("\n"):
	                    transcript += "\n\n"
	                continue
	            sentence = clean_text(raw)
	            print(f"{wav_path} => '{sentence}'")
	            transcript += sentence

	    return transcript


	def extractname(filepath):
	    """Return the file name of ``filepath`` without directory or final extension.

	    Only the last extension is removed, so ``"a.b.mp3"`` yields ``"a.b"``.
	    A name without any extension is returned unchanged; splitting on "." by
	    hand turned such names into an empty string.

	    Args:
	        filepath: Path or bare file name.

	    Returns:
	        The base name with its last extension stripped.
	    """
	    stem, _extension = os.path.splitext(os.path.basename(filepath))
	    return stem


	def transcribe(filename, to_filename):
	    """Announce the job and return the transcript of ``filename``.

	    ``to_filename`` only appears in the progress message; nothing is written
	    to it here.
	    """
	    print(f"Transcribing {filename} => {to_filename}")
	    return split_and_transcribe(filename)


	def replace_common_transcription_errors(text: str) -> str:
	    """Replace AND and BUT sentence starts/ends, and other errors.

	    A sentence that ends on the word "and" becomes a trailing "and...", an
	    immediately following "And" is dropped, and sentences that start with
	    "And"/"But" are merged into the previous one with a comma.

	    Args:
	        text: Transcript text to post-process.

	    Returns:
	        The corrected text.
	    """
	    import re  # local import: only this function needs it

	    print("Replacing transcription errors...")
	    # Word boundary so only the word "and" matches, not "hand." or "band.".
	    text = re.sub(r"\band\.", "and...", text)
	    text = text.replace("and... And", "and...")
	    text = text.replace(". And ", ", and ")
	    text = text.replace(". But ", ", but ")
	    # text = text.replace(" i ", " I ")
	    return text


	def main():
	    """Transcribe every MP3 in the current directory that has no transcript yet.

	    For each ``<name>.mp3`` (any letter case) the transcript is written to
	    ``<name>.txt`` in the current directory; audio files whose text file
	    already exists are skipped.
	    """
	    print("Finding all MP3s...")
	    # Case-insensitive and dot-anchored: "talk.mp3" is picked up, "notMP3" is not.
	    mp3s = [name for name in os.listdir() if name.lower().endswith(".mp3")]

	    for mp3_name in mp3s:
	        print(f"Processing MP3 '{mp3_name}'")
	        filename = extractname(mp3_name)
	        transcribedfile = f"{filename}.txt"
	        if os.path.exists(transcribedfile):
	            print("A transcription already exists: "+str(transcribedfile))
	            continue

	        print("No text file found. Transcribing...")
	        text = transcribe(mp3_name, transcribedfile)
	        text = replace_common_transcription_errors(text)
	        print("\nGot Text:\n\n")
	        print(text)
	        print()
	        # Explicit UTF-8 so non-ASCII transcripts do not depend on the
	        # platform's default encoding; the with-block closes the file.
	        with open(transcribedfile, "w", encoding="utf-8") as out_file:
	            print("Writing to file...")
	            # Use the real file name so the header matches its actual case.
	            out_file.write(f"Full Transcript for Audio File {mp3_name}\n\n")
	            out_file.write(text)


	# Script entry point: runs the whole transcription pass as soon as this
	# code is executed (there is no __main__ guard).
	main()