geerlingguy/txt-file-compile.sh

## txt-file-compile.sh
#!/bin/bash

# Append the contents of a given text file to the compiled output file.
#
# Arguments: $1 - path of the text file to append
# Outputs:   one progress line on stdout; the formatted contents are appended
#            to txt-file-output.txt in the current working directory
# Returns:   0 when the file is skipped; otherwise the status of the append
file_compile() {
    local file=$1
    local output_file="txt-file-output.txt"
    local filename
    filename=$(basename -- "$file")

    # Never feed the output file back into itself: reading it while appending
    # to it would duplicate its contents (and may keep growing on large
    # files). -ef is false while the output file does not exist yet.
    if [[ "$file" -ef "$output_file" ]]; then
        printf 'Skipping output file: %s\n' "$file" >&2
        return 0
    fi

    # The name is passed as an argument rather than spliced into the format
    # string, so a '%' or '\' in a filename is printed literally.
    printf 'Adding contents of: %s\n' "$filename"

    # Write the filename as a Markdown header followed by a blank line, then
    # the contents with one leading space stripped from each line, then three
    # newlines to separate this entry from the next.
    {
        printf '## %s\n\n' "$filename"
        sed -e 's/^ //' <"$file"
        printf '\n\n\n'
    } >>"$output_file"
}
# Make file_compile visible to the child bash processes started by xargs.
export -f file_compile

# Find all files with a case-insensitive '.txt' extension below the current
# directory, sort the NUL-delimited paths, and run file_compile once per path.
#
# ./txt-file-output.txt is excluded because file_compile appends to that file;
# without the exclusion a second run would append the output to itself.
#
# Each path is handed to the child shell as $1; the '_' fills $0, which bash
# uses as the command name in its own error messages.
find . -type f -iname '*.txt' ! -path './txt-file-output.txt' -print0 \
    | sort -z \
    | xargs -0r -n1 bash -c 'file_compile "$1"' _

## whisper-transcribe.py
#!/usr/bin/env python3

# First make sure Whisper is installed:
#
#   pip3 install git+https://github.com/openai/whisper.git
#
# On my Mac, I'm using pyenv to run Python 3.10 (currently) since 3.11 is too
# new for some of the dependencies.

import os
import whisper
from tqdm import tqdm

# Directory containing this script; the search for video files starts here.
current_dir = os.path.dirname(os.path.realpath(__file__))

# Load the Whisper model.
print("Loading whisper model...")
model = whisper.load_model("small.en")

# Walk the tree once and remember every file to transcribe, so the count and
# the transcription loop share a single copy of the filter. A file matches
# when its extension is .mov or .mp4 (case-insensitive) and its name contains
# "vlog" (case-sensitive).
files_to_process = []
for dirpath, dirnames, filenames in os.walk(current_dir):
    for filename in filenames:
        if filename.lower().endswith((".mov", ".mp4")) and "vlog" in filename:
            files_to_process.append((dirpath, filename))
num_files = len(files_to_process)
print("Number of files to process: ", num_files)

# Transcribe the files, displaying a progress bar.
with tqdm(total=num_files, desc="Transcribing Files") as progress_bar:
    for dirpath, filename in files_to_process:
        # Show the file currently being worked on next to the bar.
        progress_bar.set_postfix_str(filename)

        # Transcribe with whisper.
        filepath = os.path.join(dirpath, filename)
        result = model.transcribe(filepath, language="English", fp16=False)
        transcription = result['text']

        # Write the transcription next to the source file as <name>.txt.
        # The encoding is explicit so the write does not depend on the
        # platform's default text encoding.
        filename_no_ext = os.path.splitext(filename)[0]
        txt_path = os.path.join(dirpath, filename_no_ext + '.txt')
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.write(transcription)

        # Update the progress bar.
        progress_bar.update(1)
	#!/bin/bash

	# Append the contents of a given text file to the compiled output file.
	#
	# Arguments: $1 - path of the text file to append
	# Outputs:   one progress line on stdout; the formatted contents are
	#            appended to txt-file-output.txt in the current working directory
	# Returns:   0 when the file is skipped; otherwise the status of the append
	file_compile() {
	    local file=$1
	    local output_file="txt-file-output.txt"
	    local filename
	    filename=$(basename -- "$file")

	    # Never feed the output file back into itself: reading it while
	    # appending to it would duplicate its contents (and may keep growing on
	    # large files). -ef is false while the output file does not exist yet.
	    if [[ "$file" -ef "$output_file" ]]; then
	        printf 'Skipping output file: %s\n' "$file" >&2
	        return 0
	    fi

	    # The name is passed as an argument rather than spliced into the format
	    # string, so a '%' or '\' in a filename is printed literally.
	    printf 'Adding contents of: %s\n' "$filename"

	    # Write the filename as a Markdown header followed by a blank line, then
	    # the contents with one leading space stripped from each line, then
	    # three newlines to separate this entry from the next. The file is fed
	    # to sed directly; a real (unescaped) pipe is not needed.
	    {
	        printf '## %s\n\n' "$filename"
	        sed -e 's/^ //' <"$file"
	        printf '\n\n\n'
	    } >>"$output_file"
	}
	# Make file_compile visible to the child bash processes started by xargs.
	export -f file_compile

	# Find all files with a case-insensitive '.txt' extension below the current
	# directory, sort the NUL-delimited paths, and run file_compile once per
	# path. The pipes must be real, unescaped '|' characters; an escaped '\|'
	# is passed to find as a literal argument instead of connecting the stages.
	#
	# ./txt-file-output.txt is excluded because file_compile appends to that
	# file; without the exclusion a second run would append the output to itself.
	#
	# Each path is handed to the child shell as $1; the '_' fills $0, which bash
	# uses as the command name in its own error messages.
	find . -type f -iname '*.txt' ! -path './txt-file-output.txt' -print0 \
	    | sort -z \
	    | xargs -0r -n1 bash -c 'file_compile "$1"' _
	#!/usr/bin/env python3

	# First make sure Whisper is installed:
	#
	# pip3 install git+https://github.com/openai/whisper.git
	#
	# On my Mac, I'm using pyenv to run Python 3.10 (currently) since 3.11 is too
	# new for some of the dependencies.

	import os
	import whisper
	from tqdm import tqdm

	# Directory containing this script; the search for video files starts here.
	current_dir = os.path.dirname(os.path.realpath(__file__))

	# Load the Whisper model.
	print("Loading whisper model...")
	model = whisper.load_model("small.en")

	# Walk the tree once and remember every file to transcribe, so the count and
	# the transcription loop share a single copy of the filter. A file matches
	# when its extension is .mov or .mp4 (case-insensitive) and its name contains
	# "vlog" (case-sensitive).
	files_to_process = []
	for dirpath, dirnames, filenames in os.walk(current_dir):
	    for filename in filenames:
	        if filename.lower().endswith((".mov", ".mp4")) and "vlog" in filename:
	            files_to_process.append((dirpath, filename))
	num_files = len(files_to_process)
	print("Number of files to process: ", num_files)

	# Transcribe the files, displaying a progress bar.
	with tqdm(total=num_files, desc="Transcribing Files") as progress_bar:
	    for dirpath, filename in files_to_process:
	        # Show the file currently being worked on next to the bar.
	        progress_bar.set_postfix_str(filename)

	        # Transcribe with whisper.
	        filepath = os.path.join(dirpath, filename)
	        result = model.transcribe(filepath, language="English", fp16=False)
	        transcription = result['text']

	        # Write the transcription next to the source file as <name>.txt.
	        # The encoding is explicit so the write does not depend on the
	        # platform's default text encoding.
	        filename_no_ext = os.path.splitext(filename)[0]
	        txt_path = os.path.join(dirpath, filename_no_ext + '.txt')
	        with open(txt_path, 'w', encoding='utf-8') as f:
	            f.write(transcription)

	        # Update the progress bar.
	        progress_bar.update(1)