Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save abodacs/b5607786ae85661a7121c149b07005b3 to your computer and use it in GitHub Desktop.
# pip install git+https://github.com/huggingface/transformers.git
#
# Live microphone transcription: streams audio from the default microphone
# through ffmpeg into a Whisper ASR pipeline and prints the rolling partial
# transcript in place on one terminal line.
import datetime
import sys
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_microphone_live

# device=0 pins the pipeline to the first CUDA device; use device=-1 for CPU-only.
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=0)

# The microphone stream must match the model's expected input sampling rate.
sampling_rate = pipe.feature_extractor.sampling_rate

start = datetime.datetime.now()

# chunk_length_s: length of the audio window each transcription covers.
# stream_chunk_s: how often new audio is yielded into that window.
chunk_length_s = 5
stream_chunk_s = 0.1
mic = ffmpeg_microphone_live(
    sampling_rate=sampling_rate,
    chunk_length_s=chunk_length_s,
    stream_chunk_s=stream_chunk_s,
)

print("Start talking...")
for item in pipe(mic):
    # "\033[K" clears the rest of the terminal line; end="\r" rewinds the
    # cursor so the next partial result overwrites this one in place.
    sys.stdout.write("\033[K")
    print(item["text"], end="\r")
    # Once the chunk is no longer partial, emit a newline to keep the final
    # transcript line on screen before the next window starts.
    if not item["partial"][0]:
        print("")
@abodacs
Copy link
Author

abodacs commented Jul 12, 2023

# Setup:
# conda create -n whisper python=3.9
# conda activate whisper
# https://github.com/openai/whisper
# pip install git+https://github.com/openai/whisper.git

# Usage:
# python whisper-audio-to-text.py --audio_dir my_files --out_dir texts

"""Batch-transcribe audio files with the OpenAI Whisper CLI.

Scans --audio_dir for supported audio files, asks for confirmation, then runs
the ``whisper`` command-line tool on each file, writing .txt transcripts to
--out_dir.
"""

import argparse
import os
import os.path as osp
import subprocess

# File extensions picked up from --audio_dir.
AUDIO_EXTENSIONS = ('.aac', '.mp3')


def _parse_args():
    """Parse command-line arguments; normalizes --print_only to a bool."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--audio_dir", help="Path to the folder containing the input files.",
        type=str, required=True,
    )
    parser.add_argument(
        "--out_dir", help="Path to the target folder for the transcripts.",
        type=str, required=True,
    )
    parser.add_argument(
        "--whisper_model", type=str, choices=("small", "medium", "large"),
        default="medium",
    )
    parser.add_argument(
        "--print_only", type=str, default="false",
        help="Only prints the files to be processed instead "
             "of actually processing them", choices=("true", "false"),
    )

    args = parser.parse_args()
    # The flag stays string-valued on the CLI for backward compatibility;
    # convert it to a real bool for internal use.
    args.print_only = args.print_only == "true"
    return args


def _build_whisper_cmd(audio_path, model, out_dir):
    """Return the whisper invocation as an argv list.

    Building a list (instead of splitting a formatted string) keeps paths
    that contain spaces intact when passed to subprocess.
    """
    return [
        "whisper", audio_path,
        "--model", model,
        "--language", "English",
        "--output_dir", out_dir,
        "--verbose", "True",
        "--task", "transcribe",
        "--output_format", "txt",
    ]


def main():
    args = _parse_args()

    # sorted() gives a deterministic processing order; os.listdir's order is
    # filesystem-dependent.
    files_to_process = sorted(
        f for f in os.listdir(args.audio_dir) if f.endswith(AUDIO_EXTENSIONS)
    )

    print(f"Processing {len(files_to_process)} files:")
    print("\n".join(files_to_process))
    input("Press Enter to continue / CTRL-C to cancel.")
    print(20 * '-')

    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(args.out_dir, exist_ok=True)

    for file in files_to_process:
        print('Processing:', file)

        cmd = _build_whisper_cmd(
            osp.join(args.audio_dir, file), args.whisper_model, args.out_dir
        )

        if args.print_only:
            # Render the argv list the way a shell would receive it.
            print(" ".join(cmd))
        else:
            subprocess.run(cmd, check=True)

        print('Finished processing', file)
        print(20 * '-')


if __name__ == "__main__":
    main()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment