Last active
May 9, 2024 01:47
-
-
Save m0rph03nix/0967b82cb6e424df8afd60fdc983c857 to your computer and use it in GitHub Desktop.
Run Whisper.cpp in docker with mic audio streaming
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---- Build stage: fetch whisper.cpp, download the model, compile the tools ----
FROM debian:11.7-slim AS build

RUN apt-get update \
    && apt-get install -y libsdl2-dev alsa-utils g++ make wget

# NOTE(review): this downloads the moving `master` branch, so the build is not
# reproducible and the `make main stream` targets below may stop existing
# upstream -- consider pinning a release tag. TODO confirm against upstream.
RUN mkdir /whisper && \
    wget -q https://github.com/ggerganov/whisper.cpp/tarball/master -O - | \
    tar -xz -C /whisper --strip-components 1

WORKDIR /whisper

# Model name, supplied with `--build-arg model=<name>`.
ARG model
RUN bash ./models/download-ggml-model.sh "${model}"
RUN make main stream

# ---- Runtime stage: only the binaries, the model and their runtime libraries ----
FROM debian:11.7-slim AS whisper

# Only the SDL2 shared library is needed to run the binaries; the -dev package
# (headers, static libraries) is required in the build stage alone.
RUN apt-get update \
    && apt-get install -y libsdl2-2.0-0 alsa-utils \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /root

# ARG values do not cross stage boundaries, so `model` is declared again here.
ARG model
RUN mkdir /root/models
COPY --from=build "/whisper/models/ggml-${model}.bin" "/root/models/ggml-${model}.bin"
COPY --from=build /whisper/main /usr/local/bin/whisper
COPY --from=build /whisper/stream /usr/local/bin/stream
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import docker | |
import re | |
from time import sleep | |
def parse_transcriptions(text):
    """Extract the transcription blocks found in *text*.

    A block is the text located between a line containing
    ``### Transcription <n> START`` and the matching
    ``### Transcription <n> END`` marker. Anything enclosed in square
    brackets or parentheses is removed from the block, and the result is
    stripped of surrounding whitespace.

    Args:
        text: The log output to scan.

    Returns:
        A list of cleaned transcription strings, one per END marker that has
        a matching START marker before it, in order of appearance.
    """
    start_tag = "### Transcription"
    end_pattern = r"{} (\d+) END".format(re.escape(start_tag))

    transcriptions = []
    for end_match in re.finditer(end_pattern, text):
        end_index = end_match.start()

        # Pair each END marker with the closest preceding START marker that
        # carries the same number, so repeated numbers are not all resolved
        # to the first block in the text.
        start_marker = "{} {} START".format(start_tag, end_match.group(1))
        marker_index = text.rfind(start_marker, 0, end_index)
        if marker_index == -1:
            continue

        # The block body begins on the line after the START marker.
        newline_index = text.find("\n", marker_index, end_index)
        if newline_index == -1:
            # START and END share a line: there is no body in between.
            transcriptions.append("")
            continue
        body_start = newline_index + 1

        # Drop the (normally empty) partial line right before the END marker.
        # Without a newline in range, fall back to the marker position rather
        # than letting -1 turn the slice into "everything but the last char".
        body_end = text.rfind("\n", body_start, end_index)
        if body_end == -1:
            body_end = end_index

        transcription = text[body_start:body_end]
        transcription = re.sub(r'\[.*?\]', '', transcription)  # Ignore content between brackets
        transcription = re.sub(r'\(.*?\)', '', transcription)  # Ignore content between parentheses
        transcriptions.append(transcription.strip())

    return transcriptions
def find_element_in_sentence(element_list, sentence):
    """Return the first element of *element_list* found in *sentence*.

    The match is case-insensitive and must not be embedded in a longer word:
    the element may not be directly preceded or followed by a word character.

    Args:
        element_list: Candidate strings, checked in order.
        sentence: The text to search.

    Returns:
        The first matching element exactly as given in *element_list*, or
        ``None`` when no element matches.
    """
    for element in element_list:
        # Lookarounds behave like \b for elements that start and end with a
        # word character, and still work for elements such as "c++" whose
        # edges are not word characters (where \b could never match).
        pattern = r'(?<!\w){}(?!\w)'.format(re.escape(element))
        if re.search(pattern, sentence, re.IGNORECASE):
            return element
    return None
# Create a Docker client
client = docker.from_env()

# Name (or ID) of the container whose logs are polled
container_id = 'whisper.cpp'

# Items that can be recognised in a transcription (constant across iterations)
elements = ['coke', 'orange juice', 'beer', 'fanta']

# Transcriptions recorded so far, one entry per request index
req = []

while True:
    # Retrieve the container's complete log output
    logs = client.containers.get(container_id).logs()

    # Decode the logs from bytes to string; malformed bytes are replaced
    # instead of raising, so a single bad byte does not stop the polling loop.
    decoded_logs = logs.decode('utf-8', errors='replace')

    transcriptions = parse_transcriptions(decoded_logs)

    # Report every transcription extracted from the logs
    for i, transcription in enumerate(transcriptions):
        print("\nDemande {}: ".format(i))
        result = find_element_in_sentence(elements, transcription)
        if result:
            print("\tThe guest wants a", result)
        else:
            print("\tThe guest was not understood !")

        # Record the transcription the first time its index is seen
        # (store the single transcription, not the whole list).
        if len(req) < i + 1:
            req.append(transcription)

    sleep(0.3)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Build the whisper image for the selected model if it does not exist yet,
# then run the `stream` tool inside a container.
set -eu

MODEL="base"
# Kept separate from LANG, which is the locale environment variable.
WHISPER_LANG="en"
IMAGE="whisper-${MODEL}"

script_dir="$(realpath "$(dirname "${0}")")"

# Build only when the image is missing. `docker image inspect` exits non-zero
# for an unknown image, so its exit status is tested directly.
if ! docker image inspect "${IMAGE}" >/dev/null 2>&1; then
    docker build -t "${IMAGE}" --build-arg model="${MODEL}" "${script_dir}"
fi

# The container is named so that its logs can be looked up by name
# (`whisper.cpp`) from another process.
docker run --rm -it \
    --name whisper.cpp \
    --privileged -v /dev/bus/usb:/dev/bus/usb \
    "${IMAGE}" \
    stream \
    --model "/root/models/ggml-${MODEL}.bin" \
    --language "${WHISPER_LANG}" \
    --step 0 \
    -t 6 \
    --length 5000 \
    -vth 0.3
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment