Skip to content

Instantly share code, notes, and snippets.

@m0rph03nix
Last active May 9, 2024 01:47
Show Gist options
  • Save m0rph03nix/0967b82cb6e424df8afd60fdc983c857 to your computer and use it in GitHub Desktop.
Save m0rph03nix/0967b82cb6e424df8afd60fdc983c857 to your computer and use it in GitHub Desktop.
Run whisper.cpp in Docker with live microphone audio streaming
# Two-stage image for whisper.cpp:
#   * "build"   downloads the sources and a ggml model, then compiles the
#               `main` and `stream` programs;
#   * "whisper" is the runtime image holding only the model and the binaries.

# Default model name shared by both stages (each stage re-declares `ARG model`
# to inherit it). Override with `--build-arg model=<name>`.
ARG model=base

FROM debian:11.7-slim AS build
RUN apt-get update \
 && apt-get install -y libsdl2-dev alsa-utils g++ make wget

# Git ref (branch, tag or commit) of whisper.cpp to fetch.
# NOTE(review): the default `master` is a moving target and the
# `make main stream` step below relies on the targets that ref provides;
# pass `--build-arg whisper_ref=<tag>` to pin a known-good revision.
ARG whisper_ref=master
RUN mkdir /whisper && \
    wget -q "https://github.com/ggerganov/whisper.cpp/tarball/${whisper_ref}" -O - | \
    tar -xz -C /whisper --strip-components 1
WORKDIR /whisper

# Download the requested ggml model using the script shipped with the sources.
ARG model
RUN bash ./models/download-ggml-model.sh "${model}"
RUN make main stream

FROM debian:11.7-slim AS whisper
RUN apt-get update \
 && apt-get install -y libsdl2-dev alsa-utils \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
WORKDIR /root

# Copy only the model and the two compiled programs out of the build stage.
ARG model
RUN mkdir /root/models
COPY --from=build "/whisper/models/ggml-${model}.bin" "/root/models/ggml-${model}.bin"
COPY --from=build /whisper/main /usr/local/bin/whisper
COPY --from=build /whisper/stream /usr/local/bin/stream
import docker
import re
from time import sleep
def parse_transcriptions(text):
    """Extract the transcription bodies delimited by START/END markers.

    A block opens with a line containing ``### Transcription <n> START``
    (anything else on that line is ignored) and closes at
    ``### Transcription <n> END``. Blocks are returned in the order their
    END markers appear in ``text``.

    Each END marker is paired with the closest preceding START marker that
    carries the same number, so logs in which the numbering repeats yield
    every block instead of the first one several times.

    Args:
        text: The log text to scan.

    Returns:
        A list of stripped transcription strings, with any ``[...]`` and
        ``(...)`` spans removed. A block with no body yields ``""``.
    """
    start_tag = "### Transcription"
    end_marker = re.compile(r"{} (\d+) END".format(re.escape(start_tag)))

    transcriptions = []
    for end_match in end_marker.finditer(text):
        number = end_match.group(1)
        end_index = end_match.start()

        # Closest START marker with the same number located before this END.
        start_index = text.rfind(
            "{} {} START".format(start_tag, number), 0, end_index
        )
        if start_index == -1:
            continue

        # The body begins on the line after the START marker.
        newline_index = text.find("\n", start_index, end_index)
        if newline_index == -1:
            # START and END share a line: there is no body at all.
            body = ""
        else:
            body_start = newline_index + 1
            # Drop the partial line leading up to the END marker. When there
            # is no newline in between, stop at the marker itself rather
            # than letting a -1 index slice to the end of the text.
            body_end = text.rfind("\n", body_start, end_index)
            if body_end == -1:
                body_end = end_index
            body = text[body_start:body_end]

        body = re.sub(r'\[.*?\]', '', body)  # Ignore content between brackets
        body = re.sub(r'\(.*?\)', '', body)  # Ignore content between parentheses
        transcriptions.append(body.strip())
    return transcriptions
def find_element_in_sentence(element_list, sentence):
    """Return the first entry of ``element_list`` mentioned in ``sentence``.

    An entry counts as mentioned when it occurs in the sentence delimited by
    word boundaries, ignoring case. Entries are tried in list order and the
    entry is returned exactly as it appears in the list; ``None`` is returned
    when no entry is found.
    """
    def is_mentioned(candidate):
        # Escape the candidate so it is matched literally.
        word_regex = r'\b{}\b'.format(re.escape(candidate))
        return re.search(word_regex, sentence, re.IGNORECASE) is not None

    return next(
        (candidate for candidate in element_list if is_mentioned(candidate)),
        None,
    )
# Create a Docker client
client = docker.from_env()

# ID or name of the container whose logs are polled
container_id = 'whisper.cpp'

# Transcriptions recorded so far, one entry per transcription index
req = []

# Items looked for in each transcription (constant, so built once)
elements = ['coke', 'orange juice', 'beer', 'fanta']

# Poll forever: every pass re-reads the container's full log, re-parses it
# and reports what was recognised in each transcription.
while True:
    # Retrieve the logs
    logs = client.containers.get(container_id).logs()

    # Decode the logs from bytes to string
    decoded_logs = logs.decode('utf-8')

    transcriptions = parse_transcriptions(decoded_logs)

    # Report the extracted transcriptions
    for i, transcription in enumerate(transcriptions):
        print("\nDemande {}: ".format(i))

        result = find_element_in_sentence(elements, transcription)

        if result:
            print("\tThe guest wants a", result)
        else:
            print("\tThe guest was not understood !")

        # Record each transcription once, at its own index. Store the single
        # item, not the whole `transcriptions` list.
        if len(req) < i + 1:
            req.append(transcription)

    sleep(0.3)
#!/usr/bin/env bash
# Build the whisper image for the selected model when it is not available
# locally yet, then run the `stream` program from it interactively.
set -eu

MODEL="base"
# Deliberately not called LANG: LANG is the locale environment variable, and
# assigning to it would alter the locale seen by every command launched from
# this script whenever LANG is exported.
WHISPER_LANG="en"
IMAGE="whisper-${MODEL}"
# Fixed container name so the container can be looked up by name; the
# companion Python log reader asks Docker for 'whisper.cpp'.
CONTAINER_NAME="whisper.cpp"

# Directory holding this script, used as the Docker build context.
script_dir="$(realpath "$(dirname "${0}")")"

# `docker image inspect` exits non-zero when the image does not exist, so the
# build only runs when needed. (Testing the output of `grep -q` never works:
# it prints nothing, which made the old condition always true.)
if ! docker image inspect "${IMAGE}" >/dev/null 2>&1; then
    docker build -t "${IMAGE}" --build-arg model="${MODEL}" "${script_dir}"
fi

# NOTE(review): --privileged plus the /dev/bus/usb bind mount hands the
# container broad access to host devices; presumably needed to reach a USB
# microphone. Confirm whether a narrower --device mapping is sufficient.
docker run --rm -it \
    --name "${CONTAINER_NAME}" \
    --privileged -v /dev/bus/usb:/dev/bus/usb \
    "${IMAGE}" \
    stream \
    --model "/root/models/ggml-${MODEL}.bin" \
    --language "${WHISPER_LANG}" \
    --step 0 \
    -t 6 \
    --length 5000 \
    -vth 0.3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment