@sappho192
Created July 8, 2024 08:12
Speaker Diarization evaluation using PyAnnote pipeline
AUDIO_FILE = "sample.wav"
REFERENCE = "sample.rttm"
# Clone the https://github.com/pyannote/pyannote-audio and change the path correctly
ROOT_DIR = "D:/REPO/pyannote-audio"
AUDIO_FILE = f"{ROOT_DIR}/tutorials/assets/sample.wav"
REFERENCE = f"{ROOT_DIR}/tutorials/assets/sample.rttm"
from huggingface_hub import HfApi
available_pipelines = [p.modelId for p in HfApi().list_models(filter="pyannote-audio-pipeline")]
list(filter(lambda p: p.startswith("pyannote/"), available_pipelines))
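# Authenticate with the Hugging Face Hub; the diarization pipeline is gated,
# so the token must belong to an account that accepted its user conditions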
from huggingface_hub import notebook_login
notebook_login()
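# Load the pretrained speaker diarization pipeline and run it on the sample audio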
from pyannote.audio import Pipeline
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=True)
diarized = pipeline(AUDIO_FILE)
from pyannote.core import Annotation
assert isinstance(diarized, Annotation)
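# Print each speech turn as "<start> <end> <speaker>" (times in seconds)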
for speech_turn, track, speaker in diarized.itertracks(yield_label=True):
    print(f"{speech_turn.start:4.1f} {speech_turn.end:4.1f} {speaker}")
# visualize only the [0, 30] second time range (the crop affects notebook rendering)
from pyannote.core import notebook, Segment
notebook.crop = Segment(0, 30)
diarized  # displayed graphically when run in a Jupyter notebook
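# Load the ground-truth diarization; load_rttm returns a dict keyed by file URI ("sample")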
from pyannote.database.util import load_rttm
reference = load_rttm(REFERENCE)["sample"]
# map hypothesized speakers onto reference speakers for visualization purposes
from pyannote.metrics.diarization import DiarizationErrorRate
metric_der = DiarizationErrorRate(collar=0.25)
mapping = metric_der.optimal_mapping(reference, diarized)
diarized = diarized.rename_labels(mapping=mapping)

# Evaluation
metric_result = metric_der(reference=reference, hypothesis=diarized, detailed=True)
print(metric_result)
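# Optional follow-up: dump the hypothesis to disk in RTTM format and print the overall DER.
# Minimal sketch; "hypothesis.rttm" is an arbitrary output path, and the overall DER is assumed
# to sit under the metric's name ("diarization error rate") in the detailed result dict.
with open("hypothesis.rttm", "w") as rttm:
    diarized.write_rttm(rttm)
print(f"DER = {100 * metric_result['diarization error rate']:.2f}%")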