Skip to content

Instantly share code, notes, and snippets.

@sberryman
Created April 20, 2020 14:52
Show Gist options
  • Save sberryman/1e87cccecb02e7493267a48548547caa to your computer and use it in GitHub Desktop.
Save sberryman/1e87cccecb02e7493267a48548547caa to your computer and use it in GitHub Desktop.
Montreal Forced Aligner Dockerfile
# FROM kaldiasr/kaldi:latest
FROM python:3.6-buster
WORKDIR "/workspace"
# System packages used by the source builds further down (autotools, Fortran
# compiler, ATLAS runtime, sox, subversion, wget).
# The package index is fetched and removed inside this single layer so the
# downloaded lists and cached .deb files do not persist in the image.
RUN apt-get update \
    && apt-get install -y \
        autoconf \
        automake \
        gfortran \
        libatlas3-base \
        libtool \
        sox \
        subversion \
        wget \
    && apt-get -y autoremove \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# ADD requirements.txt requirements.txt
# RUN pip install -r requirements.txt
# kaldi (specific version!)
# Clones Kaldi into /opt/kaldi, checks out the commit pinned in KALDI_VERSION,
# builds the bundled tools and OpenBLAS, then configures and builds src/ as
# shared libraries against that OpenBLAS install.
ENV KALDI_VERSION=094d22746b604fd20c2b8730966c9d0bc9f2170b
# `$(nproc)` is a shell command substitution that yields the number of
# available CPUs. The command name must be lowercase: an unknown command
# expands to an empty string, which leaves `make -j` without a job limit.
RUN cd /opt && \
    git clone https://github.com/kaldi-asr/kaldi.git && \
    cd kaldi && \
    git checkout "$KALDI_VERSION" && \
    cd tools && \
    make -j "$(nproc)" && \
    make openblas && \
    cd ../src && \
    ./configure --shared --openblas-root=/opt/kaldi/tools/OpenBLAS/install && \
    make depend -j "$(nproc)" && \
    make -j 4
# OpenGrm-Ngram
# Downloads the release tarball selected by OPENGRM_NGRAM, unpacks it under
# /opt, and builds it with the include and library search paths pointed at
# /opt/kaldi/tools/openfst. The result is installed into an `install`
# directory inside the unpacked source tree.
ENV OPENGRM_NGRAM=1.3.4
RUN cd /opt \
    && wget "http://www.opengrm.org/twiki/pub/GRM/NGramDownload/opengrm-ngram-${OPENGRM_NGRAM}.tar.gz" \
    && tar -xf "opengrm-ngram-${OPENGRM_NGRAM}.tar.gz" \
    && rm "opengrm-ngram-${OPENGRM_NGRAM}.tar.gz" \
    && cd "opengrm-ngram-${OPENGRM_NGRAM}" \
    && export LD_LIBRARY_PATH=/opt/kaldi/tools/openfst/lib \
              CPLUS_INCLUDE_PATH=/opt/kaldi/tools/openfst/src/include \
    && ./configure --prefix="$(pwd)/install" \
    && make -j 4 \
    && make install
# Phonetisaurus
# Clones Phonetisaurus into /opt/Phonetisaurus at the commit pinned in
# PHONETISAURUS_VERSION and builds it (static libraries disabled) against the
# OpenFst headers and libraries under /opt/kaldi/tools/openfst.
ENV PHONETISAURUS_VERSION=64719ca40c17cb70d810fffadac52c97984ca539
RUN cd /opt \
    && git clone https://github.com/AdolfVonKleist/Phonetisaurus.git \
    && cd Phonetisaurus \
    && git checkout "${PHONETISAURUS_VERSION}" \
    && ./configure \
        --enable-static=no \
        --with-openfst-includes=/opt/kaldi/tools/openfst/include \
        --with-openfst-libs=/opt/kaldi/tools/openfst/lib \
    && make -j 4
# aligner
# Clones Montreal-Forced-Aligner into /opt, runs its thirdparty/*_binaries.py
# scripts against the Kaldi, OpenGrm-Ngram and Phonetisaurus trees under /opt,
# installs its Python requirements, downloads the pretrained English model
# into pretrained_models/, and finally runs freezing/freeze.py.
# NOTE(review): MONTREAL_ALIGNER_VERSION is declared but this step never
# references it, so the clone builds whatever the repository's default branch
# contains at build time. Confirm the intended tag/commit and add a
# `git checkout` if a pinned version is wanted.
ENV MONTREAL_ALIGNER_VERSION=1.0.1
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN cd /opt && \
    git clone https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner.git && \
    cd Montreal-Forced-Aligner && \
    python thirdparty/kaldi_binaries.py /opt/kaldi && \
    python thirdparty/opengrm_ngram_binaries.py "/opt/opengrm-ngram-$OPENGRM_NGRAM" && \
    python thirdparty/phonetisaurus_binaries.py /opt/Phonetisaurus && \
    pip install --no-cache-dir -r requirements.txt && \
    mkdir pretrained_models && \
    cd pretrained_models && \
    wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/raw/dc09bb3d7302bc66eb8fdef543b44aa0fab61b07/pretrained_models/english.zip && \
    cd ../ && \
    python freezing/freeze.py
# Start containers inside the aligner's dist directory.
WORKDIR "/opt/Montreal-Forced-Aligner/dist/montreal-forced-aligner"
# only requirement is TextGridTools
# (--no-cache-dir keeps pip's download cache out of the image layer)
RUN pip install --no-cache-dir tgt
# Default to an interactive shell.
CMD ["/bin/bash"]
# use a different docker image!
# make build_align && make run_align
# bin/mfa_align \
# /datasets/CommonVoice/en/speakers \
# /datasets/slr60/english.dict \
# /opt/Montreal-Forced-Aligner/dist/montreal-forced-aligner/pretrained_models/english.zip \
# /output/montreal-aligned/cv-en/
# bin/mfa_validate_dataset \
# /datasets/slr60/test-clean \
# /datasets/slr60/english.dict \
# english
import sys
import tgt
from pathlib import Path
from tqdm import tqdm
# Name of the dataset split to process; switch by (un)commenting one line.
DATASET = 'dev-clean'
# DATASET = 'train-clean-100'
# DATASET = 'train-clean-360'

# dataset_path: transcripts are read from here and the per-book
#               "<speaker>_<book>.alignment.txt" files are written here.
# base_path:    aligner output tree that holds the *.TextGrid files,
#               laid out as <speaker>/<book>/<utterance>.TextGrid.
dataset_path = Path('/datasets/slr60/{}'.format(DATASET))
base_path = Path('/output/montreal-aligned/{}'.format(DATASET))

speaker_dirs = [f for f in base_path.glob("*") if f.is_dir()]
for speaker_dir in tqdm(speaker_dirs):
    book_dirs = [f for f in speaker_dir.glob("*") if f.is_dir()]
    for book_dir in book_dirs:
        # One alignment file per speaker/book pair, stored next to the
        # transcripts in the dataset tree.
        alignment_file = dataset_path.joinpath(
            speaker_dir.stem,
            book_dir.stem,
            "{0}_{1}.alignment.txt".format(speaker_dir.stem, book_dir.stem)
        )
        with open(alignment_file, 'w', encoding='utf-8') as out_file:
            # find our textgrid files (sorted so output order is stable)
            textgrid_files = sorted(
                f for f in book_dir.glob("*.TextGrid") if f.is_file()
            )

            # process each grid file and add to our output
            for textgrid_file in textgrid_files:
                # read the raw transcript as well
                transcript_file = dataset_path.joinpath(
                    speaker_dir.stem,
                    book_dir.stem,
                    "{0}.txt".format(textgrid_file.stem)
                )
                with open(transcript_file, 'r', encoding='utf-8') as in_file:
                    # read() returns the file verbatim, including any
                    # trailing newline it may contain.
                    transcript = in_file.read()

                # read the grid
                textgrid = tgt.io.read_textgrid(textgrid_file)

                # get all the word tiers
                word_tier = textgrid.get_tier_by_name('words')
                intervals = word_tier.intervals

                # One line per utterance:
                #   <utterance id> "<w1,w2,...>" "<end1,end2,...>" <transcript>
                out_file.write("{0} \"{1}\" \"{2}\" {3}\n".format(
                    textgrid_file.stem,
                    ",".join(interval.text for interval in intervals),
                    ",".join(str(interval.end_time) for interval in intervals),
                    transcript
                ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment