Skip to content

Instantly share code, notes, and snippets.

@psibre
Last active August 7, 2017 21:39
Show Gist options
  • Save psibre/3c42fb0767424c6fa249a07ae2721f10 to your computer and use it in GitHub Desktop.
Save psibre/3c42fb0767424c6fa249a07ae2721f10 to your computer and use it in GitHub Desktop.
Dockerized Kaldi-based Montreal Forced Alignment logic for MaryTTS voicebuilding
// Apply the plugin class declared in this script; doing so creates the
// prepareForcedAlignment and runForcedAlignment tasks on this project.
apply(plugin: ForcedAlignPlugin)
/**
 * Gradle plugin that adds two tasks to a project:
 * {@code prepareForcedAlignment} and {@code runForcedAlignment}.
 */
class ForcedAlignPlugin implements Plugin<Project> {

    /**
     * Creates and configures both tasks.
     *
     * The project must already expose {@code generateAllophones} and {@code wav};
     * both are used as dependencies of the preparation task and are read for
     * their {@code destDir} property.
     *
     * @param project the project this plugin is applied to
     */
    void apply(Project project) {
        // Single working directory shared by the preparation and alignment steps.
        def alignmentDir = project.file("${project.buildDir}/forcedAlignment")

        project.task('prepareForcedAlignment', type: PrepareForcedAlignment) {
            dependsOn(project.generateAllophones, project.wav)
            maryXmlDir = project.file(project.generateAllophones.destDir)
            wavDir = project.file(project.wav.destDir)
            forcedAlignmentDir = alignmentDir
        }

        project.task('runForcedAlignment', type: RunForcedAlignment) {
            dependsOn(project.prepareForcedAlignment)
            forcedAlignmentDir = alignmentDir
        }
    }
}
/**
 * Gathers everything the aligner needs into one directory.
 *
 * For each XML file found in {@link #maryXmlDir} a {@code .lab} transcript is
 * written, a pronunciation dictionary {@code dict.txt} is built from all files,
 * and the {@code *.wav} files from {@link #wavDir} are copied alongside them.
 */
class PrepareForcedAlignment extends DefaultTask {

    /** Directory searched for {@code *.xml} files containing {@code t}, {@code syllable} and {@code ph} elements. */
    @InputDirectory
    File maryXmlDir

    /** Directory whose {@code *.wav} files are copied into {@link #forcedAlignmentDir}. */
    @InputDirectory
    File wavDir

    /** Directory receiving the {@code .lab} files, {@code dict.txt} and the copied audio. */
    @OutputDirectory
    File forcedAlignmentDir

    /**
     * Writes the transcripts and the dictionary, then copies the audio files.
     *
     * Only tokens that have at least one phoneme are used; a word that occurs
     * more than once keeps the phoneme string of its last occurrence.
     */
    @TaskAction
    void prepare() {
        def dict = [:]
        project.fileTree(maryXmlDir).include('*.xml').each { xmlFile ->
            def tokens = []
            new XmlSlurper().parse(xmlFile).'**'.findAll { it.name() == 't' }.each { token ->
                // Space-separated "p" attributes of every <ph> below this token's syllables.
                def phonemes = token.'**'.findAll { it.name() == 'syllable' }.collect { syllable ->
                    syllable.ph.collect { it.@p }
                }.flatten().join(' ')
                // text() may carry line breaks/indentation from the XML layout; trimming keeps
                // them out of the transcript and out of the dictionary key.
                def word = token.text().trim()
                // A blank word would yield a malformed dictionary line, so skip it as well.
                if (word && phonemes) {
                    dict[word] = phonemes
                    tokens << word
                }
            }
            // Replace only the trailing extension; a ".xml" elsewhere in the name stays intact.
            def labName = xmlFile.name.replaceFirst('\\.xml$', '') + '.lab'
            project.file("$forcedAlignmentDir/$labName").withWriter('UTF-8') { out ->
                out.println tokens.join(' ')
            }
        }
        // One "<word> <phonemes>" line per entry, ordered case-insensitively by word.
        project.file("$forcedAlignmentDir/dict.txt").withWriter('UTF-8') { out ->
            dict.toSorted { it.key.toLowerCase() }.each { word, phonemes ->
                out.println "$word $phonemes"
            }
        }
        project.copy {
            from wavDir
            into forcedAlignmentDir
            include '*.wav'
        }
    }
}
/**
 * Runs the aligner inside a Docker container and collects the
 * {@code *.TextGrid} files it leaves in the working directory.
 */
class RunForcedAlignment extends DefaultTask {

    /** Host path that is mounted into the container as {@code /data}. */
    @Input
    String forcedAlignmentDir

    /** Directory the {@code *.TextGrid} files are copied to; defaults to {@code TextGrid} under the build directory. */
    @OutputDirectory
    File destDir = project.file("${project.buildDir}/TextGrid")

    /**
     * Executes the {@code docker run} command, then copies the TextGrid files
     * from {@link #forcedAlignmentDir} into {@link #destDir}.
     */
    @TaskAction
    void run() {
        def volumeMapping = "$forcedAlignmentDir:/data"
        def image = 'psibre/kaldi-mfa'
        def aligner = '/mfa/dist/montreal-forced-aligner/bin/mfa_train_and_align'
        // NOTE(review): the three trailing arguments look like corpus dir, dictionary and
        // output dir, all inside the mounted volume -- confirm against the aligner's usage.
        def alignerArgs = ['/data', '/data/dict.txt', '/data']
        def dockerCommand = ['docker', 'run', '--rm', '-v', volumeMapping, '-t', image, aligner] + alignerArgs

        project.exec {
            commandLine(*dockerCommand)
        }

        project.copy {
            from(forcedAlignmentDir)
            into(destDir)
            include('*.TextGrid')
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment