rrajasek95/convert-wav.sh

## convert-wav.sh
#!/usr/bin/env bash
#
# convert-wav.sh - convert every MP3 found below a folder into a WAV file
# stored next to it (same path, ".mp3" replaced by ".wav").
#
# Usage: convert-wav.sh [folder]
#   folder  Directory searched recursively for "*.mp3". Defaults to the
#           current directory, which is where GNU find searched when the
#           argument was omitted.
#
# Relies on f_mp3towav from functions.sh, which is sourced relative to the
# current working directory.
source functions.sh

folder=${1:-.}

# "IFS= read -r" keeps backslashes and surrounding blanks in file names
# intact, and every expansion is quoted so paths containing spaces survive.
find "$folder" -name "*.mp3" | while IFS= read -r file; do
    stem=${file%.mp3}
    echo "Converting $stem"
    # Defensive: stdin is redirected so the converter can never consume the
    # remaining file names that the loop still has to read from the pipe.
    f_mp3towav "$file" "$stem.wav" </dev/null
done

## create-jsgf.py
'''
filename: create-jsgf.py
author: rishi
date_created: 22/6/17
'''

import sys

import os
def build_alignment_grammar(word, phonemes):
    """Return the JSGF forced-alignment grammar text for one word.

    word     -- rule name placed between the angle brackets.
    phonemes -- space separated phoneme sequence placed between the leading
                ``sil`` and the optional trailing ``[ sil ]``.
    """
    return '#JSGF V1.0;\ngrammar forcing;\npublic <{}> = sil {} [ sil ];\n'.format(word, phonemes)


def write_word_grammars(dict_path='subset_words.dict', words_dir='words'):
    """Write ``<word>-align.jsgf`` into every word folder that has a dictionary entry.

    dict_path -- pronunciation dictionary, one ``word phoneme ...`` entry per line.
    words_dir -- directory whose entry names are the words to generate grammars for.

    Each dictionary line is lower-cased before use. Lines without both a word
    and a phoneme sequence (blank or truncated lines) are skipped instead of
    raising IndexError. Returns the number of grammar files written.
    """
    folder_list = set(os.listdir(words_dir))
    written = 0

    with open(dict_path, 'r') as f:
        for line in f:
            tokens = line.strip().lower().split(maxsplit=1)
            if len(tokens) < 2:
                continue
            word, phonemes = tokens
            if word not in folder_list:
                continue
            grammar_path = os.path.join(words_dir, word, '{}-align.jsgf'.format(word))
            with open(grammar_path, 'w') as w:
                w.write(build_alignment_grammar(word, phonemes))
            written += 1
    return written


if __name__ == '__main__':
    write_word_grammars()

## create-normalign.sh
#!/usr/bin/env bash
#
# create-normalign.sh - collect per-phoneme numbers from the "*-align.txt"
# files of every word into words/<word>/<word>-normalign.txt.
#
# Usage: create-normalign.sh [dir]
#   dir  Directory whose entry names are the words to process (defaults to
#        the current directory). Input and output always live under "words/"
#        relative to the current directory, as before.
#
# Reads subset_words.dict from the current directory.

#######################################
# Print the lower-cased phonemes of a dictionary word.
# Only the first entry whose word field equals the requested word exactly is
# used, so neighbours such as "a's" or "cat(2)" no longer leak into the
# result of a lookup for "a" or "cat".
# Arguments: $1 - word to look up
#            $2 - dictionary file (default: subset_words.dict)
# Outputs:   the phonemes separated by single spaces; nothing if not found
#######################################
function f_lookup_phonemes {
    local word=$1
    local dict=${2:-subset_words.dict}
    # ($1 "") forces a string comparison even for numeric-looking words.
    awk -v w="$word" '
        ($1 "") == w {
            $1 = ""
            sub(/^ +/, "")
            print tolower($0)
            exit
        }' "$dict"
}

# A glob replaces parsing the output of ls, so odd file names stay intact.
for entry in "${1:-.}"/*; do
    word=${entry##*/}

    # Get the phonemes of the dict entry for the word
    phonemes=$(f_lookup_phonemes "$word")
    [ -n "$phonemes" ] || continue

    if [ ! -d "words/$word" ]; then
        echo "Skipping $word: words/$word is not a directory" >&2
        continue
    fi

    # NOTE(review): the pattern is anchored only at the start of the line, so
    # a short phoneme such as "t" also matches lines that begin with "th";
    # confirm whether that is intended for the align file format.
    phoneme_regex="^(${phonemes// /|})"
    normalign="words/$word/$word-normalign.txt"

    # Start from an empty file; truncating avoids the error rm printed when
    # the file did not exist yet.
    : > "$normalign"

    # for each align file, get the numerical data
    find "words/$word" -name "*-align.txt" | while IFS= read -r f; do
        echo "$f"
        awk -v pregex="$phoneme_regex" '$0 ~ pregex {printf "%2s %6.3f %6.3f ", $1, -log(1-$5), log($3-$2+1)} END {print FILENAME}' "$f" | tee -a "$normalign"
    done
done

## create-sentence-alignments.sh
#!/usr/bin/env bash
#
# create-sentence-alignments.sh - for every utterance folder under
# "sentences/" that has a <utterance>-align.jsgf grammar, gather the phoneme
# rows of all its "*-align.txt" files into <utterance>-alignments.txt.
#
# Usage: create-sentence-alignments.sh   (run from the directory that
#        contains "sentences/")

#######################################
# Print the phoneme sequence stored in a forced-alignment grammar, i.e. the
# text between "= sil " and " [ sil ];".
# Anchoring on "= sil " keeps utterances whose own text contains "sil"
# (e.g. "silly") from being mistaken for the start of the phoneme list.
# Arguments: $1 - path of the .jsgf file
# Outputs:   space separated phonemes; nothing if the rule is not found
#######################################
function f_extract_phonemes {
    sed -n 's/^.*= sil \(.*\) \[ sil ];.*$/\1/p' "$1" | head -n 1
}

#######################################
# Build the awk pattern that selects the rows of the given phonemes.
# Arguments: $1 - space separated phonemes (duplicates allowed)
# Outputs:   "^(p1|p2|...)" with the phonemes sorted and de-duplicated
#######################################
function f_phoneme_regex {
    local -a phoneme_list
    local alternation
    read -ra phoneme_list <<< "$1"
    alternation=$(printf '%s\n' "${phoneme_list[@]}" | sort -u | paste -sd '|' -)
    printf '^(%s)\n' "$alternation"
}

# A glob replaces parsing the output of ls, so odd folder names stay intact.
for entry in sentences/*; do
    utterance=${entry##*/}
    jsgf_path="sentences/$utterance/$utterance-align.jsgf"
    [ -f "$jsgf_path" ] || continue

    phonemes=$(f_extract_phonemes "$jsgf_path")
    if [ -z "$phonemes" ]; then
        echo "Skipping $utterance: no phonemes found in $jsgf_path" >&2
        continue
    fi

    # NOTE(review): the pattern is anchored only at the start of the line, so
    # a short phoneme such as "t" also matches lines that begin with "th";
    # confirm whether that is intended for the align file format.
    phoneme_regex=$(f_phoneme_regex "$phonemes")
    alignments="sentences/$utterance/$utterance-alignments.txt"

    # Start from an empty file; truncating avoids the error rm printed when
    # the file did not exist yet.
    : > "$alignments"

    find "sentences/$utterance" -name "*-align.txt" | while IFS= read -r f; do
        echo "$f"
        # One output line per align file: matching rows flattened into
        # "<field 1> <field 5> <(field 3 - field 2) / 100>" triples, followed
        # by the file name.
        awk -v pregex="$phoneme_regex" '$0 ~ pregex {printf "%2s %5d %4.2f ", $1, $5, ($3-$2)/100.0} END {print FILENAME}' "$f" >> "$alignments"
    done
done

## create-sentence-jsgf.py
'''
filename: create-sentence-jsgf.py
author: rishi
date_created: 23/6/17
'''

import sys

import os
# Contraction endings whose apostrophe is missing in folder names:
# (ending of the folder token, characters to drop, text to append).
APOSTROPHE_RULES = (
    ("s", 1, "'s"),     # probable possessive: "its" -> "it's"
    ("re", 2, "'re"),   # probable "we're", "they're"
    ("ll", 2, "'ll"),   # probable "we'll", "i'll"
    ("nt", 1, "'t"),    # probable "isn't"
    ("ve", 2, "'ve"),
)

# Folder tokens that are spelled differently, or as several words, in the
# pronunciation dictionary.
SPECIAL_WORDS = {
    "oclock": ("o'clock",),
    "schoolbag": ("school", "bag"),
    "beanstalk": ("bean", "stalk"),
    "colourful": ("colorful",),
    "kungfu": ("kung", "fu"),
    "sandcastle": ("sand", "castle"),
}


def load_phoneme_map(dict_path):
    """Return {word: phoneme string} read from a pronunciation dictionary.

    Every line is lower-cased; lines without both a word and phonemes are
    ignored.
    """
    word_phonemes_map = {}
    with open(dict_path, 'r') as f:
        for line in f:
            token = line.strip().lower().split(maxsplit=1)
            if len(token) == 2:
                word_phonemes_map[token[0]] = token[1]
    return word_phonemes_map


def resolve_word(word, word_phonemes_map):
    """Map one folder-name token onto dictionary words.

    Returns the list of dictionary words that stand for ``word``, or None
    when no spelling present in ``word_phonemes_map`` can be found. A token
    that matches a contraction ending but whose apostrophe form is unknown is
    reported as unresolved rather than being silently dropped.
    """
    if word in word_phonemes_map:
        return [word]
    for ending, drop, replacement in APOSTROPHE_RULES:
        if word.endswith(ending):
            candidate = word[:-drop] + replacement
            return [candidate] if candidate in word_phonemes_map else None
    parts = SPECIAL_WORDS.get(word)
    if parts and all(part in word_phonemes_map for part in parts):
        return list(parts)
    return None


def build_sentence_grammar(folder, word_phonemes_map):
    """Build the JSGF alignment grammar for a hyphen-separated folder name.

    Returns ``(jsgf_content, missing_words)``. ``jsgf_content`` is None when
    at least one token could not be resolved; ``missing_words`` lists those
    tokens in order.
    """
    missing_words = []
    word_list = []
    for word in folder.split('-'):
        resolved = resolve_word(word, word_phonemes_map)
        if resolved is None:
            missing_words.append(word)
        else:
            word_list.extend(resolved)
    if missing_words:
        return None, missing_words
    utterance = " ".join(word_list)
    phoneme_string = " ".join(word_phonemes_map[word] for word in word_list)
    jsgf_content = "#JSGF V1.0;\ngrammar forcing;\npublic <{}> = sil {} [ sil ];\n".format(utterance, phoneme_string)
    return jsgf_content, missing_words


def main(sentences_dir='sentences', dict_path='subset_words.dict'):
    """Write ``<folder>-align.jsgf`` into every sentence folder.

    Folders with unresolved words are reported on stdout and get no grammar.
    Entries that are not directories (such as voice_tag_sentence.csv) are
    skipped, so a missing CSV no longer raises KeyError.
    """
    word_phonemes_map = load_phoneme_map(dict_path)
    # Sorted so the "Words missing" reports come out in a stable order.
    for folder in sorted(os.listdir(sentences_dir)):
        if folder == "voice_tag_sentence.csv":
            continue
        if not os.path.isdir(os.path.join(sentences_dir, folder)):
            continue
        jsgf_content, missing_words = build_sentence_grammar(folder, word_phonemes_map)
        if missing_words:
            print("Words missing:", missing_words)
            continue
        grammar_path = os.path.join(sentences_dir, folder, '{}-align.jsgf'.format(folder))
        with open(grammar_path, 'w') as w:
            w.write(jsgf_content)


if __name__ == '__main__':
    main()

## create-standards.sh
#!/usr/bin/env bash
#
# create-standards.sh - turn words/<word>/<word>-normalign.txt into
# words/<word>/<word>-standards.txt (per-column means, standard deviations
# and standard scores).
#
# Usage: create-standards.sh [dir]
#   dir  Directory whose entry names are the words to process (defaults to
#        the current directory). Input and output always live under "words/"
#        relative to the current directory, as before.

#######################################
# Compute means, standard deviations and standard scores.
# Input rows (stdin) are repeated "<label> <value> <value>" triples followed
# by a final field that is echoed back at the end of each row. Only the
# columns present in every row (the minimum field count) are reported.
# A column with zero standard deviation (always the case when there is a
# single row) gets a standard score of 0 instead of dividing by zero.
# Outputs: the report on stdout
#######################################
function f_compute_standards {
    awk '
    {
        if (!mnf || NF < mnf) { mnf = NF }
        for (f = 1; f < NF; f++) {
            cell[NR, f] = $f
            # Fields 1, 4, 7, ... are labels; the others are numeric.
            if ((f - 1) % 3) {
                sum[f] += $f
                # Running mean and sum of squared deviations.
                delta[f] = $f - mean[f]
                mean[f] += delta[f] / NR
                m2[f] += delta[f] * ($f - mean[f])
            }
        }
        cell[NR, 0] = $NF
    }
    END {
        print "Means and standard deviations of acoustic scores and durations for each phoneme:"
        for (f = 1; f < mnf; f++) {
            if ((f - 1) % 3) {
                printf "%5.3f %5.3f ", sum[f] / NR, sqrt(m2[f] / NR)
            } else {
                # Labels are taken from the last row read.
                printf "%s ", cell[NR, f]
            }
        }
        print "\n\nStandard scores of acoustic scores and durations for each scored utterance:"
        for (r = 1; r <= NR; r++) {
            for (f = 1; f < mnf; f++) {
                if ((f - 1) % 3) {
                    sd = sqrt(m2[f] / NR)
                    printf "%+6.3f ", (sd > 0 ? (cell[r, f] - (sum[f] / NR)) / sd : 0)
                } else {
                    printf "%s ", cell[r, f]
                }
            }
            print cell[r, 0]
        }
    }'
}

# A glob replaces parsing the output of ls, so odd file names stay intact.
for entry in "${1:-.}"/*; do
    word=${entry##*/}
    normalign="words/$word/$word-normalign.txt"
    [ -f "$normalign" ] || continue
    # Only rows containing a space carry data.
    grep ' ' "$normalign" | f_compute_standards > "words/$word/$word-standards.txt"
done

## create-subset-dict.py
'''
filename: create-subset-dict.py
author: rishi
date_created: 23/6/17
'''
import os

# Contraction endings whose apostrophe is missing in folder names:
# (ending of the folder token, characters to drop, text to append).
APOSTROPHE_RULES = (
    ("s", 1, "'s"),     # probable possessive
    ("re", 2, "'re"),   # probable "we're", "they're"
    ("ll", 2, "'ll"),   # probable "we'll", "i'll"
    ("nt", 1, "'t"),    # probable "isn't"
    ("ve", 2, "'ve"),
)

# Folder tokens that are replaced by their dictionary spelling.
RESPELLINGS = {"oclock": "o'clock"}

# Folder tokens that pull in extra dictionary words (the token itself is
# still requested too, exactly as before).
EXTRA_WORDS = {
    "schoolbag": ("school", "bag"),
    "beanstalk": ("bean", "stalk"),
    "colourful": ("colorful",),
    "kungfu": ("kung", "fu"),
    "sandcastle": ("sand", "castle"),
}

_STRESS_DIGITS = str.maketrans('', '', '012')


def parse_dict_line(line):
    """Split one dictionary line into ``(word, phones)`` without stress digits.

    Returns None for lines that do not hold both a word and phonemes (blank
    or truncated lines), which previously raised ValueError. Text after " #"
    is dropped so that it cannot end up among the phonemes.
    NOTE(review): this assumes " #" starts a trailing comment in
    cmudict.dict - confirm against the dictionary in use.
    """
    entry = line.split(' #', 1)[0].strip().split(maxsplit=1)
    if len(entry) != 2:
        return None
    word, phones = entry
    return word, phones.translate(_STRESS_DIGITS)


def expand_sentence_word(word, known_words):
    """Return the set of dictionary words requested by one folder-name token.

    ``known_words`` holds the lower-cased words of the full dictionary.
    A known token is requested as is. Otherwise a contraction ending selects
    the apostrophe form when the dictionary has it, "oclock" is respelled,
    and compound tokens add their parts. Tokens that cannot be improved
    (including the empty token of a name like "a--b") are returned unchanged.
    """
    if word in known_words:
        return {word}
    for ending, drop, replacement in APOSTROPHE_RULES:
        if word.endswith(ending):
            candidate = word[:-drop] + replacement
            return {candidate if candidate in known_words else word}
    if word in RESPELLINGS:
        return {RESPELLINGS[word]}
    return set(EXTRA_WORDS.get(word, ())) | {word}


def main(cmudict_path='../cmudict.dict', words_dir='words',
         sentences_dir='sentences', output_path='subset_words.dict'):
    """Write the subset of the dictionary needed by the words and sentences.

    The wanted words are the entry names of ``words_dir`` plus the expansion
    of every hyphen-separated token of the entry names of ``sentences_dir``.
    Entries keep the order of the source dictionary, lose their stress
    digits, and are followed by a final "sil SIL" entry.
    """
    entries = []
    known_words = set()
    with open(cmudict_path, 'r') as dictionary_file:
        for line in dictionary_file:
            entry = parse_dict_line(line)
            if entry is None:
                continue
            entries.append(entry)
            known_words.add(entry[0].lower())

    wanted = set(os.listdir(words_dir))
    for sentence in os.listdir(sentences_dir):
        for word in sentence.split('-'):
            wanted |= expand_sentence_word(word, known_words)

    with open(output_path, 'w') as dictionary_file:
        for (word, phones) in entries:
            if word in wanted:
                dictionary_file.write(word + " " + phones + "\n")
        # Handling silence in the grammar
        dictionary_file.write("sil" + " " + "SIL" + "\n")


if __name__ == '__main__':
    main()

## functions.sh
#!/usr/bin/env bash

#######################################
# Convert an MP3 file to WAV with mpg123.
# Arguments: $1 - path of the MP3 input file
#            $2 - path of the WAV file to write
# Returns:   exit status of mpg123
#######################################
function f_mp3towav {
    # Quoted so that paths containing spaces stay single arguments.
    mpg123 -w "$2" "$1"
}

#######################################
# Download a URL into a folder, creating the folder when needed.
# Arguments: $1 - file name to store the download under
#            $2 - URL to fetch
#            $3 - destination folder
# Returns:   non-zero if the folder cannot be created, otherwise the exit
#            status of wget
#######################################
function f_download_audio_to_folder {
    # Locals keep the function from overwriting caller variables such as
    # "folder".
    local filename=$1
    local url=$2
    local folder=$3
    if [ ! -d "$folder" ]; then
        mkdir -p "$folder" || return
    fi

    wget -O "$folder/$filename" "$url"
}

#######################################
# Run pocketsphinx_continuous on one audio file with an alignment grammar and
# save its combined stdout/stderr next to the audio as "<audio>-align.txt".
# Arguments: $1 - audio file
#            $2 - JSGF grammar file
#            $3 - phoneme dictionary
# Outputs:   the audio path, then the recogniser output (also written to
#            "<audio>-align.txt")
#######################################
function f_force_align {
    local audio=$1
    local align_jsgf=$2
    local phoneme_dict=$3
    # Was "echo $audo": the misspelt name always printed an empty line.
    echo "$audio"
    pocketsphinx_continuous -infile "$audio" -jsgf "$align_jsgf" -dict "$phoneme_dict" -backtrace yes -fsgusefiller no -bestpath no -wbeam 1e-56 -beam 1e-57 2>&1 | tee "$audio-align.txt"
}
	# NOTE(review): tab-indented repeat of the convert-wav.sh listing found
	# earlier in this file. Here the pipe is written as "\|", which the shell
	# passes to find as a literal "|" argument instead of starting a pipeline;
	# presumably an export artifact - confirm before running this copy.
	#!/usr/bin/env bash
	source functions.sh

	# Folder to search is the first command-line argument.
	folder=$1
	find $folder -name "*.mp3" \| while read file;
	do
	# Strip the ".mp3" suffix to derive the output name.
	basename=${file%.mp3}
	echo "Converting $basename"
	f_mp3towav $file $basename".wav"
	done
	# NOTE(review): tab-indented repeat of the create-jsgf.py listing found
	# earlier in this file. The nested indentation has been flattened to a
	# single tab, so the with/for/if bodies below are no longer nested the way
	# Python requires; presumably an export artifact - confirm before use.
	'''
	filename:
	author: rishi
	date_created: 22/6/17
	'''

	import sys

	import os
	# Entry names under words/ select which dictionary words get a grammar.
	folder_list = set(os.listdir("words"))

	with open('subset_words.dict', 'r') as f:
	for line in f:
	# First token is the word, the remainder is its phoneme sequence.
	tokens = line.strip().lower().split(maxsplit=1)
	if tokens[0] not in folder_list:
	continue
	with open('words/{0}/{0}-align.jsgf'.format(tokens[0]), 'w') as w:
	w.write('#JSGF V1.0;\ngrammar forcing;\npublic <{}> = sil {} [ sil ];\n'.format(tokens[0], tokens[1]))
	# NOTE(review): tab-indented repeat of the create-normalign.sh listing
	# found earlier in this file. Every pipe is written as "\|", which the
	# shell treats as a literal "|" argument rather than a pipeline operator;
	# presumably an export artifact - confirm before running this copy.
	#!/usr/bin/env bash

	ls $1 \| while read word; do
	# Get the dict entry for the word
	line=$(grep "^$word\b" subset_words.dict)
	if [ ! -z "$line" ]; then
	# get the phonemes
	phoneme_regex="^("$(echo $line \| tr A-Z a-z \| cut -d' ' -f2- \| tr " " "\|")")"
	# Recreate the output file so it starts out empty.
	rm "words/"$word"/"$word"-normalign.txt"
	touch "words/"$word"/"$word"-normalign.txt"
	# for each align file, get the numerical data
	find "words/"$word -name "*-align.txt" \| while read f; do
	echo $f
	awk -v pregex="$phoneme_regex" '$0 ~ pregex {printf "%2s %6.3f %6.3f ", $1, -log(1-$5), log($3-$2+1)} END {print FILENAME}' $f \| tee -a "words/"$word"/"$word"-normalign.txt"
	done
	fi
	done
	# NOTE(review): tab-indented repeat of the create-sentence-alignments.sh
	# listing found earlier in this file. Every pipe is written as "\|", which
	# the shell treats as a literal "|" argument rather than a pipeline
	# operator; presumably an export artifact - confirm before running.
	#!/usr/bin/env bash

	ls sentences \| while read utterance; do
	jsgf_file=$utterance"-align.jsgf"
	path="sentences/"$utterance"/"$jsgf_file
	if [ -f "$path" ]; then
	line=$(cat $path \| grep -o "sil.* sil")
	# Drop the first 4 and the last 6 characters of the matched text.
	phonemes=${line:4:-6}
	phoneme_regex=$(echo $phonemes \| tr " " "\n" \| sort \| uniq \| tr "\n" "\|")
	# Remove the final character before wrapping the list in "^(...)".
	phoneme_regex="^("${phoneme_regex::-1}")"
	# Recreate the output file so it starts out empty.
	rm "sentences/"$utterance"/"$utterance"-alignments.txt"
	touch "sentences/"$utterance"/"$utterance"-alignments.txt"
	find "sentences/"$utterance -name "*-align.txt" \| while read f; do
	echo $f
	awk -v pregex="$phoneme_regex" '$0 ~ pregex {printf "%2s %5d %4.2f ", $1, $5, ($3-$2)/100.0} END {print FILENAME}' $f >> "sentences/"$utterance"/"$utterance"-alignments.txt"
	done
	fi
	done
	# NOTE(review): tab-indented repeat of the create-sentence-jsgf.py listing
	# found earlier in this file. The nested indentation has been flattened to
	# a single tab, so the with/for/if/elif bodies below are no longer nested
	# the way Python requires; presumably an export artifact - confirm.
	'''
	filename:
	author: rishi
	date_created: 23/6/17
	'''

	import sys

	import os
	folder_list = set(os.listdir("sentences"))
	# The CSV entry is removed so only the remaining entries are processed.
	folder_list.remove("voice_tag_sentence.csv")

	# Maps each lower-cased dictionary word to its phoneme string.
	word_phonemes_map = {}

	with open('subset_words.dict', 'r') as f:
	for line in f:
	token = line.strip().lower().split(maxsplit=1)
	word_phonemes_map[token[0]] = token[1]

	for folder in folder_list:
	# Folder names are hyphen-separated words.
	words = folder.split('-')
	missing_words = []
	word_list = []
	for word in words:
	if word not in word_phonemes_map:
	if word[-1] == "s": # Probable possessive
	if word[:-1] + '\'s' in word_phonemes_map:
	word_list.append(word[:-1] + '\'s')
	elif word[-2:] == "re": # Probable we're they're
	if word[:-2] + '\'re' in word_phonemes_map:
	word_list.append(word[:-2] + '\'re')
	elif word[-2:] == "ll": # Probable we'll, i'll etc
	if word[:-2] + '\'ll' in word_phonemes_map:
	word_list.append(word[:-2] + '\'ll')
	elif word[-2:] == "nt": #Probable isn't
	if word[:-1] + '\'t' in word_phonemes_map:
	word_list.append(word[:-1] + '\'t')
	elif word[-2:] == "ve":
	if word[:-2] + '\'ve' in word_phonemes_map:
	word_list.append(word[:-2] + '\'ve')
	elif word == "oclock":
	word_list.append("o'clock")
	elif word == "schoolbag":
	word_list.append("school")
	word_list.append("bag")
	elif word == "beanstalk":
	word_list.append("bean")
	word_list.append("stalk")
	elif word == "colourful":
	word_list.append("colorful")
	elif word == "kungfu":
	word_list.append("kung")
	word_list.append("fu")
	elif word == "sandcastle":
	word_list.append("sand")
	word_list.append("castle")
	else:
	missing_words.append(word)
	else:
	word_list.append(word)
	if missing_words:
	print("Words missing:", missing_words)
	else:
	# Join the resolved words and their phonemes into one grammar rule.
	utterance = " ".join(word_list)
	phoneme_list = [word_phonemes_map[word] for word in word_list]
	phoneme_string = " ".join(phoneme_list)
	jsgf_content = "#JSGF V1.0;\ngrammar forcing;\npublic <{}> = sil {} [ sil ];\n".format(utterance, phoneme_string)
	with open('sentences/{0}/{0}-align.jsgf'.format(folder), 'w') as w:
	w.write(jsgf_content)
	# NOTE(review): tab-indented repeat of the create-standards.sh listing
	# found earlier in this file. Pipes are written as "\|" (and the awk "or"
	# operator as "\|\|"), so neither the shell pipeline nor the awk program
	# reads as intended; presumably an export artifact - confirm before use.
	#!/usr/bin/env bash

	ls $1 \| while read word; do
	if [ -f "words/"$word"/"$word"-normalign.txt" ]; then
	# The awk program keeps a running mean and sum of squared deviations for
	# each numeric column, then prints the means and standard deviations
	# followed by one row of standard scores per input row.
	grep ' ' "words/"$word"/"$word"-normalign.txt" \| awk '{if (!mnf \|\| NF<mnf) {mnf=NF}; for (f=1; f<NF; f++) {i[NR,f]=$f; if ((f-1) % 3) {m[f]+=$f; d[f]=$f-a[f]; a[f]+=d[f]/NR; m2[f]+=d[f]*($f-a[f])}}; i[NR,0]=$NF} END {print "Means and standard deviations of acoustic scores and durations for each phoneme:"; for (f=1; f<mnf; f++) {if ((f-1) % 3) {printf "%5.3f %5.3f ", m[f]/NR, sqrt(m2[f]/NR)} else {printf "%s ", $f}}; print "\n\nStandard scores of acoustic scores and durations for each scored utterance:"; for (r=1; r<=NR; r++) {for (f=1; f<mnf; f++) {if ((f-1) % 3) {printf "%+6.3f ", (i[r,f]-(m[f]/NR))/sqrt(m2[f]/NR)} else {printf "%s ", i[r,f]}}; print i[r,0]}}' > "words/"$word"/"$word"-standards.txt"
	fi
	done
	# NOTE(review): tab-indented repeat of the functions.sh listing found
	# earlier in this file; redefining these functions replaces the earlier
	# definitions. The pipe in f_force_align is written as "\|", which the
	# shell passes on as a literal "|" argument; presumably an export
	# artifact - confirm before sourcing this copy.
	#!/usr/bin/env bash

	# Convert the MP3 given as $1 into the WAV file named by $2 using mpg123.
	function f_mp3towav {
	mpg123 -w $2 $1
	}

	# Download URL $2 as file name $1 into folder $3, creating the folder if
	# it does not exist yet.
	function f_download_audio_to_folder {
	filename=$1
	url=$2
	folder=$3
	if [ ! -d $folder ]; then
	mkdir -p $folder
	fi

	wget -O $folder/$filename $url
	}

	# Run pocketsphinx_continuous on audio file $1 with grammar $2 and
	# dictionary $3.
	function f_force_align {
	audio=$1
	align_jsgf=$2
	phoneme_dict=$3
	# NOTE(review): "audo" is never assigned, so this prints an empty line;
	# "audio" was probably intended.
	echo $audo
	pocketsphinx_continuous -infile $audio -jsgf $align_jsgf -dict $phoneme_dict -backtrace yes -fsgusefiller no -bestpath no -wbeam 1e-56 -beam 1e-57 2>&1 \| tee $audio"-align.txt"
	}