Skip to content

Instantly share code, notes, and snippets.

@rrajasek95
Last active July 26, 2017 18:17
Show Gist options
  • Save rrajasek95/8355070a870d5eea94f4c3fce210168d to your computer and use it in GitHub Desktop.
Save rrajasek95/8355070a870d5eea94f4c3fce210168d to your computer and use it in GitHub Desktop.
Data Processing of recordings for Transcriber Qualification
#!/usr/bin/env bash
# Convert every .mp3 file found under the folder given as $1 into a .wav file
# written next to it (same path, ".mp3" suffix replaced by ".wav").
# The conversion itself is done by f_mp3towav, which functions.sh provides.
source functions.sh

folder=$1

# IFS= and -r keep leading/trailing blanks and backslashes in file names
# intact; the quotes keep names containing spaces or glob characters whole.
find "$folder" -name "*.mp3" | while IFS= read -r file; do
    stem=${file%.mp3}
    echo "Converting $stem"
    f_mp3towav "$file" "$stem.wav"
done
'''
filename:
author: rishi
date_created: 22/6/17
'''
import sys
import os
# JSGF forced-alignment grammar: leading silence, the phonemes, optional
# trailing silence.
GRAMMAR_TEMPLATE = '#JSGF V1.0;\ngrammar forcing;\npublic <{}> = sil {} [ sil ];\n'


def parse_dict_entry(line):
    """Split one dictionary line into a ``(word, phonemes)`` pair.

    The line is stripped, lower-cased and split on its first run of
    whitespace.  Returns ``None`` for blank lines and for entries that carry
    no pronunciation, so callers can skip them instead of hitting an
    IndexError.
    """
    tokens = line.strip().lower().split(maxsplit=1)
    if len(tokens) < 2:
        return None
    return tokens[0], tokens[1]


def word_grammar(word, phonemes):
    """Return the JSGF grammar text forcing ``phonemes`` for ``word``."""
    return GRAMMAR_TEMPLATE.format(word, phonemes)


def write_word_grammars(words_dir='words', dict_path='subset_words.dict'):
    """Write ``<word>/<word>-align.jsgf`` for every dictionary word with a folder.

    Only dictionary entries whose (lower-cased) word matches an entry of
    ``words_dir`` produce a grammar file; all other entries are ignored.
    """
    folder_list = set(os.listdir(words_dir))
    with open(dict_path, 'r') as f:
        for line in f:
            entry = parse_dict_entry(line)
            if entry is None:
                continue
            word, phonemes = entry
            if word not in folder_list:
                continue
            target = os.path.join(words_dir, word, '{0}-align.jsgf'.format(word))
            with open(target, 'w') as w:
                w.write(word_grammar(word, phonemes))


if __name__ == '__main__':
    write_word_grammars()
#!/usr/bin/env bash
# For every entry listed in the directory $1: look the word up in
# subset_words.dict and, if it has an entry, rebuild
# words/<word>/<word>-normalign.txt with one line per *-align.txt file found
# under words/<word>.  Each line holds "<label> <-log(1-$5)> <log($3-$2+1)>"
# for every matching alignment row, followed by the alignment file name.
# NOTE(review): entries are listed from $1 but files are read and written under
# the hard-coded "words/" directory, so $1 is presumably always "words".
ls "$1" | while IFS= read -r word; do
    # Get the dict entry for the word.  Compare the first field as a string and
    # stop at the first hit, so that looking up "it" cannot also pick up "it's".
    line=$(awk -v w="$word" '($1 "") == (w "") { print; exit }' subset_words.dict)
    if [ -n "$line" ]; then
        # Get the phonemes: lower-case the entry, drop the word, join with "|".
        # $line is left unquoted on purpose so runs of blanks collapse to one space.
        phoneme_regex="^($(echo $line | tr 'A-Z' 'a-z' | cut -d' ' -f2- | tr ' ' '|'))"
        out="words/$word/$word-normalign.txt"
        # Start from an empty file (also works when the file does not exist yet).
        : > "$out"
        # For each align file, get the numerical data.
        # NOTE(review): pregex is a prefix match on the whole row, so a phoneme
        # such as "s" also selects rows whose first column merely starts with "s".
        find "words/$word" -name "*-align.txt" | while IFS= read -r f; do
            echo "$f"
            awk -v pregex="$phoneme_regex" '$0 ~ pregex {printf "%2s %6.3f %6.3f ", $1, -log(1-$5), log($3-$2+1)} END {print FILENAME}' "$f" | tee -a "$out"
        done
    fi
done
#!/usr/bin/env bash
# For every utterance directory under sentences/ that has a
# <utterance>-align.jsgf grammar: read the phoneme sequence out of the grammar
# and rebuild sentences/<utterance>/<utterance>-alignments.txt with one line
# per *-align.txt file.  Each line holds "<label> <$5> <($3-$2)/100.0>" for
# every matching alignment row, followed by the alignment file name.
ls sentences | while IFS= read -r utterance; do
    dir="sentences/$utterance"
    path="$dir/$utterance-align.jsgf"
    if [ -f "$path" ]; then
        # Take the phonemes between "= sil " and " [ sil ];".  Anchoring on the
        # rule syntax keeps an utterance word that contains "sil" from being
        # mistaken for the start of the phoneme list.
        phonemes=$(sed -n 's/^public <[^>]*> = sil \(.*\) \[ sil \];$/\1/p' "$path")
        if [ -z "$phonemes" ]; then
            echo "No phoneme rule found in $path, skipping" >&2
            continue
        fi
        # Unique phonemes joined with "|".  $phonemes is left unquoted on
        # purpose so each phoneme becomes its own line.
        phoneme_regex="^($(printf '%s\n' $phonemes | sort -u | paste -sd '|' -))"
        out="$dir/$utterance-alignments.txt"
        # Start from an empty file (also works when the file does not exist yet).
        : > "$out"
        # NOTE(review): pregex is a prefix match on the whole row, so a phoneme
        # such as "s" also selects rows whose first column merely starts with "s".
        find "$dir" -name "*-align.txt" | while IFS= read -r f; do
            echo "$f"
            awk -v pregex="$phoneme_regex" '$0 ~ pregex {printf "%2s %5d %4.2f ", $1, $5, ($3-$2)/100.0} END {print FILENAME}' "$f" >> "$out"
        done
    fi
done
'''
filename:
author: rishi
date_created: 23/6/17
'''
import sys
import os
# JSGF forced-alignment grammar: leading silence, the phonemes, optional
# trailing silence.
GRAMMAR_TEMPLATE = "#JSGF V1.0;\ngrammar forcing;\npublic <{}> = sil {} [ sil ];\n"

# Folder names carry no apostrophes.  Each rule is
# (suffix in the folder name, number of trailing characters to drop,
#  text to append) and rebuilds the contraction spelling used by the dictionary.
CONTRACTION_RULES = (
    ("s", 1, "'s"),     # probable possessive
    ("re", 2, "'re"),   # we're, they're
    ("ll", 2, "'ll"),   # we'll, i'll
    ("nt", 1, "'t"),    # isn't
    ("ve", 2, "'ve"),
)

# Folder-name spellings that map to one or more differently spelled
# dictionary words.
SPELLING_FIXES = {
    "oclock": ("o'clock",),
    "schoolbag": ("school", "bag"),
    "beanstalk": ("bean", "stalk"),
    "colourful": ("colorful",),
    "kungfu": ("kung", "fu"),
    "sandcastle": ("sand", "castle"),
}


def load_pronunciations(dict_path):
    """Read ``dict_path`` into a ``{word: phonemes}`` map, all lower-cased.

    Blank lines and entries without a pronunciation are skipped.
    """
    lexicon = {}
    with open(dict_path, 'r') as f:
        for line in f:
            token = line.strip().lower().split(maxsplit=1)
            if len(token) == 2:
                lexicon[token[0]] = token[1]
    return lexicon


def dictionary_words(word, lexicon):
    """Map one folder-name token to the dictionary words that spell it.

    Returns a list of dictionary keys, or ``None`` when the token cannot be
    resolved.  Every returned word is guaranteed to be in ``lexicon``, so a
    token is either fully resolved or reported missing; it is never dropped
    silently.
    """
    if word in lexicon:
        return [word]
    for suffix, drop, replacement in CONTRACTION_RULES:
        if word.endswith(suffix):
            candidate = word[:-drop] + replacement
            if candidate in lexicon:
                return [candidate]
    parts = SPELLING_FIXES.get(word)
    if parts and all(part in lexicon for part in parts):
        return list(parts)
    return None


def sentence_grammar(folder, lexicon):
    """Build the grammar for a hyphen-separated sentence folder name.

    Returns ``(grammar_text, missing_words)``.  ``grammar_text`` is ``None``
    whenever ``missing_words`` is non-empty.
    """
    word_list = []
    missing_words = []
    for word in folder.split('-'):
        resolved = dictionary_words(word, lexicon)
        if resolved is None:
            missing_words.append(word)
        else:
            word_list.extend(resolved)
    if missing_words:
        return None, missing_words
    utterance = " ".join(word_list)
    phoneme_string = " ".join(lexicon[word] for word in word_list)
    return GRAMMAR_TEMPLATE.format(utterance, phoneme_string), missing_words


def write_sentence_grammars(sentences_dir="sentences", dict_path="subset_words.dict"):
    """Write ``<folder>/<folder>-align.jsgf`` for every resolvable sentence folder.

    Folders containing words that cannot be resolved are reported on stdout
    and get no grammar file.
    """
    folder_list = set(os.listdir(sentences_dir))
    # The CSV sits next to the sentence folders; discard() tolerates its absence.
    folder_list.discard("voice_tag_sentence.csv")
    lexicon = load_pronunciations(dict_path)
    for folder in sorted(folder_list):
        jsgf_content, missing_words = sentence_grammar(folder, lexicon)
        if missing_words:
            print("Words missing:", missing_words)
            continue
        target = os.path.join(sentences_dir, folder, '{0}-align.jsgf'.format(folder))
        with open(target, 'w') as w:
            w.write(jsgf_content)


if __name__ == '__main__':
    write_sentence_grammars()
#!/usr/bin/env bash
# For every entry listed in the directory $1 that has a
# words/<word>/<word>-normalign.txt file: compute the per-column mean and
# standard deviation of the numeric columns, then the standard score of every
# value, and write the report to words/<word>/<word>-standards.txt.
# Columns come in groups of three (label, value, value); the last field of
# each line is printed back unchanged at the end of that row.
ls "$1" | while IFS= read -r word; do
    normalign="words/$word/$word-normalign.txt"
    if [ -f "$normalign" ]; then
        # Only lines containing a space carry data.
        grep ' ' "$normalign" | awk '
            {
                # Smallest field count seen; only columns below it are reported.
                if (!mnf || NF < mnf) { mnf = NF }
                for (f = 1; f < NF; f++) {
                    i[NR, f] = $f
                    # Every third column, starting with the first, is a label.
                    if ((f - 1) % 3) {
                        m[f] += $f                    # running sum
                        d[f] = $f - a[f]
                        a[f] += d[f] / NR             # running mean
                        m2[f] += d[f] * ($f - a[f])   # running sum of squared deviations
                    }
                }
                i[NR, 0] = $NF
            }
            END {
                print "Means and standard deviations of acoustic scores and durations for each phoneme:"
                for (f = 1; f < mnf; f++) {
                    if ((f - 1) % 3) {
                        printf "%5.3f %5.3f ", m[f] / NR, sqrt(m2[f] / NR)
                    } else {
                        printf "%s ", $f
                    }
                }
                print "\n\nStandard scores of acoustic scores and durations for each scored utterance:"
                for (r = 1; r <= NR; r++) {
                    for (f = 1; f < mnf; f++) {
                        if ((f - 1) % 3) {
                            # A single utterance or identical values give a zero
                            # deviation; report a score of 0 instead of dividing by it.
                            sd = sqrt(m2[f] / NR)
                            z = (sd > 0) ? (i[r, f] - (m[f] / NR)) / sd : 0
                            printf "%+6.3f ", z
                        } else {
                            printf "%s ", i[r, f]
                        }
                    }
                    print i[r, 0]
                }
            }' > "words/$word/$word-standards.txt"
    fi
done
'''
filename:
author: rishi
date_created: 23/6/17
'''
import os
# Deletes the stress digits 0, 1 and 2 from a phone string.
STRESS_DIGITS = str.maketrans("", "", "012")

# Folder names carry no apostrophes.  Each rule is
# (suffix in the folder name, number of trailing characters to drop,
#  text to append) and rebuilds the contraction spelling used by the dictionary.
CONTRACTION_RULES = (
    ("s", 1, "'s"),     # probable possessive
    ("re", 2, "'re"),   # we're, they're
    ("ll", 2, "'ll"),   # we'll, i'll
    ("nt", 1, "'t"),    # isn't
    ("ve", 2, "'ve"),
)

# Folder-name spellings whose dictionary words are spelled differently; the
# original spelling is requested from the dictionary as well.
COMPOUND_PARTS = {
    "schoolbag": ("school", "bag"),
    "beanstalk": ("bean", "stalk"),
    "colourful": ("colorful",),
    "kungfu": ("kung", "fu"),
    "sandcastle": ("sand", "castle"),
}


def strip_stress(phones):
    """Return ``phones`` with the stress digits 0, 1 and 2 removed."""
    return phones.translate(STRESS_DIGITS)


def lexicon_words_for(word, lexicon):
    """Return the set of dictionary words to request for one folder-name token.

    A token already in ``lexicon`` is requested as is.  Otherwise a matching
    contraction spelling is preferred when the dictionary has it, ``oclock``
    becomes ``o'clock``, and known compounds add their parts next to the token
    itself.  Empty tokens (from doubled hyphens) request nothing.
    """
    if not word:
        return set()
    if word in lexicon:
        return {word}
    for suffix, drop, replacement in CONTRACTION_RULES:
        if word.endswith(suffix):
            candidate = word[:-drop] + replacement
            return {candidate if candidate in lexicon else word}
    if word == "oclock":
        return {"o'clock"}
    return set(COMPOUND_PARTS.get(word, ())) | {word}


def write_subset_dictionary(cmudict_path='../cmudict.dict', words_dir='words',
                            sentences_dir='sentences', out_path='subset_words.dict'):
    """Write the dictionary entries needed by the word and sentence folders.

    Entries are kept in their original order with stress digits stripped, and
    a final ``sil SIL`` entry is appended for the silence used by the grammar.
    """
    word_list = []
    word_phonemes_map = {}
    with open(cmudict_path, 'r') as dictionary_file:
        for line in dictionary_file:
            fields = line.strip().split(maxsplit=1)
            if len(fields) != 2:
                # Blank line or entry without a pronunciation: nothing to keep.
                continue
            word, phones = fields
            word_list.append((word, strip_stress(phones)))
            word_phonemes_map[word.lower()] = phones.lower()

    wanted = set(os.listdir(words_dir))
    for sentence in os.listdir(sentences_dir):
        for token in sentence.split('-'):
            wanted |= lexicon_words_for(token, word_phonemes_map)

    subset_word_list = [(word, phones) for (word, phones) in word_list if word in wanted]
    subset_word_list.append(("sil", "SIL"))  # Handling silence in the grammar
    with open(out_path, 'w') as dictionary_file:
        for (word, phones) in subset_word_list:
            dictionary_file.write(word + " " + phones + "\n")


if __name__ == '__main__':
    write_subset_dictionary()
#!/usr/bin/env bash
# Decode the MP3 file $1 into the WAV file $2 with mpg123.
# Both paths are quoted so names containing spaces or glob characters survive.
function f_mp3towav {
    mpg123 -w "$2" "$1"
}
# Download the URL $2 with wget and save it as $3/$1, creating the folder $3
# first when needed.
function f_download_audio_to_folder {
    # Locals keep these names from overwriting same-named variables of the caller.
    local filename=$1
    local url=$2
    local folder=$3
    # mkdir -p is a no-op when the folder already exists.
    mkdir -p "$folder"
    # Quoted so "?" and "&" in the URL are passed through literally.
    wget -O "$folder/$filename" "$url"
}
# Run pocketsphinx_continuous on the audio file $1 with the JSGF grammar $2 and
# the dictionary $3.  The combined stdout/stderr is shown on the terminal and
# also saved to "<audio>-align.txt".
function f_force_align {
    local audio=$1
    local align_jsgf=$2
    local phoneme_dict=$3
    # Announce the file being processed (the original echoed a misspelled,
    # unset variable and therefore printed an empty line).
    echo "$audio"
    pocketsphinx_continuous -infile "$audio" -jsgf "$align_jsgf" -dict "$phoneme_dict" -backtrace yes -fsgusefiller no -bestpath no -wbeam 1e-56 -beam 1e-57 2>&1 | tee "${audio}-align.txt"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment