Skip to content

Instantly share code, notes, and snippets.

@gongcastro
Last active August 13, 2021 11:54
Show Gist options
  • Save gongcastro/173e8a378c4106bddc9bb3d18357beba to your computer and use it in GitHub Desktop.
Save gongcastro/173e8a378c4106bddc9bb3d18357beba to your computer and use it in GitHub Desktop.
# get audio duration
# set up ----
library(audio)
library(purrr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(PraatR)
library(stringr)
# create functions ----
# load audios from repository
load_audios <- function(
path, # path to folder containing the audios
format = "wav" # audio format (must be "wav")
){
if (!(format %in% "wav")) stop("format must be 'wav'")
file_paths <- list.files(path, full.names = TRUE)
file_names <- list.files(path, full.names = FALSE)
x <- map(file_paths, function(x) as.double(load.wave(x))) %>% set_names(file_names)
return(x)
}
# this will return a data frame of durations (one row per audio)
get_audio_duration <- function(
path, # path to folder containing the audios
sampling_rate = 44000, # recording sampling rate (in Hz)
units = "s" # time unit (s for seconds, ms for milliseconds)
){
audios <- load_audios(path)
if (units %in% "ms") sampling_rate <- sampling_rate/1000
x <- map_dbl(audios, length)/sampling_rate/2
x <- data.frame(audio = names(x), duration = x)
row.names(x) <- NULL
return(x)
}
# this will return a data frame with one amplitude measure per time point for each audio
# multiple rows per audio, and two rows per time point (positive and negative amplitude)
get_audio_amplitude <- function(
path, # path to folder containing the audios
sampling_rate = 44000, # recording sampling rate (in Hz)
units = "s" # time unit (s for seconds, ms for milliseconds)
){
audios <- load_audios(path)
x <- map(audios, function(x){
data.frame(amplitude = x) %>%
mutate(time = 1:nrow(.)/sampling_rate/2)
})
x <- bind_rows(x, .id = "audio")
x <- x[, c("audio", "time", "amplitude")]
return(x)
}
get_audio_pitch <- function(
path, # path to folder containing the audios
time_step = 0.001, # time step in seconds (0.0 = auto)
pitch_limits = c(95, 800) # pitch floor and ceiling in Hz
){
file_paths <- list.files(path, full.names = TRUE)
file_names <- list.files(path, full.names = FALSE)
pitch_args <- list(time_step, pitch_limits[1], pitch_limits[2])
pitch_paths <- paste0(tempdir(), "\\", file_names)
pitch_tier_paths <- paste0(tempdir(), "\\", file_names)
map2(
.x = file_paths, .y = pitch_paths,
~praat(
"To Pitch...", list(time_step, pitch_limits[1], pitch_limits[2]),
input = .x, output = .y, overwrite = TRUE
)
)
map2(
.x = pitch_paths, .y = pitch_tier_paths,
~praat(
"Down to PitchTier",
input = .x, output = .y, overwrite = TRUE, filetype = "headerless spreadsheet"
)
)
x <- map(
pitch_tier_paths,
~read.table(., col.names = c("time", "f0"))) %>% # read the .Pitch object from the pitch.tier.path
set_names(file_names) %>%
bind_rows(.id = "audio")
return(x)
}
# get pitch local maxima for each audio
get_pitch_maxima <- function(
pitch # output of get_audio_pitch
){
x <- group_split(pitch, audio)
x <- x %>%
map(., "f0") %>%
map(., ~which(c(NA, ., NA) > c(., NA, NA) & c(NA, ., NA) > c(NA, NA, .))) %>% # get indices of local maxima
map(`-`, 1) %>%
map2(x, ., ~slice(.x, .y)) %>%
map_df(., bind_rows) # merge datasets of all audios into one
return(x)
}
# get formants
get_audio_formants <- function(
path,
max_formants = 2, # maximum number of formats to extract, e.g. if 2, F1 and F2 are extracted
time_step = 0.001, # time step (s)
max_frequency = 10000, # maximum formant (Hz)
time_window_length = 0.025, # window length (s)
pre_emphasis = 50, # pre-emphasis from (Hz)
long_format = FALSE # should table be on long format? (values from F1 and F2 in the same column)
){
file_paths <- list.files(path, full.names = TRUE)
file_names <- list.files(path, full.names = FALSE)
formant_args <- list(time_step, max_formants, max_frequency, time_window_length, pre_emphasis)
formant_paths <- paste0(tempdir(), "\\", file_names)
formant_tab_paths <- paste0(tempdir(), "\\", file_names)
# retrieve formants from .wav files
map2(
.x = file_paths, .y = formant_paths,
~praat(
"To Formant (burg)...",
arguments = formant_args, # take the arguments specified above as a list
input = .x, # paths to the audio files
output = .y, # paths for the resulting paths of the .Pitch files
overwrite = TRUE # overwrite the files in the outcome folder if the function is run again
)
)
formant_tab_args <- list(
FALSE, # include frame number
TRUE, # include time
3, # time decimals
FALSE, # include intensity
3, # intensity decimals
FALSE, # include number of formants
3, # frequency decimals
FALSE # include bandwidths
)
map2(
.x = formant_paths, .y = formant_tab_paths,
~praat(
"Down to Table...",
arguments = formant_tab_args, # take the arguments specified above as a list
input = .x, # paths to the audio files
output = .y, # paths for the resulting paths of the .Pitch files
filetype = "tab-separated", # this format is easier to read
overwrite = TRUE
)
)
x <- map(
formant_tab_paths,
function(x){
read.table(x, header = TRUE, sep = "\t", na.strings = "--undefined--") %>%
rename_all(~str_remove_all(., "\\.s\\.|\\.Hz\\.") %>% tolower())
}
) %>%
set_names(file_names) %>%
bind_rows(.id = "audio")
if (long_format) x <- pivot_longer(x, starts_with("f"), names_to = "formant", values_to = "value")
return(x)
}
# some examples ----
# load audios
path <- "C:/Users/gonza/Documents/cognate-priming/Stimuli/Sounds/sounds_cat"
# extract and plot durations
durations <- get_audio_duration(path, sampling_rate = 48000, units = "s")
ggplot(durations, aes(x = duration, y = reorder(audio, duration))) +
geom_point() +
labs(x = "Duration (s)", y = "Audio") +
theme_minimal()
# extract and plot raw amplitudes
amplitudes <- get_audio_amplitude(path, sampling_rate = 48000, units = "s")
# subsetting only two audios (plotting all audios may take a while
ggplot(amplitudes[amplitudes$audio %in% c("abella-i.wav", "porta.wav"),], aes(x = time, y = amplitude)) +
facet_wrap(~audio, ncol = 1) +
geom_line() +
labs(x = "Time (s)", y = "Amplitude (dB)") +
theme_minimal()
# extract and plot mean amplitudes
mean_amplitudes <- amplitudes %>%
group_by(audio) %>%
summarise(mean_amplitude = mean(amplitude, na.rm = TRUE), .groups = "drop")
ggplot(mean_amplitudes, aes(x = mean_amplitude, y = reorder(audio, mean_amplitude))) +
geom_point() +
labs(x = "Duration (s)", y = "Audio") +
theme_minimal()
# extract and plot pitch
pitch <- get_audio_pitch(path)
ggplot(pitch, aes(x = time, y = f0)) +
geom_line(aes(colour = audio), alpha = 0.5) +
geom_smooth(colour = "black") +
labs(x = "Time", y = "F0 (Hz)") +
theme_minimal() +
theme(legend.position = "none")
# extract and plot pitch for one audio (with local maxima)
pitch_maxima <- get_pitch_maxima(pitch)
ggplot(pitch[pitch$audio=="abella-i.wav",], aes(x = time, y = f0)) +
geom_line(size = 1) +
geom_point(data = pitch_maxima[pitch_maxima$audio=="abella-i.wav",], size = 5, colour = "red") +
labs(x = "Time", y = "F0 (Hz)") +
theme_minimal() +
theme(legend.position = "none")
# extract and plot formants
formants <- get_audio_formants(path = path, max_formants = 2, long_format = TRUE)
ggplot(formants[formants$audio=="abella-i.wav",], aes(x = time, y = value, colour = formant, group = formant)) +
geom_point(size = 1, alpha = 0.5) +
geom_smooth() +
labs(x = "Time (s)", y = "Frequency (Hz)", colour = "Formant") +
theme_minimal()
# extract and plot mean formants
mean_formants <- formants %>%
group_by(audio, formant) %>%
summarise(
mean_value = mean(value, na.rm = TRUE),
sd_value = sd(value, na.rm = TRUE),
.groups = "drop"
)
ggplot(mean_formants, aes(x = mean_value, y = reorder(audio, mean_value), colour = formant)) +
geom_errorbar(aes(xmin = mean_value-sd_value, xmax = mean_value+sd_value)) +
geom_point() +
labs(x = "Frequency (Hz)", y = "Audio", colour = "Formant") +
theme_minimal()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment