Skip to content

Instantly share code, notes, and snippets.

View monogenea's full-sized avatar

Francisco Lima monogenea

View GitHub Profile
# Encode species from fnames regex: pull the "Genus-epithet" token out of each
# file name, swap the hyphen for a space and store the labels as a factor.
# Argument names are written out in full rather than left to partial matching.
species <- fnames %>%
  str_extract(pattern = "[A-Za-z]+-[a-z]+") %>%
  gsub(pattern = "-", replacement = " ") %>%
  factor()
# Stratified sampling: train (80%), val (10%) and test (10%)
set.seed(100) # fixed seed so the fold assignment is reproducible
# Ten folds built from the species labels; one fold each is held out for
# validation and for testing
idx <- createFolds(species, k = 10)
valIdx <- idx[["Fold01"]]
testIdx <- idx[["Fold02"]]
# Define samples for train, val and test
#### Pre-processing ####
# Read files: list every recording under mp3/ together with its path.
# `pattern` is a regular expression, not a shell glob, so the extension is
# matched as a literal ".mp3" anchored at the end of the file name.
fnames <- list.files("mp3/", full.names = TRUE, pattern = "\\.mp3$")
# Write metadata for Kaggle dataset
# The first run of four or more digits in each path is used as the recording ID
ids <- str_extract(fnames, pattern = "[0-9]{4,}")
# Give every query row the path of the file whose ID matches its Recording_ID
# (rows without a matching file get NA)
query$Path <- fnames[match(query$Recording_ID, ids)]
write.csv(query, "metadata.csv")
# Play random file - setWavPlayer in macOS if "permission denied"
#### Download HQ male song recordings > 30s long from Europe ####
# Run the search; the result is used below as a table that has Genus and
# Specific_epithet columns
query <- querxc("type:song type:male len_gt:30 q_gt:C area:europe")
# Build a "Genus epithet" label for every row
query$Species <- with(query, paste(Genus, Specific_epithet))
# Select top 50 most abundant bird species
speciesCount <- sort(table(query$Species), decreasing = TRUE)
# head() returns at most 50 names, so a result with fewer than 50 species is
# not padded with NA entries
topSpecies <- head(names(speciesCount), 50)
# Keep only the rows that belong to the selected species
query <- query[query$Species %in% topSpecies, ]
# Downsample to min size among the 50 classes
balancedClasses <- lapply(topSpecies, function(x){
set.seed(100)
# Tue Feb 4 19:43:33 2020 ------------------------------
# NOTE(review): hard-coded, user-specific working directory; the relative path
# in source("funs.R") below is resolved against it.
setwd("~/Documents/Tutorials/birdsong")
# Attach the packages used by this script
library(parallel)
library(tidyverse)
library(abind)
library(caret)
library(tuneR)
library(warbleR)
# Evaluate the project file funs.R (its contents are not visible here)
source("funs.R")
# Means of 100 Poisson(lambda = 3) samples of size 10, one seed per sample.
# vapply() allocates the result once instead of re-copying a growing vector
# on every iteration, and guarantees a numeric vector of length 100.
myMeans <- vapply(
  seq_len(100),
  function(i) {
    set.seed(i) # seed i makes the i-th sample reproducible on its own
    mean(rpois(10, 3))
  },
  numeric(1)
)
# Histogram of the sample means; the x axis is labelled with x-bar
hist(myMeans, main = NULL, xlab = expression(bar(x)))
# Poisson(lambda = 3) probabilities for the outcomes 1 to 20
# NOTE(review): outcome 0 is not part of `n`, so it is absent from the plot
n <- seq_len(20)
den <- dpois(x = n, lambda = 3)
# Plotted against the index, which coincides with the outcome value here
plot(den, ylab = "Density", xlab = "Outcome")
# Normal(mean = 1000, sd = 200) worked example
# Value that is exceeded with probability 0.16 (the 0.84 quantile)
qnorm(1 - 0.16, mean = 1000, sd = 200) # = 1198.892
# Lower tail: probability that x is smaller than 1200 EUR
pnorm(1200, mean = 1000, sd = 200)
# Upper tail: probability that x is greater than 1200 EUR (the one we want)
1 - pnorm(1200, mean = 1000, sd = 200)
# Binomial(size = 20, prob = 0.7) probabilities for 1 to 20 successes
n <- seq_len(20)
den <- dbinom(x = n, size = 20, prob = 0.7)
plot(den, xlab = "Number of successes", ylab = "Density")
# Total probability; the omitted 0-success term is only 0.3^20, so the sum
# prints as 1
sum(den) # = 1
# Full linear model: MEDV regressed on every other column of `houses`
modHousesFull <- lm(formula = MEDV ~ ., data = houses)
# Coefficient table and goodness-of-fit statistics
summary(modHousesFull) # R2 = 0.741
# Compare obs. vs. pred. plots
# Two panels side by side
par(mfrow = c(1, 2))
# Observed MEDV against the predictions of modHouses. The red identity line is
# passed through panel.first, the documented hook that is evaluated once the
# axes are set up and before the points are drawn, instead of as a stray
# positional argument that only works through accidental lazy evaluation.
plot(houses$MEDV, predict(modHouses),
  xlab = "Observed MEDV", ylab = "Predicted MEDV",
  main = "PCR", panel.first = abline(a = 0, b = 1, col = "red")
)
plot(houses$MEDV, predict(modHousesFull),