Francisco Lima monogenea

## 5-audioClass.R
# Encode species from the file names: take the first match of
# "letters-lowercase letters" in each path and turn the hyphen into a space,
# then store the result as a factor (one level per species).
# Argument names are spelled out in full rather than relying on partial
# matching (`patt`, `rep`).
species <- str_extract(fnames, pattern = "[A-Za-z]+-[a-z]+") %>%
  gsub(pattern = "-", replacement = " ") %>%
  factor()

# Stratified sampling: train (80%), val (10%) and test (10%).
# The indices are split into 10 folds on `species`; one fold is held out for
# validation and another for testing. The seed is fixed so the split is
# reproducible.
set.seed(100)
idx <- createFolds(species, k = 10)
valIdx <- idx$Fold01
testIdx <- idx$Fold02
# Define samples for train, val and test

## 3-audioClass.R
#### Pre-processing ####
# Read files.
# list.files() interprets `pattern` as a regular expression, not a shell glob,
# so match a literal ".mp3" extension anchored at the end of the file name.
fnames <- list.files("mp3/", full.names = TRUE, pattern = "\\.mp3$")

# Write metadata for Kaggle dataset.
# The recording ID is taken as the first run of four or more digits in each
# path; match() then aligns the files to the rows of `query`, leaving NA in
# `Path` for recordings that have no file on disk.
ids <- str_extract(fnames, pattern = "[0-9]{4,}")
query$Path <- fnames[match(query$Recording_ID, ids)]
write.csv(query, "metadata.csv")

# Play random file - setWavPlayer in macOS if "permission denied"

## 2-audioClass.R
#### Download HQ male song recordings > 30s long from Europe ####
query <- querxc("type:song type:male len_gt:30 q_gt:C area:europe")
# Binomial species name built from the genus and epithet columns
query$Species <- with(query, paste(Genus, Specific_epithet))
# Select top 50 most abundant bird species.
# head() is used instead of [1:50] so that a result with fewer than 50
# species yields a shorter vector rather than one padded with NA.
speciesCount <- sort(table(query$Species), decreasing = TRUE)
topSpecies <- head(names(speciesCount), 50)
query <- query[query$Species %in% topSpecies, ]
# Downsample to min size among the 50 classes
balancedClasses <- lapply(topSpecies, function(x){
   set.seed(100)

## 1-audioClass.R
# Tue Feb  4 19:43:33 2020 ------------------------------
setwd("~/Documents/Tutorials/birdsong")
library(parallel)
library(tidyverse)
library(abind)
library(caret)
library(tuneR)
library(warbleR)
source("funs.R")

## 5-poissonPR.R
# Sampling distribution of the mean: draw 100 samples of size 10 from a
# Poisson distribution with lambda = 3 and keep each sample mean.
# Each draw is seeded with its own index so the figure is reproducible.
# vapply() returns a numeric vector of known length instead of growing one
# with c() on every iteration.
myMeans <- vapply(seq_len(100), function(i) {
  set.seed(i)
  mean(rpois(10, 3))
}, numeric(1))
hist(myMeans, main = NULL, xlab = expression(bar(x)))

## 4-poissonPR.R
# Probability mass function of a Poisson distribution with lambda = 3.
# The support of a Poisson variable starts at 0, so outcome 0 is included,
# and the outcomes are passed explicitly as x coordinates instead of relying
# on the position of each value in `den`.
n <- 0:20
den <- dpois(n, 3)
plot(n, den, xlab = "Outcome", ylab = "Density")

## 3-poissonPR.R
# Value of a Normal(mean = 1000, sd = 200) variable that is exceeded with
# probability 0.16, i.e. its 0.84 quantile.
qnorm(1 - 0.16, mean = 1000, sd = 200) # = 1198.892

## 2-poissonPR.R
# For a Normal(mean = 1000, sd = 200) variable:
# lower tail, the probability that x is smaller than 1200 EUR
pnorm(1200, mean = 1000, sd = 200)
# its complement is the quantity of interest: x greater than 1200 EUR
1 - pnorm(1200, mean = 1000, sd = 200)

## 1-poissonPR.R
# Probability mass function of a Binomial distribution with 20 trials and
# success probability 0.7.
# The number of successes ranges over 0..20; including 0 makes the
# probabilities cover the whole support, and the outcomes are passed
# explicitly as x coordinates instead of relying on positions in `den`.
n <- 0:20
den <- dbinom(n, 20, 0.7)
plot(n, den, ylab = "Density", xlab = "Number of successes")
sum(den) # = 1

## 12-poissonPCA.R
# Linear model on the untransformed data: MEDV regressed on every other
# column of `houses` (the "all 14 vars" fit).
modHousesFull <- lm(formula = MEDV ~ ., data = houses)
summary(modHousesFull) # R2 = 0.741

# Compare obs. vs. pred. plots, side by side (1 row, 2 columns).
par(mfrow = c(1, 2))
# Left panel: observed MEDV against the predictions of `modHouses`.
plot(houses$MEDV, predict(modHouses),
     xlab = "Observed MEDV", ylab = "Predicted MEDV",
     main = "PCR")
# Identity line (intercept 0, slope 1) as the reference for a perfect fit.
# It is drawn as its own step after plot(); previously abline() was passed as
# an unnamed argument inside the plot() call, where it is matched
# positionally to one of plot()'s own parameters.
abline(a = 0, b = 1, col = "red")
plot(houses$MEDV, predict(modHousesFull),
	# Encode species from fnames regex
	species <- str_extract(fnames, patt = "[A-Za-z]+-[a-z]+") %>%
	gsub(patt = "-", rep = " ") %>% factor()

	# Stratified sampling: train (80%), val (10%) and test (10%)
	set.seed(100)
	idx <- createFolds(species, k = 10)
	valIdx <- idx$Fold01
	testIdx <- idx$Fold02
	# Define samples for train, val and test
	#### Pre-processing ####
	# Read files
	fnames <- list.files("mp3/", full.names = T, patt = "*.mp3")

	# Write metadata for Kaggle dataset
	ids <- str_extract(fnames, pattern = "[0-9]{4,}")
	query$Path <- fnames[match(query$Recording_ID, ids)]
	write.csv(query, "metadata.csv")

	# Play random file - setWavPlayer in macOS if "permission denied"
	#### Download HQ male song recordings > 30s long from Europe ####
	query <- querxc("type:song type:male len_gt:30 q_gt:C area:europe")
	query$Species <- with(query, paste(Genus, Specific_epithet))
	# Select top 50 most abundant bird species
	speciesCount <- sort(table(query$Species), decreasing = T)
	topSpecies <- names(speciesCount)[1:50]
	query <- query[query$Species %in% topSpecies, ]
	# Downsample to min size among the 50 classes
	balancedClasses <- lapply(topSpecies, function(x){
	set.seed(100)
	# Tue Feb 4 19:43:33 2020 ------------------------------
	setwd("~/Documents/Tutorials/birdsong")
	library(parallel)
	library(tidyverse)
	library(abind)
	library(caret)
	library(tuneR)
	library(warbleR)
	source("funs.R")
	myMeans <- vector()
	for(i in 1:100){
	set.seed(i)
	myMeans <- c(myMeans, mean(rpois(10,3)))
	}
	hist(myMeans, main = NULL, xlab = expression(bar(x)))
	n <- 1:20
	den <- dpois(n, 3)
	plot(den, xlab = "Outcome", ylab = "Density")
	pnorm(1200,1000,200) # this gives us prob x smaller than 1200eur
	1-pnorm(1200,1000,200) # this is the one, x greater than 1200eur
	n <- 1:20
	den <- dbinom(n, 20, 0.7)
	plot(den, ylab = "Density", xlab = "Number of successes")
	sum(den) # = 1
	# Fit lm using all 14 vars
	modHousesFull <- lm(MEDV ~ ., data = houses)
	summary(modHousesFull) # R2 = 0.741

	# Compare obs. vs. pred. plots
	par(mfrow = c(1,2))
	plot(houses$MEDV, predict(modHouses),
	xlab = "Observed MEDV", ylab = "Predicted MEDV",
	main = "PCR", abline(a = 0, b = 1, col = "red"))
	plot(houses$MEDV, predict(modHousesFull),