View 4-umap.py
# Define UMAP; random_state is fixed so repeated runs use the same seed
brain_umap = umap.UMAP(random_state=999, n_neighbors=30, min_dist=.25)
# Fit UMAP and keep the two embedding dimensions as columns UMAP1 / UMAP2
embedding = pd.DataFrame(brain_umap.fit_transform(matrix),
                         columns=['UMAP1', 'UMAP2'])
# Produce sns.scatterplot and pass metadata.subclass_label as color
# NOTE(review): hue is passed as a plain list, so it is matched to the
# embedding rows by position -- assumes metadata rows are in matrix row order.
sns_plot = sns.scatterplot(x='UMAP1', y='UMAP2', data=embedding,
                           hue=metadata.subclass_label.to_list(),
                           alpha=.1, linewidth=0, s=1)
View 3-umap.py
# Remove expression features with >= 50% zero-valued expression levels
# (a column is kept only when its fraction of zeros is strictly below 0.5).
# The test runs one column at a time (axis=0).
is_expressed = np.apply_along_axis(lambda x: np.mean(x == 0) < .5, arr=matrix, axis=0)
matrix = matrix[:, is_expressed.tolist()]
# Log2-transform with a pseudocount of 1 so zero counts map to 0;
# from here on `matrix` is the NumPy result of to_numpy().
matrix = np.log2(matrix.to_numpy() + 1)
View 2-umap.py
# Check first five columns in matrix.csv
#!cut -d, -f-5 matrix.csv | head
# Import data with Bash command discarding first column
# (`cut -f2-` drops field 1 before datatable parses the stream).
# NOTE(review): columns=dt.int32 presumably forces every column to int32 --
# confirm against the datatable fread documentation.
matrix = dt.fread(cmd='cut -d, -f2- matrix.csv',
                  header=True, sep=',', columns=dt.int32)  # ~7 GB (76533, 50281)
# Import metadata
metadata = pd.read_csv('metadata.csv')
View 1-umap.py
# Imports
#!pip install datatable
import os
import umap
import numpy as np
import pandas as pd
import datatable as dt
import seaborn as sns
# Placeholder path: set this to the actual working directory before running
os.chdir('PATH/TO/WDIR')
View 4-audioClass.R
# read, downsample, clip, mel spec, normalize and remove noise | |
melspec <- function(x, start, end){ | |
mp3 <- readMP3(filename = x) %>% | |
extractWave(xunit = "time", | |
from = start, to = end) | |
# return log-spectrogram with 256 Mel bands and compression | |
sp <- melfcc(mp3, nbands = 256, usecmp = T, | |
spec_out = T, | |
hoptime = (end-start) / 256)$aspectrum |
View 12-audioClass.R
# Test set prediction
predXProb <- predict(model, test$X)
# For each row, take the column with the highest value and map its index to a
# label in speciesClass (predictions and reference are decoded the same way).
predXClass <- speciesClass[apply(predXProb, 1, which.max)]
trueXClass <- speciesClass[apply(test$Y, 1, which.max)]
# Build the confusion matrix; both factors share the full set of levels so
# classes absent from either vector still get a row/column.
confMatTest <- confusionMatrix(
  data = factor(predXClass, levels = speciesClass),
  reference = factor(trueXClass, levels = speciesClass)
)
pheatmap(confMatTest$table, cluster_rows = F, cluster_cols = F, |
View 11-audioClass.R
# Derive class labels by removing "species" from the column names of train$Y,
# and set colors for heatmap
speciesClass <- gsub(pattern = "species", replacement = "",
                     x = colnames(train$Y))
# Palette function interpolating the reversed 7-color "RdGy" Brewer palette
cols <- colorRampPalette(rev(brewer.pal(n = 7, name = "RdGy")))
# Validation predictions
predProb <- predict(model, val$X)
# Row-wise arg-max over columns, mapped to the label at that index
predClass <- speciesClass[apply(predProb, 1, which.max)]
trueClass <- speciesClass[apply(val$Y, 1, which.max)]
# Plot confusion matrix
View 10-audioClass.R
# Print summary
summary(model)
# Compile with Adam and categorical cross-entropy, tracking accuracy.
# NOTE(review): the result of compile() is not reassigned; this relies on keras
# compiling the model object in place -- confirm for the installed version.
model %>% compile(optimizer = optimizer_adam(decay = 1e-5),
                  loss = "categorical_crossentropy",
                  metrics = "accuracy")
# Train for 50 epochs in batches of 16, evaluating on the validation split
# after each epoch; the returned training history is kept in `history`.
history <- fit(model, x = train$X, y = train$Y,
               batch_size = 16, epochs = 50,
               validation_data = list(val$X, val$Y))
View 9-audioClass.R
# Build model | |
model <- keras_model_sequential() %>% | |
layer_conv_2d(input_shape = dim(train$X)[2:4], | |
filters = 16, kernel_size = c(3, 3), | |
activation = "relu") %>% | |
layer_max_pooling_2d(pool_size = c(2, 2)) %>% | |
layer_dropout(rate = .2) %>% | |
layer_conv_2d(filters = 32, kernel_size = c(3, 3), | |
activation = "relu") %>% |
View 8-audioClass.R
# Fri Feb 7 15:49:46 2020 ------------------------------
# NOTE(review): hard-coded working directory; adjust to the local project path.
setwd("~/Documents/Tutorials/birdsong")
library(keras)
# Select the "plaidml" conda environment and backend before keras is first used
use_condaenv("plaidml")
use_backend("plaidml")
k_backend() # plaidml
library(tidyverse)
library(caret)
library(e1071)
library(pheatmap)
Newer Older