# rgb-noise: Select samples
# 1) Read and digest the channels with valid values
vvm.all <- vvm$new(has.RGGB.pattern = TRUE)
vvm.all$digest(
  file.name.from = '_ODL0387s4',
  file.name.to   = '_ODL1671s6',
  file.name.ext  = '.pgm',
  file.path      = 'ISO100/crops',
  min.raw        = 4,
  max.raw        = c(16379, 15774, 15774, 16379)
)

# 2) Fit a robust, weighted, quadratic model
#    NOTE(review): `weights = 1/mean^2` is passed unevaluated here; presumably
#    fit.model() resolves `mean` against its own data -- confirm.
vvm.all$fit.model(
  model.name   = 'weighted',
  model.family = 'lmrob',
  weights      = 1/mean^2
)

# 3) Get the var and mean values for the average green channel,
#    sorted by increasing mean
avg.green.means <- subset(vvm.all$var.df, channel == 'Green Avg')
avg.green.means <- avg.green.means[with(avg.green.means, order(mean)), ]

# 4) Get the predictions of var for the average green mean values in the data
avg.green.var.preds <- vvm.all$get.model.predictions(
  model.name = 'weighted',
  select     = 'Green Avg',
  x          = avg.green.means$mean
)

# 5) Compute the relative error between predicted and real average green var:
#    the residual (observed - predicted) divided by (upl - predicted).
#    NOTE(review): `upl` is presumably the upper prediction limit -- confirm.
avg.green.means$avg.green.rel.err <-
  (avg.green.means$var - avg.green.var.preds$var) /
  (avg.green.var.preds$upl - avg.green.var.preds$var)

# Take a look at the histogram of relative errors
hist(avg.green.means$avg.green.rel.err)

# 6) Keep in the best.samples data frame samples with absolute relative error
#    below 0.4
best.samples <- subset(avg.green.means, abs(avg.green.rel.err) < 0.4)

# 7) Get the wide var data frame with all the picture channels per row
all.samples <- vvm.all$wide.var.df

# 8) Keep only the pictures with data not NA for all the channels
complete.samples <- all.samples[complete.cases(all.samples), ]

# 9) Keep in best.samples only pictures with data for all the channels
best.samples <- subset(best.samples, pict %in% complete.samples$pict)

# 10) Find in best.samples (up to) 154 clusters of similar mean values.
#     kmeans() aborts when asked for more centers than there are distinct data
#     points, so the requested count is capped, with a warning, when too few
#     samples survived the filters above.
#     Note: kmeans() starts from randomly chosen centers, so the clustering
#     (and hence the final selection) can differ between runs unless a seed
#     is set beforehand.
if (nrow(best.samples) == 0) {
  stop("No samples left in best.samples; cannot build clusters.")
}
cluster.target <- 154
cluster.count <- min(cluster.target, length(unique(best.samples$mean)))
if (cluster.count < cluster.target) {
  warning("Only ", cluster.count, " distinct mean values available; using ",
          cluster.count, " clusters instead of ", cluster.target, ".")
}
chunks <- kmeans(best.samples$mean, cluster.count)

# 11) Add to each row in best.samples a variable indicating to which cluster
#     it belongs
best.samples$cluster <- chunks$cluster

# 12) Split the best.samples picture names into a list with one element per
#     cluster; names inside each cluster keep the row order of best.samples
#     (sorted by mean in step 3)
pict.clusters <- split(as.character(best.samples$pict), best.samples$cluster)
# Select the "median" element of a vector by position (no sorting is done):
#   - odd length  -> the central element
#   - even length -> the lower of the two central elements
#   - length zero -> an empty vector of the same type
my.median <- function(v) {
  middle <- (length(v) + 1) %/% 2
  v[middle]
}
# 13) Select a picture from each cluster: the positionally middle name, as
#     picked by my.median(). vapply() enforces exactly one character value
#     per cluster instead of letting the result type depend on the input.
sel.pics <- vapply(pict.clusters, my.median, character(1))
# 14) Save the selected picture sample names
write.csv(data.frame('pict' = sel.pics), 'sel-pict.csv', row.names = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment