Last active
August 29, 2015 14:14
-
-
Save oscardelama/c04ae36cd68727e1614a to your computer and use it in GitHub Desktop.
rgb-noise: Select samples
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Select, from a large set of raw image crops, the samples whose average-green
# variance is closest to a fitted noise model, then group them into clusters of
# similar mean value.
#
# NOTE(review): `vvm` is presumably the class generator provided by the
# imgnoiser package; it must be loaded before this script runs -- confirm.

# 1) Read and digest the channels with valid values
vvm.all <- vvm$new(has.RGGB.pattern = TRUE)
vvm.all$digest(
  file.name.from = '_ODL0387s4',
  file.name.to   = '_ODL1671s6',
  file.name.ext  = '.pgm',
  file.path      = 'ISO100/crops',
  min.raw        = 4,
  max.raw        = c(16379, 15774, 15774, 16379))

# 2) Fit a robust, weighted, quadratic model
# NOTE(review): `mean` in the weights expression is presumably resolved against
# the model data (the `mean` column), not base::mean -- confirm in fit.model.
vvm.all$fit.model(
  model.name   = 'weighted',
  model.family = 'lmrob',
  weights      = 1 / mean^2)

# 3) Get the var and mean values for the average green channel, sorted by mean.
#    The ascending order matters later: the "median" picture of each cluster is
#    picked by position.
avg.green.means <- subset(vvm.all$var.df, channel == 'Green Avg')
avg.green.means <- avg.green.means[order(avg.green.means$mean), ]

# 4) Get the predictions of var for the average green mean values in the data
avg.green.var.preds <- vvm.all$get.model.predictions(
  model.name = 'weighted',
  select     = 'Green Avg',
  x          = avg.green.means$mean)

# 5) Compute the relative error between predicted and real average green var.
#    The residual is scaled by the distance from the prediction to `upl`
#    (presumably the upper prediction limit -- confirm).
avg.green.means$avg.green.rel.err <-
  (avg.green.means$var - avg.green.var.preds$var) /
  (avg.green.var.preds$upl - avg.green.var.preds$var)

# Take a look at the histogram of relative errors
hist(avg.green.means$avg.green.rel.err)

# 6) Keep in the best.samples data frame samples with absolute relative error
#    below 0.4
best.samples <- subset(avg.green.means, abs(avg.green.rel.err) < 0.4)

# 7) Get the wide var data frame with all the picture channels per row
all.samples <- vvm.all$wide.var.df

# 8) Keep only the pictures with data not NA for all the channels
complete.samples <- all.samples[complete.cases(all.samples), ]

# 9) Keep in best.samples only pictures with data for all the channels
best.samples <- subset(best.samples, pict %in% complete.samples$pict)

# 10) Find in best.samples clusters of similar mean values
n.clusters <- 154
n.distinct <- length(unique(best.samples$mean))
if (n.distinct < n.clusters) {
  stop('Only ', n.distinct, ' distinct mean values are available; ',
       'cannot form ', n.clusters, ' clusters.', call. = FALSE)
}
# kmeans() picks its starting centres at random; fix the seed so the set of
# selected pictures is the same on every run.
set.seed(154)
chunks <- kmeans(best.samples$mean, n.clusters)

# 11) Add to each row in best.samples a variable indicating to which cluster it
#     belongs
best.samples$cluster <- chunks$cluster

# 12) Split the best.samples picture names into a list with one element per
#     cluster
pict.clusters <- split(as.character(best.samples$pict), best.samples$cluster)
# Select the positional "median" element of a vector.
#
# No sorting is done: the element in the middle position of `v` is returned as
# is. For an even length the lower of the two middle elements is chosen.
#
# v: a vector (any type that supports `[` indexing).
# Returns: a length-one vector holding the middle element of `v`, or a
#          zero-length vector when `v` is empty.
my.median <- function(v) {
  # (n + 1) %/% 2 yields 1 for n = 1, n / 2 for even n and n %/% 2 + 1 for odd
  # n; for n = 0 it yields 0, and v[0] is the empty vector.
  v[(length(v) + 1L) %/% 2L]
}
# 13) Select a picture from each cluster.
#     vapply() (instead of sapply()) guarantees a character vector with exactly
#     one picture name per cluster and fails loudly otherwise.
sel.pics <- vapply(pict.clusters, my.median, character(1))

# 14) Save the selected picture sample names
write.csv(data.frame(pict = sel.pics), 'sel-pict.csv', row.names = FALSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment