Skip to content

Instantly share code, notes, and snippets.

@oscardelama
Last active August 29, 2015 14:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oscardelama/c04ae36cd68727e1614a to your computer and use it in GitHub Desktop.
Save oscardelama/c04ae36cd68727e1614a to your computer and use it in GitHub Desktop.
rgb-noise: Select samples
# 1) Read and digest the channels with valid values
# Creates the vvm object (flagged as having an RGGB pattern) and digests the
# '.pgm' files found under 'ISO100/crops'.
# NOTE(review): file.name.from / file.name.to presumably delimit the range of
# image file names to process -- confirm against the vvm documentation.
vvm.all <- vvm$new(has.RGGB.pattern = TRUE)
vvm.all$digest(
file.name.from = '_ODL0387s4',
file.name.to = '_ODL1671s6',
file.name.ext = '.pgm',
file.path = 'ISO100/crops',
# NOTE(review): min.raw / max.raw look like the bounds of the raw values
# considered valid, with max.raw giving one limit per channel (four values,
# presumably in RGGB order) -- confirm.
min.raw = 4,
max.raw = c(16379, 15774, 15774 ,16379))
# 2) Fit a robust, weighted, quadratic model
# The model is stored under the name 'weighted' and fitted with the 'lmrob'
# family. The quadratic degree is not set here, so it presumably comes from
# fit.model's defaults -- confirm.
# NOTE(review): `mean` is not a variable defined in this script; the weights
# expression is presumably evaluated by fit.model against the model data
# (its `mean` column) rather than at call time -- confirm.
vvm.all$fit.model(model.name = 'weighted', model.family = 'lmrob', weights=1/mean^2)
# 3) Extract the var and mean observations of the average green channel and
#    sort them by increasing mean
avg.green.means <- subset(vvm.all$var.df, channel == 'Green Avg')
avg.green.means <- avg.green.means[order(avg.green.means$mean), ]

# 4) Ask the 'weighted' model for its var predictions at exactly those means
avg.green.var.preds <- vvm.all$get.model.predictions(
  model.name = 'weighted',
  select = 'Green Avg',
  x = avg.green.means$mean
)

# 5) Relative error of each observation: the distance between the observed and
#    the predicted var, scaled by the distance between the prediction's `upl`
#    value and the predicted var
avg.green.means$avg.green.rel.err <- local({
  predicted.var <- avg.green.var.preds$var
  residual <- avg.green.means$var - predicted.var
  margin <- avg.green.var.preds$upl - predicted.var
  residual / margin
})

# Visual check of how the relative errors are distributed
hist(avg.green.means$avg.green.rel.err)

# 6) best.samples holds the observations whose absolute relative error is
#    below 0.4
best.samples <- subset(avg.green.means, abs(avg.green.rel.err) < 0.4)
# 7) Get the wide var data frame with all the picture channels per row
all.samples <- vvm.all$wide.var.df
# 8) Keep only the pictures with data not NA for all the channels
complete.samples <- all.samples[complete.cases(all.samples), ]
# 9) Keep in best.samples only pictures with data for all the channels
best.samples <- subset(best.samples, pict %in% complete.samples$pict)
# 10) Find in best.samples 154 clusters of similar mean values.
#     kmeans() draws its initial centres at random when it is given a cluster
#     count, so the seed is fixed to make the clusters (and therefore the
#     exported sample selection) reproducible from run to run.
set.seed(1)
chunks <- kmeans(best.samples$mean, 154)
# 11) Add to each row in best.samples a variable indicating to which cluster
#     it belongs
best.samples$cluster <- chunks$cluster
# 12) Split the picture names of best.samples into a list with one character
#     vector per cluster; names keep the row order of best.samples
pict.clusters <- split(as.character(best.samples$pict), best.samples$cluster)
# Pick the "middle" element of a vector, by position.
#
# v: a vector; it is NOT sorted here, so the caller decides the ordering.
# Returns the element at position ceiling(length / 2): the central element for
# odd lengths, the lower of the two central elements for even lengths, and a
# zero-length vector when `v` is empty.
my.median <- function(v) {
  # (n + 1) %/% 2 gives 1, 1, 2, 2, 3, ... for n = 1, 2, 3, 4, 5, ...
  middle <- (length(v) + 1) %/% 2
  v[middle]
}
# 13) Select a picture from each cluster.
#     vapply() guarantees a character vector with exactly one picture name per
#     cluster; it fails loudly instead of silently returning a list if any
#     cluster does not yield a single name.
sel.pics <- vapply(pict.clusters, my.median, character(1))
# 14) Save the selected picture sample names as a one-column ('pict') CSV
write.csv(data.frame('pict'=sel.pics), 'sel-pict.csv', row.names = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment