TimMaloney/artefact-morpho.R

## artefact-morpho.R
# working with images

# Get screenshot from scanstudio
# edit > paste into gimp (image > check canvas size)
# rectangle select tool
# image > crop to selection
# convert to B&W (http://www.gimp.org/tutorials/Color2BW/)
# colour > threshold increase to 200
# paintbrush tool > white colour over background
# save as xcf (no spaces in filename)
# export to jpg (no spaces in filename)
# setup: make sure you have
# installed http://www.imagemagick.org/script/binary-releases.php#windows
# and restarted RStudio

# get functions from Claude's website (you can download this txt file
# and source from your disk instead)
 source("http://www.isem.cnrs.fr/IMG/txt/Rfunctions.txt")
source("C:\\Users\\marwick\\Downloads\\Rfunctions.txt")
# library for greyscale stuff
library(pixmap)

# set working dir (where the jpgs are)
# make sure that all the jpgs in this folder have
# filenames that are consistently patterned
# withe underscores separating the terms, something like
# "type__subtype_region_id.jpg
setwd("C:/Users/Tim Maloney/Documents/artefact_images")

# make a list of jpgs to work on
list_imgs <- list.files(getwd(), pattern = ".jpg$")
# inspect list to make sure nothing dodgy is there
list_imgs
# classify the list for later analysis, we'll just get the
# word before the first underscore since that's a 'type' word
class <- unname(sapply(list_imgs, function(i) unlist(strsplit(i, split="_"))[1]))
# get vector of artefact IDs
nms <- gsub(".jpg", "", list_imgs)
# make a data frame of classifications and names for later use
fac <- data.frame(class = class, nms = nms)


# Now loop over each jpg in the list to trace its outline

# create an object to store the results
outlines <- vector("list", length = length(list_imgs))

# here's the loop
for(i in 1:length(list_imgs)) {

  # from Claude p. 40
  img <- nms[i]
  shell(shQuote(paste0("convert ", img, ".jpg ", img, ".ppm")))
  M<- read.pnm(paste0(img, ".ppm"))
  plot(M)

  # Convert the RGB image to a gray-scale image. (is this necessary?)
  M<- as(M, "pixmapGrey")

  # Claude p. 48
  # Binarize the image (is this necessary?)

  M@grey[which(M@grey>=0.9)]<-1
  M@grey[which(M@grey<0.9)]<-0.7
  plot(M)

  # auto-trace outline of shape
  # start<-locator(1) # now click on shape to pick starting point
  # or make a guess of where to start
  start <- list(x = 100, y = 100)
  Rc<-Conte(c(round(start$x),round(start$y)),M@grey)
  lines(Rc$X, Rc$Y, lwd=4)

  # this is the outline, we can inspect it...
  str(Rc) # inspect data
  # draw plot
  plot(Rc$X, Rc$Y, type = "l", asp = TRUE)

  # store result in list for further analysis
  outlines[[i]] <- Rc
}

# Now for analysis of the outlines...
# I use Momocs2, which is a copy of the package that I've made
# some changes to (it's only on my hard drive). You can use Momocs
# from CRAN (the usual repository), it should work the same for
# these functions...
library(Momocs)
# convert outlines to 'Coo' format
coords <- Coo(lapply(outlines, function(i) (simplify2array(i))))
# center the coords
coords_center <- Coo(lapply(coords@coo, coo.center))

# scale the coords
coords_scale <- Coo(lapply(coords_center@coo, function(i) coo.scale(i, 1)))

# give the images their file names
slot(coords_scale, 'fac') <- fac
slot(coords_scale, 'names') <- as.character(fac$nms)

# quick plot
panel(coords_scale,  borders="black", cols="grey90") # no names
panel(coords_scale,  borders="black", names=TRUE, cols="grey90") # with names

# determine how many harmonics to use in
# the elliptical fourier analysis
# (now just working through http://www.vincentbonhomme.fr/Momocs/vignettes/A-graphical-introduction-to-Momocs.pdf)

# one method...
windows() # pops up a new window to hold plots
# set up a grid of plots...
par(mfrow=c(ceiling(length(fac$nms)/2),ceiling(length(fac$nms)/2)-1))
lapply(coords_center@coo, function(i) hpow(Coo(i), nb.h = nb.h))
dev.off() # may need to repeat this a few times to free up the graphics device

# another approach...
lapply(coords_center@coo, function(i) hqual(Coo(i), harm.range = seq(1, nb.h, 10),  plot.method = c("panel")[1]))
lapply(coords_center@coo, function(i) hqual(Coo(i), harm.range = seq(1, nb.h, 10),  plot.method = c("stack")[1]))

# we want to chose the lowest number of harmonics that captures
# the most meaningful variation in shape. I've gone for 100 here...
nb.h = 90

# Perform Fourier analysis
coords_F <- eFourier(coords_center, nb.h=nb.h) # elliptical
coords_R <- rFourier(coords_center, nb.h=nb.h) # radii variation
coords_T <- tFourier(coords_center, nb.h=nb.h) # tangent

# put the names and classifications on again
slot(coords_F, 'fac') <- fac
slot(coords_F, 'names') <- as.character(fac$nms)

# explore contribution of harmonics
hcontrib(coords_F)
hcontrib(coords_R)
hcontrib(coords_T)

# compute Principal Component Analysis
coords_D <- pca(coords_F)
dimnames(coords_D$coe)[[1]] <- as.character(fac$nms)
# plot
dudi.plot(coords_D)
# many other types of plot

# observe how to change the point labelling
dudi.plot(coords_D, title="coords_D with no class but with ellipses")
dudi.plot(coords_D, 1, title="coords_D with no class but with ellipses")
dudi.plot(coords_D, 2, title="coords_D with no class but with ellipses")


dudi.plot(coords_D, 1, ellipses=FALSE, neighbors=TRUE,
          shapes=FALSE, star=FALSE, col.nei="black",
          title="coords_D with Gabriel's neighboring graph")

# this one looks good, I think:
dudi.plot(coords_D, 1, labels=FALSE, points=FALSE, boxes=FALSE,
          shapes=TRUE, pos.shp="li",
          title="coords_D with labels and reconstructed shapes")

dudi.plot(coords_D, 1, points=FALSE, labels=TRUE,
          boxes=FALSE, shapes=FALSE,
          title="coords_D with labels and ellipse")

dudi.plot(coords_D, 1, arrows=TRUE, dratio.arrow=0.2, shapes=FALSE,
          title="coords_D with harmonic correlations")

# even more plots...
morpho.space(coords_D)
title("Default")
morpho.space(coords_D, nr.shp=3, nc.shp=3,
             col.shp="#1A1A1A22", border.shp=NA, pch.pts=5)
title("Custom nb of shapes")
morpho.space(coords_D, pos.shp="li")
title("Using the PC coordinates")
morpho.space(coords_D, pos.shp="circle")
title("A circle of shapes")
morpho.space(coords_D, xlim=c(-0.25, 0.25), ylim=c(-0.2, 0.2),
             pos.shp=expand.grid(seq(-0.2, 0.2, 0.1) , seq(-0.2, 0.2, 0.1)),
             border.shp="black", col.shp="#00000011", col.pts="firebrick")
title("Custom shape positions")
morpho.space(coords_D, xax=3, yax=4)
title("Morpho space on PC3 and PC4")
morpho.space(coords_D, amp.shp=3)
title("3 times magnif. differences")
morpho.space(pca(coords_R), rotate.shp=pi/6)
title("Using rFourier")

# display the contribution of Principal Component to shape description.
PC.contrib(coords_D)
PC.contrib(coords_D, PC.r=1:3, sd=3,
           cols=paste0(col.sari(3), "55"), borders=col.sari(3))


# test differences between groups
# through a Multivariate Analysis of Variance.
(mnv <- manova.Coe(coords_F, "class", retain = 5))


# cluster analysis and dendrograms
library(MASS)
library(shapes)

# interpolate point so each shape has the same number
interp <- Coo(lapply(outlines, function(i) coo.sample.int(simplify2array(i), 3000) ))


## experimental, incomplete and not working...
gorf<-gorf.dat
panf<-panf.dat[c(5,1,2:4,6:8),,]
pongof<-pongof.dat[c(5,1,2:4,6:8),,]
APE<-array(c(panf, gorf, pongof),dim=c(8,2,80))
APE<-array(c(panf, gorf),dim=c(8,2,56))
AP<-orp(pgpa(APE)$rotated)
m<-t(matrix(AP, 16, 56))
par(mar=c(0.5,2,1,1))
layout(matrix(c(1,2),2,1))
plot(hclust(dist(m), method="average"),main="UPGMA"
     ,labels=c(rep("P",26),rep("G",30)),cex=0.7)
plot(hclust(dist(m), method="complete"),main="COMPLETE"
     ,labels=c(rep("P",26),rep("G",30)),cex=0.7)


# inspect thin plate splines
x <- meanShapes(coords_F)
stack(Coo(x), borders=col.gallus(2))
tps.grid(botFg[[1]], botFg[[2]]) # have a look to ?"["
tps.grid(botFg$beer, botFg$whisky, grid.outside=2,
         grid.size=80, amp=3, plot.full=FALSE)


set.seed(007)
my_list<- lapply(1:10, function(i) matrix(data = runif(sample(seq(2,10,2), 1)), ncol = 2))

my_array<-Map(function(x,y,z) array(n,c(x,y,z)), myrow, mycol,myz) # but this creates the list of 1 array
	# working with images

	# Get screenshot from scanstudio
	# edit > paste into gimp (image > check canvas size)
	# rectangle select tool
	# image > crop to selection
	# convert to B&W (http://www.gimp.org/tutorials/Color2BW/)
	# colour > threshold increase to 200
	# paintbrush tool > white colour over background
	# save as xcf (no spaces in filename)
	# export to jpg (no spaces in filename)
	# setup: make sure you have
	# installed http://www.imagemagick.org/script/binary-releases.php#windows
	# and restarted RStudio

	# get functions from Claude's website (you can download this txt file
	# and source from your disk instead)
	source("http://www.isem.cnrs.fr/IMG/txt/Rfunctions.txt")
	source("C:\\Users\\marwick\\Downloads\\Rfunctions.txt")
	# library for greyscale stuff
	library(pixmap)

	# set working dir (where the jpgs are)
	# make sure that all the jpgs in this folder have
	# filenames that are consistently patterned
	# withe underscores separating the terms, something like
	# "type__subtype_region_id.jpg
	setwd("C:/Users/Tim Maloney/Documents/artefact_images")

	# make a list of jpgs to work on
	list_imgs <- list.files(getwd(), pattern = ".jpg$")
	# inspect list to make sure nothing dodgy is there
	list_imgs
	# classify the list for later analysis, we'll just get the
	# word before the first underscore since that's a 'type' word
	class <- unname(sapply(list_imgs, function(i) unlist(strsplit(i, split="_"))[1]))
	# get vector of artefact IDs
	nms <- gsub(".jpg", "", list_imgs)
	# make a data frame of classifications and names for later use
	fac <- data.frame(class = class, nms = nms)


	# Now loop over each jpg in the list to trace its outline

	# create an object to store the results
	outlines <- vector("list", length = length(list_imgs))

	# here's the loop
	for(i in 1:length(list_imgs)) {

	# from Claude p. 40
	img <- nms[i]
	shell(shQuote(paste0("convert ", img, ".jpg ", img, ".ppm")))
	M<- read.pnm(paste0(img, ".ppm"))
	plot(M)

	# Convert the RGB image to a gray-scale image. (is this necessary?)
	M<- as(M, "pixmapGrey")

	# Claude p. 48
	# Binarize the image (is this necessary?)

	M@grey[which(M@grey>=0.9)]<-1
	M@grey[which(M@grey<0.9)]<-0.7
	plot(M)

	# auto-trace outline of shape
	# start<-locator(1) # now click on shape to pick starting point
	# or make a guess of where to start
	start <- list(x = 100, y = 100)
	Rc<-Conte(c(round(start$x),round(start$y)),M@grey)
	lines(Rc$X, Rc$Y, lwd=4)

	# this is the outline, we can inspect it...
	str(Rc) # inspect data
	# draw plot
	plot(Rc$X, Rc$Y, type = "l", asp = TRUE)

	# store result in list for further analysis
	outlines[[i]] <- Rc
	}

	# Now for analysis of the outlines...
	# I use Momocs2, which is a copy of the package that I've made
	# some changes to (it's only on my hard drive). You can use Momocs
	# from CRAN (the usual repository), it should work the same for
	# these functions...
	library(Momocs)
	# convert outlines to 'Coo' format
	coords <- Coo(lapply(outlines, function(i) (simplify2array(i))))
	# center the coords
	coords_center <- Coo(lapply(coords@coo, coo.center))

	# scale the coords
	coords_scale <- Coo(lapply(coords_center@coo, function(i) coo.scale(i, 1)))

	# give the images their file names
	slot(coords_scale, 'fac') <- fac
	slot(coords_scale, 'names') <- as.character(fac$nms)

	# quick plot
	panel(coords_scale, borders="black", cols="grey90") # no names
	panel(coords_scale, borders="black", names=TRUE, cols="grey90") # with names

	# determine how many harmonics to use in
	# the elliptical fourier analysis
	# (now just working through http://www.vincentbonhomme.fr/Momocs/vignettes/A-graphical-introduction-to-Momocs.pdf)

	# one method...
	windows() # pops up a new window to hold plots
	# set up a grid of plots...
	par(mfrow=c(ceiling(length(fac$nms)/2),ceiling(length(fac$nms)/2)-1))
	lapply(coords_center@coo, function(i) hpow(Coo(i), nb.h = nb.h))
	dev.off() # may need to repeat this a few times to free up the graphics device

	# another approach...
	lapply(coords_center@coo, function(i) hqual(Coo(i), harm.range = seq(1, nb.h, 10), plot.method = c("panel")[1]))
	lapply(coords_center@coo, function(i) hqual(Coo(i), harm.range = seq(1, nb.h, 10), plot.method = c("stack")[1]))

	# we want to chose the lowest number of harmonics that captures
	# the most meaningful variation in shape. I've gone for 100 here...
	nb.h = 90

	# Perform Fourier analysis
	coords_F <- eFourier(coords_center, nb.h=nb.h) # elliptical
	coords_R <- rFourier(coords_center, nb.h=nb.h) # radii variation
	coords_T <- tFourier(coords_center, nb.h=nb.h) # tangent

	# put the names and classifications on again
	slot(coords_F, 'fac') <- fac
	slot(coords_F, 'names') <- as.character(fac$nms)

	# explore contribution of harmonics
	hcontrib(coords_F)
	hcontrib(coords_R)
	hcontrib(coords_T)

	# compute Principal Component Analysis
	coords_D <- pca(coords_F)
	dimnames(coords_D$coe)[[1]] <- as.character(fac$nms)
	# plot
	dudi.plot(coords_D)
	# many other types of plot

	# observe how to change the point labelling
	dudi.plot(coords_D, title="coords_D with no class but with ellipses")
	dudi.plot(coords_D, 1, title="coords_D with no class but with ellipses")
	dudi.plot(coords_D, 2, title="coords_D with no class but with ellipses")


	dudi.plot(coords_D, 1, ellipses=FALSE, neighbors=TRUE,
	shapes=FALSE, star=FALSE, col.nei="black",
	title="coords_D with Gabriel's neighboring graph")

	# this one looks good, I think:
	dudi.plot(coords_D, 1, labels=FALSE, points=FALSE, boxes=FALSE,
	shapes=TRUE, pos.shp="li",
	title="coords_D with labels and reconstructed shapes")

	dudi.plot(coords_D, 1, points=FALSE, labels=TRUE,
	boxes=FALSE, shapes=FALSE,
	title="coords_D with labels and ellipse")

	dudi.plot(coords_D, 1, arrows=TRUE, dratio.arrow=0.2, shapes=FALSE,
	title="coords_D with harmonic correlations")

	# even more plots...
	morpho.space(coords_D)
	title("Default")
	morpho.space(coords_D, nr.shp=3, nc.shp=3,
	col.shp="#1A1A1A22", border.shp=NA, pch.pts=5)
	title("Custom nb of shapes")
	morpho.space(coords_D, pos.shp="li")
	title("Using the PC coordinates")
	morpho.space(coords_D, pos.shp="circle")
	title("A circle of shapes")
	morpho.space(coords_D, xlim=c(-0.25, 0.25), ylim=c(-0.2, 0.2),
	pos.shp=expand.grid(seq(-0.2, 0.2, 0.1) , seq(-0.2, 0.2, 0.1)),
	border.shp="black", col.shp="#00000011", col.pts="firebrick")
	title("Custom shape positions")
	morpho.space(coords_D, xax=3, yax=4)
	title("Morpho space on PC3 and PC4")
	morpho.space(coords_D, amp.shp=3)
	title("3 times magnif. differences")
	morpho.space(pca(coords_R), rotate.shp=pi/6)
	title("Using rFourier")

	# display the contribution of Principal Component to shape description.
	PC.contrib(coords_D)
	PC.contrib(coords_D, PC.r=1:3, sd=3,
	cols=paste0(col.sari(3), "55"), borders=col.sari(3))


	# test differences between groups
	# through a Multivariate Analysis of Variance.
	(mnv <- manova.Coe(coords_F, "class", retain = 5))


	# cluster analysis and dendrograms
	library(MASS)
	library(shapes)

	# interpolate point so each shape has the same number
	interp <- Coo(lapply(outlines, function(i) coo.sample.int(simplify2array(i), 3000) ))




	## experimental, incomplete and not working...
	gorf<-gorf.dat
	panf<-panf.dat[c(5,1,2:4,6:8),,]
	pongof<-pongof.dat[c(5,1,2:4,6:8),,]
	APE<-array(c(panf, gorf, pongof),dim=c(8,2,80))
	APE<-array(c(panf, gorf),dim=c(8,2,56))
	AP<-orp(pgpa(APE)$rotated)
	m<-t(matrix(AP, 16, 56))
	par(mar=c(0.5,2,1,1))
	layout(matrix(c(1,2),2,1))
	plot(hclust(dist(m), method="average"),main="UPGMA"
	,labels=c(rep("P",26),rep("G",30)),cex=0.7)
	plot(hclust(dist(m), method="complete"),main="COMPLETE"
	,labels=c(rep("P",26),rep("G",30)),cex=0.7)





	# inspect thin plate splines
	x <- meanShapes(coords_F)
	stack(Coo(x), borders=col.gallus(2))
	tps.grid(botFg[[1]], botFg[[2]]) # have a look to ?"["
	tps.grid(botFg$beer, botFg$whisky, grid.outside=2,
	grid.size=80, amp=3, plot.full=FALSE)


	set.seed(007)
	my_list<- lapply(1:10, function(i) matrix(data = runif(sample(seq(2,10,2), 1)), ncol = 2))

	my_array<-Map(function(x,y,z) array(n,c(x,y,z)), myrow, mycol,myz) # but this creates the list of 1 array