Skip to content

Instantly share code, notes, and snippets.

@benmarwick
Last active August 13, 2019 00:51
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benmarwick/dc8a8abd17207f907123e65160dbccf8 to your computer and use it in GitHub Desktop.
Save benmarwick/dc8a8abd17207f907123e65160dbccf8 to your computer and use it in GitHub Desktop.
Exploring the PCA published by Prentiss (1998) to understand the usefulness of the Sullivan and Rozen debitage typology
#------------------------------------------------------
# Exploring the PCA published by Prentiss (1998) to understand the
# usefulness of the Sullivan and Rozen debitage typology
# read in & tidy the data -----------------------------------------------
library(tidyverse)
# got these data from table 7 (p. 644) of https://www.jstor.org/stable/2694112
# OCR'd using https://tabula.technology/
prentiss <- readr::read_csv("tabula-Prentiss 1988.csv")
# the abbreviations are, from the paper:
# CF = Complete Flake; PF = Proximal Fragment; MDF = Medial/distal;Fragment;
# NF = Nonorientable Fragment;SF = Split Flake; HH = Hard Hammer; SH = Soft Hammer;
# PR = Pressure; PC = Prepared Core; UPC = Unprepared Core; BF = Biface; FL = Flake Tool.
# transpose to long format for analysis and plotting
prentiss_long <-
prentiss %>%
gather(variable, value, -var) %>%
spread(var, value) %>%
as.data.frame()
# split out the labels to use for plotting later,
# and convert them to factors because that's what the plotting functions need
prentiss_long_labels <-
prentiss_long %>%
separate(variable,
sep = "-",
into = c("technological_origin", "reduction_activity", "number"),
remove = FALSE) %>%
mutate_at(vars(technological_origin,
reduction_activity,
number,
variable),
as.factor)
# remove character cols for scatterplot panel
prentiss_long_rownames <- prentiss_long
row.names(prentiss_long_rownames) <- prentiss_long_rownames$variable
prentiss_long_rownames <- prentiss_long_rownames[,-1]
# scatterplot panel -----------------------------------------------
# show scatterplot panel to get an overview of how the variables relate
# to each other
library(GGally)
ggpairs(prentiss_long_rownames) +
theme_bw()
# Compute & inspect the PCA -----------------------------------------------
# put the sample names as row names so
# they show up later as labels on the plot
row.names(prentiss_long_labels) <- prentiss_long_labels$variable
library(FactoMineR)
# compute PCA
res.pca <- PCA(prentiss_long_labels,
quali.sup = 1:4,
graph = FALSE)
# inspect eigenvalues, values >1 indicate that component captures more
# variability that any of the original measurement variables
eigenvalues <- res.pca$eig
head(eigenvalues[, 1:2])
# inspect distribution of PCs
library(factoextra)
fviz_screeplot(res.pca)
# Visualise output from the PCA ------------------------------------------
# plot variable loadings
fviz_pca_var(res.pca,
col.var="contrib") +
scale_color_viridis_c() +
theme_bw()
# inspect PC1 vs PC2
fviz_pca_ind(res.pca,
geom = "text") +
theme_bw() +
coord_equal()
# show ellipses for force application type
fviz_pca_biplot(res.pca,
geom = "text",
habillage = prentiss_long_labels$technological_origin,
addEllipses=TRUE) +
theme_bw() +
coord_equal() +
ggtitle("PCA showing artefacts grouped by technological origin")
# show ellipses for technology type
fviz_pca_biplot(res.pca,
geom = "text",
habillage = prentiss_long_labels$reduction_activity,
addEllipses=TRUE) +
theme_bw() +
coord_equal() +
ggtitle("PCA showing artefacts grouped by reduction activity")
var HH-PC-1 SH-UPC-2 HH-BF-3 SH-BF-4 HH-UPC-5 SH-UPC-6 HH-PC-7 SH-PC-8 HH-FL-9 SH-FL-10 PR-FL-11 HH-FL-12 SH-BF-13 PR-BF-14 HH-BF-15 SH-UPC-16 PR-UPC-17 HH-UPC-18 SH-PC-19 PR-PC-20
CF 0.04 0.11 0.11 0.03 0.09 0.03 0.11 0.05 0.44 0.27 0.12 0.27 0.09 0.07 0.08 0.07 0.11 0.18 0.15 0.14
PF 0.08 0.17 0.12 0.14 0.12 0.1 0.17 0.13 0.03 0.16 0.28 0.16 0.16 0.17 0.11 0.08 0.41 0.16 0.16 0.26
MDF 0.71 0.57 0.63 0.75 0.64 0.66 0.63 0.65 0.39 0.51 0.36 0.51 0.66 0.59 0.63 0.64 0.37 0.51 0.63 0.35
NF 0.1 0.09 0.07 0.03 0.07 0.06 0.06 0.09 0 0.01 0 0.01 0 0 0.1 0.14 0.01 0.11 0 0.01
SF 0.07 0.06 0.07 0.05 0.08 0.15 0.03 0.08 0.14 0.05 0.24 0.05 0.09 0.17 0.08 0.09 0.1 0.04 0.06 0.24
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment