Skip to content

Instantly share code, notes, and snippets.

@tts
Created June 4, 2012 12:01
Show Gist options
  • Save tts/2867937 to your computer and use it in GitHub Desktop.
Save tts/2867937 to your computer and use it in GitHub Desktop.
Two-by-two dentograph of the classification of the Vaski library consortium's open bibliographic data
################################################################
#
# Two-by-two checkerboard dentograph of library classifications.
# Example: Vaski
#
# Tuija Sonkkila
# 2012-06-04
#
# based on
#
# On Dentographs, A New Method of Visualizing Library Collections
# by William Denton
#
# http://journal.code4lib.org/articles/6300
#
# R version 2.12.1 (2010-12-16)
#
# http://data.kirjastot.fi/data.html
# CC0 1.0 Universal (CC0 1.0) Public Domain Dedication
################################################################
library(stringr) #v 0.5
library(lattice)
# Location of the semicolon-separated input file. Its layout, with three
# sample rows:
#
# isbn;lang;year;pages;size;c
# NA;fin;1971;NA;NA;26
# NA;fin;1970;NA;NA;25.3
# NA;fin;1962;NA;NA;99.11
file.location <- "/home/projektit/libdata/books_vaski_ready_tidy_2.csv"

# Load the records and discard rows that are exact duplicates of another row.
v <- read.csv(file.location, sep = ";", header = TRUE,
              stringsAsFactors = FALSE)
v.uniq <- unique(v)

# Only the classification column `c` is needed. Cut it into two pieces:
# characters 1-2 (main class) and characters 4-5 (decimals). Character 3,
# when present, is the separator (a period in the sample rows above).
class.code <- as.character(v.uniq[, "c"])
vaski <- data.frame(
  c1 = substring(class.code, 1, 2),
  c2 = substring(class.code, 4, 5),
  stringsAsFactors = FALSE
)

# Throw away rows where either piece contains a letter or is missing.
has.letter <- grepl("[a-zA-Z]", vaski$c1) | grepl("[a-zA-Z]", vaski$c2)
is.missing <- is.na(vaski$c1) | is.na(vaski$c2)
vaski <- vaski[!(has.letter | is.missing), ]

# Turn both pieces into factors over the full "00".."99" range, so that the
# cross-tabulation is always a 100 x 100 grid, including empty cells.
# NOTE(review): a code without exactly two decimals ("26" gives c2 == "",
# "25.3" gives c2 == "3") matches none of these levels, becomes NA here and
# is therefore left out of the counts below -- confirm this is intended.
class.levels <- sprintf("%02d", 0:99)
vaski$c1 <- factor(vaski$c1, levels = class.levels)
vaski$c2 <- factor(vaski$c2, levels = class.levels)

# Number of records for every (c1, c2) combination.
vaski.table <- table(vaski)
# Render the dentograph into a PNG file in the current working directory.
png("vaski2by2.png")

# Colour ramp from light grey to purple. It is deliberately not stored under
# the name `palette`, so grDevices::palette() is not masked in the workspace.
purple.ramp <- colorRampPalette(c("#eeeeee", "purple"))

# One tick per ten classes: the k-th factor level is drawn at position k, so
# the labels "00", "10", ..., "90" belong at positions 1, 11, ..., 91.
tick.spec <- list(
  at = seq(1, 100, by = 10),
  labels = sprintf("%02d", seq(0, 90, by = 10))
)

# lattice functions only build a trellis object; it is drawn when printed.
# Auto-printing does not happen inside source() or a function, so print()
# is called explicitly -- otherwise the PNG would be left blank there.
# The device is closed in `finally` so that it is released even if the
# plotting call fails.
tryCatch(
  print(levelplot(
    vaski.table,
    col.regions = purple.ramp,
    xlab = "Hundreds and tens",
    ylab = "Two decimals",
    main = "Two-by-two dentograph of Vaski library classification",
    # The same ticks on both axes, spelled out as a proper list. The earlier
    # `(x = list(...))` form was an assignment in parentheses: it produced
    # this same both-axes result, but also created a stray variable `x`.
    scales = list(x = tick.spec, y = tick.spec),
    panel = function(...) {
      panel.levelplot(...)
      # Dashed guide lines at every tenth position, in both directions.
      panel.abline(h = seq(11, 99, by = 10), lty = "dashed",
                   col = "light grey")
      panel.abline(v = seq(11, 99, by = 10), lty = "dashed",
                   col = "light grey")
    }
  )),
  finally = dev.off()
)
# Largest single cell count in the cross-tabulation.
peak.count <- max(vaski.table)
# Position (row, col) of every cell that holds that count.
which(vaski.table == peak.count, arr.ind = TRUE)
# 85, 32
# The count itself.
peak.count
# 18834
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment