Created
June 4, 2012 12:01
-
-
Save tts/2867937 to your computer and use it in GitHub Desktop.
Two-by-two dentogpraph of classification of Vaski library consortia open bibliographic data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################ | |
# | |
# Two-by-two checkerboard dentograph of library classifications. | |
# Example: Vaski | |
# | |
# Tuija Sonkkila | |
# 2012-06-04 | |
# | |
# based on | |
# | |
# On Dentographs, A New Method of Visualizing Library Collections | |
# by William Denton | |
# | |
# http://journal.code4lib.org/articles/6300 | |
# | |
# R version 2.12.1 (2010-12-16) | |
# | |
# http://data.kirjastot.fi/data.html | |
# C0 1.0 Universal (CC0 1.0) Public Domain Dedication | |
################################################################ | |
library(stringr) # v 0.5
library(lattice)

# Path of the semicolon-separated input file.
file.location <- "/home/projektit/libdata/books_vaski_ready_tidy_2.csv"

# read the CSV file
#
# isbn;lang;year;pages;size;c
# NA;fin;1971;NA;NA;26
# NA;fin;1970;NA;NA;25.3
# NA;fin;1962;NA;NA;99.11
#
# The classification column `c` is read as text on purpose: it is split by
# character position further down, so it must not be type-converted to a
# number (a numeric 5.1 can no longer be told apart from "05.10").
v <- read.csv(
  file.location,
  sep = ";",
  header = TRUE,
  stringsAsFactors = FALSE,
  colClasses = c("c" = "character")
)
# duplicate control: drop rows that are identical in every column
v.uniq <- unique(v)
stopifnot("c" %in% names(v.uniq))

# spread out the classification number
# The code is cut purely by position: characters 1-2 are the main class,
# character 3 is skipped (the separator, e.g. the dot in "25.3"; codes such
# as "26" have nothing there), characters 4-5 are the decimals.
codes <- as.character(v.uniq$c)
vaski <- data.frame(
  c1 = substring(codes, 1, 2),
  c2 = substring(codes, 4, 5),
  stringsAsFactors = FALSE
)

# keep only rows without letters in either part, and without NA
has.letter <- grepl("[a-zA-Z]", vaski$c1) | grepl("[a-zA-Z]", vaski$c2)
is.missing <- is.na(vaski$c1) | is.na(vaski$c2)
vaski <- vaski[!has.letter & !is.missing, , drop = FALSE]

# factors
# Fixed levels "00".."99" make the table 100 x 100 regardless of which
# classes occur in the data.
# NOTE(review): a part that is not exactly one of these two-digit strings
# (e.g. "" from "26", or "3" from "25.3") is not a level, becomes NA here and
# is then left out by table(). Confirm that dropping those codes is intended.
two.digits <- sprintf("%02d", 0:99)
vaski$c1 <- factor(vaski$c1, levels = two.digits)
vaski$c2 <- factor(vaski$c2, levels = two.digits)

# count each combination of factor levels
vaski.table <- table(vaski)
# draw a plot
# Colour ramp from light grey to purple. The ramp function itself is handed
# to col.regions, as before; it is no longer called `palette`, which masked
# grDevices::palette().
purple.ramp <- colorRampPalette(c("#eeeeee", "purple"))

# Tick marks at every tenth level: positions 1, 11, ..., 91 carry the labels
# "00", "10", ..., "90".
decade.at <- seq(1, 100, by = 10)
decade.labels <- paste(seq(0, 9), "0", sep = "")

# Build the trellis object before opening the device, so that an error in
# the call cannot leave a png device open.
vaski.plot <- levelplot(
  vaski.table,
  col.regions = purple.ramp,
  xlab = "Hundreds and tens",
  ylab = "Two decimals",
  main = "Two-by-two dentograph of Vaski library classification",
  # An unnamed at/labels list applies to both axes. This is what the former
  # `scales=(x=list(...))` evaluated to, minus its stray global `x`.
  scales = list(at = decade.at, labels = decade.labels),
  panel = function(...) {
    panel.levelplot(...)
    # dashed grid between the decades
    panel.abline(h = seq(11, 99, by = 10), lty = "dashed", col = "light grey")
    panel.abline(v = seq(11, 99, by = 10), lty = "dashed", col = "light grey")
  }
)

# plot to a file
# lattice only draws when the trellis object is printed; print explicitly so
# the file is also filled when the script is run through source().
png("vaski2by2.png")
print(vaski.plot)
dev.off()
# where are the most items (row, col)?
# These are 1-based positions in the table, not class numbers.
peak.count <- max(vaski.table)
peak.pos <- which(vaski.table == peak.count, arr.ind = TRUE)
peak.pos
# 85, 32

# The table's levels start at "00", so position k holds class k - 1: the
# recorded positions 85 and 32 are the labels "84" and "31". Look the labels
# up rather than reading them off the index.
peak.classes <- data.frame(
  c1 = rownames(vaski.table)[peak.pos[, 1]],
  c2 = colnames(vaski.table)[peak.pos[, 2]],
  stringsAsFactors = FALSE
)
peak.classes

# and how many are there?
peak.count
# 18834
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment