# tts/vaskidentograph.r

## vaskidentograph.r
################################################################
#
#  Checkerboard one-by-one dentograph of library classifications.
#  Example: Vaski
#
#  Tuija Sonkkila
#  2012-05-31
#
#  based on
#
#  On Dentographs, A New Method of Visualizing Library Collections
#  by William Denton http://journal.code4lib.org/articles/6300
#
#  R version 2.12.1 (2010-12-16)
#
#  http://data.kirjastot.fi/data.html
#  CC0 1.0 Universal (CC0 1.0) Public Domain Dedication
#
################################################################

library(stringr) #v 0.5
library(lattice)

# Reads the classification data, counts how many records fall into each
# (first digit, second digit) pair of the classification number, and
# writes the counts as a 10 x 10 level plot to vaski.png.

file.location <- "/home/projektit/libdata/books_vaski_ready_tidy_2.csv"

# CSV file: semicolon-separated with a header row.
# Only column `c` (the classification number) is used below.
#
# isbn;lang;year;pages;size;c
# NA;fin;1971;NA;NA;26
# NA;fin;1970;NA;NA;25.3
# NA;fin;1962;NA;NA;99.11

v <- read.csv(file.location, sep = ";", header = TRUE,
              stringsAsFactors = FALSE)

# duplicate control: drop rows that are identical in every column
v.uniq <- unique(v)

# split the classification number: c1 is its first character (main class),
# c2 its second (sub class); substring() coerces `c` to character if needed
vaski <- data.frame(c1 = substring(v.uniq$c, 1, 1),
                    c2 = substring(v.uniq$c, 2, 2),
                    stringsAsFactors = FALSE)

# keep only rows where neither character is a letter or NA
has.letter <- grepl("[a-zA-Z]", vaski$c1) | grepl("[a-zA-Z]", vaski$c2)
is.missing <- is.na(vaski$c1) | is.na(vaski$c2)
vaski <- vaski[!(has.letter | is.missing), ]

# factors over the ten digits, so every digit gets a row/column in the
# table even when it has no records; any other value (e.g. "." or "")
# becomes NA here
digits <- as.character(0:9)
vaski$c1 <- factor(vaski$c1, levels = digits)
vaski$c2 <- factor(vaski$c2, levels = digits)

# count each combination of factor levels (table() leaves out NA by default)
vaski.table <- table(vaski)

# colour ramp from light grey to purple
# (note: this global masks grDevices::palette() for the rest of the session)
palette <- colorRampPalette(c("#eeeeee", "purple"))

# x axis tick labels: one per main class
main.labels <- c("Yleisteokset 0x", "Filosofia, psykologia 1x",
                 "Uskonto 2x", "Yhteiskunta 3x", "Maantiede 4x",
                 "Luonnontieteet 5x", "Tekniikka 6x", "Taiteet, liikunta 7x",
                 "Kaunokirjallisuus 8x", "Historia 9x")

# prepare a file
png("vaski.png")

# draw a plot
# print() is required: lattice functions only return a "trellis" object,
# which is not drawn automatically when the script is run with source().
# `scales` is a proper list with separate x and y components.
print(levelplot(vaski.table,
  col.regions = palette,
  xlab = "Main",
  ylab = "Sub",
  main = "Dentograph of Vaski library classification",
  scales = list(
    x = list(rot = 90, at = seq(1, 10), labels = main.labels),
    y = list(rot = 0, at = seq(1, 10), labels = seq(0, 9))
  )
))

dev.off()
	################################################################
	#
	# Checkerboard one-by-one dentograph of library classifications.
	# Example: Vaski
	#
	# Tuija Sonkkila
	# 2012-05-31
	#
	# based on
	#
	# On Dentographs, A New Method of Visualizing Library Collections
	# by William Denton http://journal.code4lib.org/articles/6300
	#
	# R version 2.12.1 (2010-12-16)
	#
	# http://data.kirjastot.fi/data.html
	# CC0 1.0 Universal (CC0 1.0) Public Domain Dedication
	#
	################################################################

	library(stringr) #v 0.5
	library(lattice)

	# Reads the classification data, counts how many records fall into each
	# (first digit, second digit) pair of the classification number, and
	# writes the counts as a 10 x 10 level plot to vaski.png.

	file.location <- "/home/projektit/libdata/books_vaski_ready_tidy_2.csv"

	# CSV file: semicolon-separated with a header row.
	# Only column `c` (the classification number) is used below.
	#
	# isbn;lang;year;pages;size;c
	# NA;fin;1971;NA;NA;26
	# NA;fin;1970;NA;NA;25.3
	# NA;fin;1962;NA;NA;99.11

	v <- read.csv(file.location, sep = ";", header = TRUE,
	              stringsAsFactors = FALSE)

	# duplicate control: drop rows that are identical in every column
	v.uniq <- unique(v)

	# split the classification number: c1 is its first character (main class),
	# c2 its second (sub class); substring() coerces `c` to character if needed
	vaski <- data.frame(c1 = substring(v.uniq$c, 1, 1),
	                    c2 = substring(v.uniq$c, 2, 2),
	                    stringsAsFactors = FALSE)

	# keep only rows where neither character is a letter or NA
	has.letter <- grepl("[a-zA-Z]", vaski$c1) | grepl("[a-zA-Z]", vaski$c2)
	is.missing <- is.na(vaski$c1) | is.na(vaski$c2)
	vaski <- vaski[!(has.letter | is.missing), ]

	# factors over the ten digits, so every digit gets a row/column in the
	# table even when it has no records; any other value (e.g. "." or "")
	# becomes NA here
	digits <- as.character(0:9)
	vaski$c1 <- factor(vaski$c1, levels = digits)
	vaski$c2 <- factor(vaski$c2, levels = digits)

	# count each combination of factor levels (table() leaves out NA by default)
	vaski.table <- table(vaski)

	# colour ramp from light grey to purple
	# (note: this global masks grDevices::palette() for the rest of the session)
	palette <- colorRampPalette(c("#eeeeee", "purple"))

	# x axis tick labels: one per main class
	main.labels <- c("Yleisteokset 0x", "Filosofia, psykologia 1x",
	                 "Uskonto 2x", "Yhteiskunta 3x", "Maantiede 4x",
	                 "Luonnontieteet 5x", "Tekniikka 6x", "Taiteet, liikunta 7x",
	                 "Kaunokirjallisuus 8x", "Historia 9x")

	# prepare a file
	png("vaski.png")

	# draw a plot
	# print() is required: lattice functions only return a "trellis" object,
	# which is not drawn automatically when the script is run with source().
	# `scales` is a proper list with separate x and y components.
	print(levelplot(vaski.table,
	  col.regions = palette,
	  xlab = "Main",
	  ylab = "Sub",
	  main = "Dentograph of Vaski library classification",
	  scales = list(
	    x = list(rot = 90, at = seq(1, 10), labels = main.labels),
	    y = list(rot = 0, at = seq(1, 10), labels = seq(0, 9))
	  )
	))

	dev.off()