# tts/gist:2867922

## gistfile1.r
################################################################
#
#  One-by-one checkerboard dentograph of library classifications.
#  Example: Helmet fiction
#
#  Tuija Sonkkila
#  2012-05-31
#
#  based on
#
#  On Dentographs, A New Method of Visualizing Library Collections
#  by William Denton
#
#  http://journal.code4lib.org/articles/6300
#
#  R version 2.12.1 (2010-12-16)
#
#  http://data.kirjastot.fi/data.html
#  Nimeä-Tarttuva 1.0 Suomi (CC BY-SA 1.0)
################################################################

library(stringr) # v 0.5
library(lattice)

# Path of the semicolon-separated input file.
file.location <- "/home/projektit/libdata/helmet_fiction_ready_tidy_2.csv"

# Layout of the CSV file (the classification code is in column c):
#
# isbn;lang;year;pages;size;c
# NA;fin;1971;NA;NA;1.4
# NA;fin;1970;NA;NA;4.2
# NA;fin;1962;NA;NA;4.791

xf <- read.csv(file = file.location, sep = ";", stringsAsFactors = FALSE)

# Remove rows that are exact duplicates of an earlier row.
xf.uniq <- unique(xf)

# Work on a one-column data frame that holds only the classification code.
helmetf <- as.data.frame(xf.uniq$c)
names(helmetf) <- "c"

# Split off the first two levels of the classification number.
# In fiction the main class is a single digit between 1 and 9
# (e.g. 1, 1.4, 2.8), so character 1 is the main class and
# character 3 (right after the separator) is the sub class.
helmetf$c1 <- substr(as.character(helmetf$c), start = 1, stop = 1)
helmetf$c2 <- substr(as.character(helmetf$c), start = 3, stop = 3)

# The full code is no longer needed; keep every column except c.
drop.col <- "c"
helmetf <- helmetf[setdiff(names(helmetf), drop.col)]

# Keep only the rows where neither level contains a letter
# and neither level is NA.
helmetf <- helmetf[
  !grepl("[a-zA-Z]", helmetf$c1) &
    !grepl("[a-zA-Z]", helmetf$c2) &
    !is.na(helmetf$c1) &
    !is.na(helmetf$c2),
]

# Turn both levels into factors with the fixed levels "1".."9";
# any other value becomes NA.
helmetf[] <- lapply(helmetf, factor, levels = as.character(1:9))

# Count the rows for each combination of main class and sub class.
helmetf.table <- table(helmetf)

# plot to a file
#
# Draws the count table helmetf.table as a levelplot and writes it to
# helmet_fiction.png in the current working directory.
png("helmet_fiction.png")

# Colour ramp from light grey to purple. It is deliberately not called
# `palette`, because that name would mask grDevices::palette().
dentograph.palette <- colorRampPalette(c("#eeeeee", "purple"))

# Tick labels for the main classes on the x axis.
main.class.labels <- c("Suomi 1x",
  "Ruotsi 2x", "Saksa 3x", "Englanti 4x",
  "Ranska 5x", "Italia 6x", "Espanja 7x",
  "Venäjä 8x", "Muut 9x")

# draw a levelplot
#
# levelplot() only builds a trellis object; it is drawn when printed.
# Auto-printing does not happen when the script is run through source(),
# so print() is called explicitly.
# tryCatch(finally = ) closes the PNG device even if plotting fails.
# `scales` is a proper list with separate x and y components.
# NOTE(review): rot = 1 on the y axis is kept from the original;
# it looks like 0 may have been intended - confirm.
tryCatch(
  print(levelplot(helmetf.table,
    col.regions = dentograph.palette,
    xlab = "Main",
    ylab = "Sub",
    main = "Dentograph of Helmet library fiction classification",
    scales = list(
      x = list(rot = 45, at = seq(1, 9), labels = main.class.labels),
      y = list(rot = 1, at = seq(1, 9), labels = seq(1, 9))
    ))),
  finally = dev.off()
)
	################################################################
	#
	# One-by-one checkerboard dentograph of library classifications.
	# Example: Helmet fiction
	#
	# Tuija Sonkkila
	# 2012-05-31
	#
	# based on
	#
	# On Dentographs, A New Method of Visualizing Library Collections
	# by William Denton
	#
	# http://journal.code4lib.org/articles/6300
	#
	# R version 2.12.1 (2010-12-16)
	#
	# http://data.kirjastot.fi/data.html
	# Nimeä-Tarttuva 1.0 Suomi (CC BY-SA 1.0)
	################################################################

	library(stringr) # v 0.5
	library(lattice)

	# Path of the semicolon-separated input file.
	file.location <- "/home/projektit/libdata/helmet_fiction_ready_tidy_2.csv"

	# Layout of the CSV file (the classification code is in column c):
	#
	# isbn;lang;year;pages;size;c
	# NA;fin;1971;NA;NA;1.4
	# NA;fin;1970;NA;NA;4.2
	# NA;fin;1962;NA;NA;4.791

	xf <- read.csv(file = file.location, sep = ";", stringsAsFactors = FALSE)

	# Remove rows that are exact duplicates of an earlier row.
	xf.uniq <- unique(xf)

	# Work on a one-column data frame that holds only the classification code.
	helmetf <- as.data.frame(xf.uniq$c)
	names(helmetf) <- "c"

	# Split off the first two levels of the classification number.
	# In fiction the main class is a single digit between 1 and 9
	# (e.g. 1, 1.4, 2.8), so character 1 is the main class and
	# character 3 (right after the separator) is the sub class.
	helmetf$c1 <- substr(as.character(helmetf$c), start = 1, stop = 1)
	helmetf$c2 <- substr(as.character(helmetf$c), start = 3, stop = 3)

	# The full code is no longer needed; keep every column except c.
	drop.col <- "c"
	helmetf <- helmetf[setdiff(names(helmetf), drop.col)]

	# Keep only the rows where neither level contains a letter
	# and neither level is NA.
	helmetf <- helmetf[
	  !grepl("[a-zA-Z]", helmetf$c1) &
	    !grepl("[a-zA-Z]", helmetf$c2) &
	    !is.na(helmetf$c1) &
	    !is.na(helmetf$c2),
	]

	# Turn both levels into factors with the fixed levels "1".."9";
	# any other value becomes NA.
	helmetf[] <- lapply(helmetf, factor, levels = as.character(1:9))

	# Count the rows for each combination of main class and sub class.
	helmetf.table <- table(helmetf)

	# plot to a file
	#
	# Draws the count table helmetf.table as a levelplot and writes it to
	# helmet_fiction.png in the current working directory.
	png("helmet_fiction.png")

	# Colour ramp from light grey to purple. It is deliberately not called
	# `palette`, because that name would mask grDevices::palette().
	dentograph.palette <- colorRampPalette(c("#eeeeee", "purple"))

	# Tick labels for the main classes on the x axis.
	main.class.labels <- c("Suomi 1x",
	  "Ruotsi 2x", "Saksa 3x", "Englanti 4x",
	  "Ranska 5x", "Italia 6x", "Espanja 7x",
	  "Venäjä 8x", "Muut 9x")

	# draw a levelplot
	#
	# levelplot() only builds a trellis object; it is drawn when printed.
	# Auto-printing does not happen when the script is run through source(),
	# so print() is called explicitly.
	# tryCatch(finally = ) closes the PNG device even if plotting fails.
	# `scales` is a proper list with separate x and y components.
	# NOTE(review): rot = 1 on the y axis is kept from the original;
	# it looks like 0 may have been intended - confirm.
	tryCatch(
	  print(levelplot(helmetf.table,
	    col.regions = dentograph.palette,
	    xlab = "Main",
	    ylab = "Sub",
	    main = "Dentograph of Helmet library fiction classification",
	    scales = list(
	      x = list(rot = 45, at = seq(1, 9), labels = main.class.labels),
	      y = list(rot = 1, at = seq(1, 9), labels = seq(1, 9))
	    ))),
	  finally = dev.off()
	)