# meg-codes/ppa-upset.R

## ppa-upset.R
library(tidyr)
library(dplyr)
library(UpSetR)

# Build an UpSet plot showing how digitized works are shared across
# collections. The pipeline runs once; the input CSV is expected in the
# current working directory.

# read in the data
ppa <- read.csv("ppa-digitizedworks-20190419T18_24_42.csv")

# keep only the needed columns, split the ";"-delimited Collection field into
# one row per collection, and add an indicator column to map on spread
new_df <- ppa %>%
  select("Title", "Source.ID", "Collection") %>%
  separate_rows("Collection", sep = ";") %>%
  mutate(truthy = 1)

# one indicator column per collection: 1 for an existing value, 0 otherwise
spread_out <- new_df %>%
  spread(Collection, truthy, fill = 0)

# remove the unneeded column V1 when spread() produced one; dropping it
# conditionally keeps the script from aborting on data where it is absent
# NOTE(review): V1 presumably comes from rows with an empty Collection value;
# confirm against the data
spread_out <- spread_out[, setdiff(names(spread_out), "V1"), drop = FALSE]

# group by source.id and title, then aggregate the indicator columns using sum;
# ungroup so no grouping leaks into later use of `grouped`
grouped <- spread_out %>%
  group_by(Source.ID, Title) %>%
  summarize_all(sum) %>%
  ungroup()

# upset() is given a plain data frame rather than a tibble
upset(
  as.data.frame(grouped),
  text.scale = 2,
  line.size = 2,
  point.size = 4,
  nsets = 7
)