# leeper/base-code.R

## base-code.R
# Location of the tab-separated Brauer2008 data set file.
url <- "http://varianceexplained.org/files/Brauer2008_DataSet1.tds"

# Step 1: download and parse the raw table.
d1 <- rio::import(url, format = "tsv")

# Step 2: break the "||"-delimited NAME column into separate fields and
# attach the first four of them to the raw table. Temporaries live inside
# local() so only d2 is added to the workspace.
d2 <- local({
  fields <- do.call(rbind.data.frame, strsplit(d1$NAME, " ?\\|\\| ?"))
  annotations <- fields[, -5] # the fifth field is discarded
  names(annotations) <- c("name", "BP", "MF", "systematic_name")
  cbind(d1, annotations)
})

# Step 3: go from one column per sample to one row per (gene, sample) pair,
# derive nutrient and rate from the sample label, then drop unusable rows and
# the columns that are no longer needed.
d3 <- local({
  # Sample columns are those whose name has "0" as its second character.
  sample_cols <- grep("^.0", names(d2), value = TRUE)

  long <- reshape(
    d2,
    varying = list(sample_cols),
    v.names = "expression",
    times = sample_cols,
    timevar = "sample",
    direction = "long"
  )

  long <- within(long, {
    # First character of the sample label.
    nutrient <- substring(sample, 1, 1)
    # Remainder of the sample label, read as a number.
    rate <- as.numeric(gsub("^.{1}", "", sample))
  })

  # Keep rows with a measured expression and a non-empty systematic name;
  # the third argument of subset() is the column selection.
  subset(
    long,
    !is.na(expression) & systematic_name != "",
    -c(sample, id, NAME, GID, YORF, GWEIGHT)
  )
})

# Visualize the genes whose BP field is "leucine biosynthesis" (described in
# the original note as a set of four genes): expression against rate, one
# colour per nutrient, with a straight-line fit per nutrient and one panel per
# gene.
# ggplot2 functions are namespace-qualified, like rio::import() above, so the
# script does not depend on ggplot2 having been attached beforehand.
ggplot2::ggplot(
  subset(d3, BP == "leucine biosynthesis"),
  ggplot2::aes(rate, expression, color = nutrient)
) +
  ggplot2::geom_point() +
  ggplot2::geom_smooth(method = "lm", se = FALSE) +
  ggplot2::facet_wrap(~name + systematic_name)
	# NOTE(review): this section repeats the data-preparation steps at the top
	# of the file; presumably an accidental duplicate -- confirm before removing.
	url <- "http://varianceexplained.org/files/Brauer2008_DataSet1.tds"

	# Clean and tidy the data
	d1 <- rio::import(url, format = "tsv")

	# Split NAME on "||" (with an optional space on either side) and keep the
	# first four fields. The pipe must be written "\\|" in R source: a bare
	# "\|" is not a valid string escape and prevents the file from parsing.
	d2 <- cbind(
	  d1,
	  setNames(
	    do.call(rbind.data.frame, strsplit(d1$NAME, " ?\\|\\| ?"))[, -5],
	    c("name", "BP", "MF", "systematic_name")
	  )
	)

	# Reshape to one row per (gene, sample), derive nutrient and rate from the
	# sample label, then drop unusable rows and columns no longer needed.
	# Temporaries live inside local() so only d3 is added to the workspace.
	d3 <- local({
	  # Sample columns are those whose name has "0" as its second character;
	  # computed once and reused for both `varying` and `times`.
	  sample_cols <- grep("^.0", names(d2), value = TRUE)

	  long <- reshape(
	    d2,
	    varying = list(sample_cols),
	    v.names = "expression",
	    times = sample_cols,
	    timevar = "sample",
	    direction = "long"
	  )

	  long <- within(long, {
	    # First character of the sample label.
	    nutrient <- substring(sample, 1, 1)
	    # Everything after the first character, read as a number.
	    rate <- as.numeric(substring(sample, 2))
	  })

	  # Keep rows with a measured expression and a non-empty systematic name.
	  subset(
	    long,
	    !is.na(expression) & systematic_name != "",
	    select = -c(sample, id, NAME, GID, YORF, GWEIGHT)
	  )
	})

	# Visualize the genes whose BP field is "leucine biosynthesis" (described
	# in the original note as a set of four genes): expression against rate,
	# one colour per nutrient, a straight-line fit per nutrient, one panel per
	# gene.
	# ggplot2 functions are namespace-qualified, like rio::import(), so the
	# script does not depend on ggplot2 having been attached beforehand.
	ggplot2::ggplot(
	  subset(d3, BP == "leucine biosynthesis"),
	  ggplot2::aes(rate, expression, color = nutrient)
	) +
	  ggplot2::geom_point() +
	  ggplot2::geom_smooth(method = "lm", se = FALSE) +
	  ggplot2::facet_wrap(~name + systematic_name)