Gibbsdavidl/gist:8a20097aaf8bece8fc586310795b54da

## gistfile1.txt
# This shows the contents of my global.R file, that contains the BigQuery SQL, googleAuthR code, and plots.


# global.R

#http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }

  if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

buildAndRunQuery <- function(varName, aproject, cohort) {
  q <- paste(
    "
    WITH
  --
    --
    --
    cohortExpr AS (
    SELECT
    sample_barcode,
    LOG10(normalized_count) AS expr
    FROM
    `isb-cgc.TCGA_hg19_data_v0.RNAseq_Gene_Expression_UNC_RSEM`
    WHERE
    project_short_name = '",cohort,"'
    AND HGNC_gene_symbol = '",varName,"'
    AND normalized_count IS NOT NULL
    AND normalized_count > 0),
    --
    --
    --
    cohortVar AS ( (
    SELECT
    sample_barcode_tumor AS sample_barcode,
    'Mut' AS mutation_status
    FROM
    `isb-cgc.TCGA_hg19_data_v0.Somatic_Mutation_MC3`
    WHERE
    SYMBOL = '",varName,"'
    AND Variant_Classification <> 'Silent'
    AND Variant_Type = 'SNP'
    AND IMPACT <> 'LOW')
    UNION ALL (
    SELECT
    sample_barcode,
    'WT' AS mutation_status
    FROM
    cohortExpr
    WHERE
    sample_barcode NOT IN (
    SELECT
    sample_barcode_tumor AS sample_barcode
    FROM
    `isb-cgc.TCGA_hg19_data_v0.Somatic_Mutation_MC3`
    WHERE
    SYMBOL = '",varName,"') ) ),
    --
    --
    cohort AS (
    SELECT
    cohortExpr.sample_barcode AS sample_barcode,
    mutation_status,
    expr
    FROM
    cohortExpr
    JOIN
    cohortVar
    ON
    cohortExpr.sample_barcode = cohortVar.sample_barcode ),
    --
    --
    grandMeanTable AS (
    SELECT
    AVG(expr) AS grand_mean
    FROM
    cohort ),
    --
    --
    groupMeans1 AS (
    SELECT
    'dummy' AS dummy,
    'WT' AS group_name,
    AVG(expr) AS group_mean,
    STDDEV(expr) AS group_stddev,
    COUNT(sample_barcode) AS n
    FROM
    cohort
    WHERE
    mutation_status = 'WT' ),
    --
    --
    groupMeans2 AS (
    SELECT
    'dummy' AS dummy,
    'Mut' AS group_name,
    AVG(expr) AS group_mean,
    STDDEV(expr) AS group_stddev,
    COUNT(sample_barcode) AS n
    FROM
    cohort
    WHERE
    mutation_status = 'Mut' ),
    --
    --
    groupMeansTable AS ( (
    SELECT
    *
    FROM
    groupMeans1)
    UNION ALL (
    SELECT
    *
    FROM
    groupMeans2) ),
    --
    --
    groupMeansTableForPlots AS (
    SELECT
    a.dummy AS dummy,
    a.group_mean AS wt_group_mean,
    a.group_stddev AS wt_group_stddev,
    a.n AS wt_n,
    b.group_mean AS mut_group_mean,
    b.group_stddev AS mut_group_stddev,
    b.n AS mut_n
    FROM
    groupMeans1 AS a
    JOIN
    groupMeans2 AS b
    ON
    a.dummy = b.dummy),
    --
    -- to get the variance between
    -- we sum the n*(group_mean - grand_mean)^2
    --
    ssBetween AS (
    SELECT
    group_name,
    group_mean,
    grand_mean,
    n,
    n*POW(group_mean - grand_mean,2) AS n_diff_sq
    FROM
    groupMeansTable
    CROSS JOIN
    grandMeanTable ),
    --
    -- Then join the group_means with the expression values
    -- for samples with WT status
    --
    ssWithin AS (
    SELECT
    *
    FROM (
    SELECT
    sample_barcode,
    mutation_status,
    expr,
    group_mean,
    POW(expr - group_mean,2) AS x_minus_xbar_sq
    FROM (
    SELECT
    sample_barcode,
    mutation_status,
    expr
    FROM
    cohort
    WHERE
    mutation_status = 'WT' )
    CROSS JOIN (
    SELECT
    group_mean
    FROM
    ssBetween
    WHERE
    group_name = 'WT' ) )
    --
    --
    UNION ALL
    --
    (
    SELECT
    sample_barcode,
    mutation_status,
    expr,
    group_mean,
    POW(expr - group_mean,2) AS x_minus_xbar_sq
    FROM (
    SELECT
    sample_barcode,
    mutation_status,
    expr
    FROM
    cohort
    WHERE
    mutation_status = 'Mut' )
    CROSS JOIN (
    SELECT
    group_mean
    FROM
    ssBetween
    WHERE
    group_name = 'Mut' ) ) ),
    --
    --
    --
    numerator AS (
    SELECT
    'dummy' AS dummy,
    SUM(n_diff_sq) AS s_sq_between
    FROM
    ssBetween ),
    --
    --
    --
    denominator AS (
    SELECT
    'dummy' AS dummy,
    COUNT(sample_barcode) AS n,
    '2' AS k,
    SUM(x_minus_xbar_sq)/(COUNT(sample_barcode)-2) AS s_sq_within
    FROM
    ssWithin ),
    --
    --
    --
    Ftable AS (
    SELECT
    n,
    k,
    s_sq_between,
    s_sq_within,
    s_sq_between / s_sq_within AS F,
    c.wt_group_mean,
    c.wt_group_stddev,
    c.wt_n,
    c.mut_group_mean,
    c.mut_group_stddev,
    c.mut_n
    FROM
    numerator
    JOIN
    denominator
    ON
    numerator.dummy = denominator.dummy
    JOIN
    groupMeansTableForPlots AS c
    ON
    numerator.dummy = c.dummy )
    SELECT
    *
    FROM
    Ftable",
    sep="")


  #define body for the POST request Google BigQuery API
  body = list(
    query=q,
    defaultDataset.projectId=aproject,
    useLegacySql = F
  )

  #create a function to interact with Google BigQuery
  f = gar_api_generator("https://www.googleapis.com/bigquery/v2",
                        "POST",
                        path_args = list(projects=aproject, queries=""))

  #call function with body as input argument
  response = f(the_body=body)

  dat <- data.frame()
  if(!is.null(response))
  {
    dat = as.data.frame(do.call("rbind",lapply(response$content$rows$f,FUN = t)), stringsAsFactors = F)
  }
  if (ncol(dat) == 11) {
    colnames(dat) <- c("n", "k", "var_between", "var_within", "F_stat",
                       "wt_group_mean","wt_group_stddev","wt_n","mut_group_mean","mut_group_stddev","mut_n")
    dat <- lapply(dat, "as.numeric")
  }
  return(dat)
}

getData <- function(project, cohort, varname) {
  dat <- buildAndRunQuery(varname, project, cohort)
  print(dat)
  return(dat)
}

drawPlot <- function(dat) {
  fstat <- dat$F_stat
  kval <- dat$k
  nval <- dat$n
  mval1 <- dat$wt_group_mean
  sval1 <- dat$wt_group_stddev
  mval2 <- dat$mut_group_mean
  sval2 <- dat$mut_group_stddev
  x <- seq(from=0, to=fstat+2, by=0.1)
  y <- df(df1=(nval-kval),df2=kval, x=x)
  pval <- 1-pf(q=fstat, df1=(kval-1), df2=(nval-kval))
  titlestring = paste("F(", kval, ", ", (nval-kval), "), stat = ", fstat , ", p-value = ", pval, sep="")
  p1 <- qplot(x=x, y=y, geom="line") + geom_vline(xintercept=fstat, col="red", lwd=2) + ylab("") + xlab("F Statistic") + ggtitle(titlestring)
  p2 <- qplot(x=factor(c("WT","Mut")), y=c(mval1, mval2), xlab="WT and Mutated", ylab="log10 RNA-seq") +
        geom_errorbar(aes(
          ymin=c(mval1-sval1,mval2-sval2),
          ymax=c(mval1+sval1, mval2+sval2)), width=.2)
  multiplot(p1, p2)
}

errorPlot <- function() {
  plot(x=1,y=1,col=0,xlab="",ylab="", axes=FALSE);
  text(1,1,"please log in using your google ID", cex = 2)
}
	# This shows the contents of my global.R file, that contains the BigQuery SQL, googleAuthR code, and plots.


	# global.R

	#http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
	multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
	library(grid)

	# Make a list from the ... arguments and plotlist
	plots <- c(list(...), plotlist)

	numPlots = length(plots)

	# If layout is NULL, then use 'cols' to determine layout
	if (is.null(layout)) {
	# Make the panel
	# ncol: Number of columns of plots
	# nrow: Number of rows needed, calculated from # of cols
	layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
	ncol = cols, nrow = ceiling(numPlots/cols))
	}

	if (numPlots==1) {
	print(plots[[1]])

	} else {
	# Set up the page
	grid.newpage()
	pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

	# Make each plot, in the correct location
	for (i in 1:numPlots) {
	# Get the i,j matrix positions of the regions that contain this subplot
	matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

	print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
	layout.pos.col = matchidx$col))
	}
	}
	}

	buildAndRunQuery <- function(varName, aproject, cohort) {
	q <- paste(
	"
	WITH
	--
	--
	--
	cohortExpr AS (
	SELECT
	sample_barcode,
	LOG10(normalized_count) AS expr
	FROM
	`isb-cgc.TCGA_hg19_data_v0.RNAseq_Gene_Expression_UNC_RSEM`
	WHERE
	project_short_name = '",cohort,"'
	AND HGNC_gene_symbol = '",varName,"'
	AND normalized_count IS NOT NULL
	AND normalized_count > 0),
	--
	--
	--
	cohortVar AS ( (
	SELECT
	sample_barcode_tumor AS sample_barcode,
	'Mut' AS mutation_status
	FROM
	`isb-cgc.TCGA_hg19_data_v0.Somatic_Mutation_MC3`
	WHERE
	SYMBOL = '",varName,"'
	AND Variant_Classification <> 'Silent'
	AND Variant_Type = 'SNP'
	AND IMPACT <> 'LOW')
	UNION ALL (
	SELECT
	sample_barcode,
	'WT' AS mutation_status
	FROM
	cohortExpr
	WHERE
	sample_barcode NOT IN (
	SELECT
	sample_barcode_tumor AS sample_barcode
	FROM
	`isb-cgc.TCGA_hg19_data_v0.Somatic_Mutation_MC3`
	WHERE
	SYMBOL = '",varName,"') ) ),
	--
	--
	cohort AS (
	SELECT
	cohortExpr.sample_barcode AS sample_barcode,
	mutation_status,
	expr
	FROM
	cohortExpr
	JOIN
	cohortVar
	ON
	cohortExpr.sample_barcode = cohortVar.sample_barcode ),
	--
	--
	grandMeanTable AS (
	SELECT
	AVG(expr) AS grand_mean
	FROM
	cohort ),
	--
	--
	groupMeans1 AS (
	SELECT
	'dummy' AS dummy,
	'WT' AS group_name,
	AVG(expr) AS group_mean,
	STDDEV(expr) AS group_stddev,
	COUNT(sample_barcode) AS n
	FROM
	cohort
	WHERE
	mutation_status = 'WT' ),
	--
	--
	groupMeans2 AS (
	SELECT
	'dummy' AS dummy,
	'Mut' AS group_name,
	AVG(expr) AS group_mean,
	STDDEV(expr) AS group_stddev,
	COUNT(sample_barcode) AS n
	FROM
	cohort
	WHERE
	mutation_status = 'Mut' ),
	--
	--
	groupMeansTable AS ( (
	SELECT
	*
	FROM
	groupMeans1)
	UNION ALL (
	SELECT
	*
	FROM
	groupMeans2) ),
	--
	--
	groupMeansTableForPlots AS (
	SELECT
	a.dummy AS dummy,
	a.group_mean AS wt_group_mean,
	a.group_stddev AS wt_group_stddev,
	a.n AS wt_n,
	b.group_mean AS mut_group_mean,
	b.group_stddev AS mut_group_stddev,
	b.n AS mut_n
	FROM
	groupMeans1 AS a
	JOIN
	groupMeans2 AS b
	ON
	a.dummy = b.dummy),
	--
	-- to get the variance between
	-- we sum the n*(group_mean - grand_mean)^2
	--
	ssBetween AS (
	SELECT
	group_name,
	group_mean,
	grand_mean,
	n,
	n*POW(group_mean - grand_mean,2) AS n_diff_sq
	FROM
	groupMeansTable
	CROSS JOIN
	grandMeanTable ),
	--
	-- Then join the group_means with the expression values
	-- for samples with WT status
	--
	ssWithin AS (
	SELECT
	*
	FROM (
	SELECT
	sample_barcode,
	mutation_status,
	expr,
	group_mean,
	POW(expr - group_mean,2) AS x_minus_xbar_sq
	FROM (
	SELECT
	sample_barcode,
	mutation_status,
	expr
	FROM
	cohort
	WHERE
	mutation_status = 'WT' )
	CROSS JOIN (
	SELECT
	group_mean
	FROM
	ssBetween
	WHERE
	group_name = 'WT' ) )
	--
	--
	UNION ALL
	--
	(
	SELECT
	sample_barcode,
	mutation_status,
	expr,
	group_mean,
	POW(expr - group_mean,2) AS x_minus_xbar_sq
	FROM (
	SELECT
	sample_barcode,
	mutation_status,
	expr
	FROM
	cohort
	WHERE
	mutation_status = 'Mut' )
	CROSS JOIN (
	SELECT
	group_mean
	FROM
	ssBetween
	WHERE
	group_name = 'Mut' ) ) ),
	--
	--
	--
	numerator AS (
	SELECT
	'dummy' AS dummy,
	SUM(n_diff_sq) AS s_sq_between
	FROM
	ssBetween ),
	--
	--
	--
	denominator AS (
	SELECT
	'dummy' AS dummy,
	COUNT(sample_barcode) AS n,
	'2' AS k,
	SUM(x_minus_xbar_sq)/(COUNT(sample_barcode)-2) AS s_sq_within
	FROM
	ssWithin ),
	--
	--
	--
	Ftable AS (
	SELECT
	n,
	k,
	s_sq_between,
	s_sq_within,
	s_sq_between / s_sq_within AS F,
	c.wt_group_mean,
	c.wt_group_stddev,
	c.wt_n,
	c.mut_group_mean,
	c.mut_group_stddev,
	c.mut_n
	FROM
	numerator
	JOIN
	denominator
	ON
	numerator.dummy = denominator.dummy
	JOIN
	groupMeansTableForPlots AS c
	ON
	numerator.dummy = c.dummy )
	SELECT
	*
	FROM
	Ftable",
	sep="")


	#define body for the POST request Google BigQuery API
	body = list(
	query=q,
	defaultDataset.projectId=aproject,
	useLegacySql = F
	)

	#create a function to interact with Google BigQuery
	f = gar_api_generator("https://www.googleapis.com/bigquery/v2",
	"POST",
	path_args = list(projects=aproject, queries=""))

	#call function with body as input argument
	response = f(the_body=body)

	dat <- data.frame()
	if(!is.null(response))
	{
	dat = as.data.frame(do.call("rbind",lapply(response$content$rows$f,FUN = t)), stringsAsFactors = F)
	}
	if (ncol(dat) == 11) {
	colnames(dat) <- c("n", "k", "var_between", "var_within", "F_stat",
	"wt_group_mean","wt_group_stddev","wt_n","mut_group_mean","mut_group_stddev","mut_n")
	dat <- lapply(dat, "as.numeric")
	}
	return(dat)
	}

	getData <- function(project, cohort, varname) {
	dat <- buildAndRunQuery(varname, project, cohort)
	print(dat)
	return(dat)
	}

	drawPlot <- function(dat) {
	fstat <- dat$F_stat
	kval <- dat$k
	nval <- dat$n
	mval1 <- dat$wt_group_mean
	sval1 <- dat$wt_group_stddev
	mval2 <- dat$mut_group_mean
	sval2 <- dat$mut_group_stddev
	x <- seq(from=0, to=fstat+2, by=0.1)
	y <- df(df1=(nval-kval),df2=kval, x=x)
	pval <- 1-pf(q=fstat, df1=(kval-1), df2=(nval-kval))
	titlestring = paste("F(", kval, ", ", (nval-kval), "), stat = ", fstat , ", p-value = ", pval, sep="")
	p1 <- qplot(x=x, y=y, geom="line") + geom_vline(xintercept=fstat, col="red", lwd=2) + ylab("") + xlab("F Statistic") + ggtitle(titlestring)
	p2 <- qplot(x=factor(c("WT","Mut")), y=c(mval1, mval2), xlab="WT and Mutated", ylab="log10 RNA-seq") +
	geom_errorbar(aes(
	ymin=c(mval1-sval1,mval2-sval2),
	ymax=c(mval1+sval1, mval2+sval2)), width=.2)
	multiplot(p1, p2)
	}

	errorPlot <- function() {
	plot(x=1,y=1,col=0,xlab="",ylab="", axes=FALSE);
	text(1,1,"please log in using your google ID", cex = 2)
	}