Created
February 27, 2018 02:06
Star
You must be signed in to star a gist
BigQuery to perform Spearman's correlation over all pairs of genes given Reactome pathway definitions.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH | |
-- | |
-- | |
-- | |
geneList AS ( | |
SELECT | |
a.entrez AS Entrez, | |
Symbol AS HGNC_gene_symbol | |
FROM | |
`isb-cgc.QotM.Reactome_a1` a | |
JOIN | |
`isb-cgc.QotM.org_Hs_eg_db_v1` b | |
ON | |
a.entrez = b.Entrez | |
WHERE | |
pathwayName = 'CELL CYCLE' | |
GROUP BY | |
Symbol, | |
Entrez | |
ORDER BY | |
rand() | |
LIMIT 10), | |
-- | |
-- | |
geneAnnot AS ( | |
SELECT | |
Entrez, | |
HGNC_gene_symbol, | |
seq_name, | |
MIN(start) AS geneStart | |
FROM | |
`isb-cgc.genome_reference.GENCODE_v24` a | |
JOIN | |
geneList | |
ON | |
geneList.HGNC_gene_symbol = a.gene_name | |
GROUP BY | |
Entrez, | |
HGNC_gene_symbol, | |
seq_name ), | |
-- | |
-- | |
-- | |
cohortExpr1 AS ( | |
SELECT | |
sample_barcode, | |
a.HGNC_gene_symbol, | |
seq_name, | |
geneStart, | |
RANK() OVER (PARTITION BY a.HGNC_gene_symbol ORDER BY normalized_count ASC) AS expr_rank | |
FROM | |
`isb-cgc.TCGA_hg19_data_v0.RNAseq_Gene_Expression_UNC_RSEM` a | |
JOIN | |
geneAnnot | |
ON | |
geneAnnot.HGNC_gene_symbol = a.HGNC_gene_symbol | |
WHERE | |
project_short_name = 'TCGA-LUAD' | |
AND normalized_count IS NOT NULL | |
AND normalized_count > 10 | |
AND ENDS_WITH(sample_barcode, '-01A') ), | |
-- | |
-- | |
-- | |
cohortExpr2 AS ( | |
SELECT | |
sample_barcode, | |
a.HGNC_gene_symbol, | |
seq_name, | |
geneStart, | |
RANK() OVER (PARTITION BY a.HGNC_gene_symbol ORDER BY normalized_count ASC) AS expr_rank | |
FROM | |
`isb-cgc.TCGA_hg19_data_v0.RNAseq_Gene_Expression_UNC_RSEM` a | |
JOIN | |
geneAnnot | |
ON | |
geneAnnot.HGNC_gene_symbol = a.HGNC_gene_symbol | |
WHERE | |
project_short_name = 'TCGA-LUAD' | |
AND normalized_count IS NOT NULL | |
AND normalized_count > 0 | |
AND ENDS_WITH(sample_barcode, '-01A') ), | |
-- | |
-- | |
-- | |
jtab AS ( | |
SELECT | |
cohortExpr1.sample_barcode, | |
cohortExpr2.sample_barcode, | |
cohortExpr1.HGNC_gene_symbol AS geneA, | |
cohortExpr1.seq_name AS chrA, | |
cohortExpr1.geneStart AS startA, | |
cohortExpr2.HGNC_gene_symbol AS geneB, | |
cohortExpr2.seq_name AS chrB, | |
cohortExpr2.geneStart AS startB, | |
cohortExpr1.expr_rank AS e1, | |
cohortExpr2.expr_rank AS e2 | |
FROM | |
cohortExpr1 | |
JOIN | |
cohortExpr2 | |
ON | |
cohortExpr1.sample_barcode = cohortExpr2.sample_barcode | |
WHERE | |
cohortExpr1.HGNC_gene_symbol <> cohortExpr2.HGNC_gene_symbol | |
AND cohortExpr1.HGNC_gene_symbol > cohortExpr2.HGNC_gene_symbol | |
GROUP BY | |
cohortExpr1.sample_barcode, | |
cohortExpr2.sample_barcode, | |
cohortExpr1.HGNC_gene_symbol, | |
cohortExpr2.HGNC_gene_symbol, | |
cohortExpr1.seq_name, | |
cohortExpr1.geneStart, | |
cohortExpr2.seq_name, | |
cohortExpr2.geneStart, | |
cohortExpr1.expr_rank, | |
cohortExpr2.expr_rank ), | |
-- | |
-- | |
finalResult AS ( | |
SELECT | |
geneA, | |
chrA, | |
startA, | |
geneB, | |
chrB, | |
startB, | |
corr(e1, | |
e2) AS spearmans | |
FROM | |
jtab | |
GROUP BY | |
geneA, | |
chrA, | |
startA, | |
geneB, | |
chrB, | |
startB | |
ORDER BY | |
ABS(spearmans) DESC | |
LIMIT | |
100 | |
) | |
select * from finalResult |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment