Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
BigQuery to perform Spearman's correlation over all pairs of genes given Reactome pathway definitions.
WITH
--
--
--
geneList AS (
SELECT
a.entrez AS Entrez,
Symbol AS HGNC_gene_symbol
FROM
`isb-cgc.QotM.Reactome_a1` a
JOIN
`isb-cgc.QotM.org_Hs_eg_db_v1` b
ON
a.entrez = b.Entrez
WHERE
pathwayName = 'CELL CYCLE'
GROUP BY
Symbol,
Entrez
ORDER BY
rand()
LIMIT 10),
--
--
geneAnnot AS (
SELECT
Entrez,
HGNC_gene_symbol,
seq_name,
MIN(start) AS geneStart
FROM
`isb-cgc.genome_reference.GENCODE_v24` a
JOIN
geneList
ON
geneList.HGNC_gene_symbol = a.gene_name
GROUP BY
Entrez,
HGNC_gene_symbol,
seq_name ),
--
--
--
cohortExpr1 AS (
SELECT
sample_barcode,
a.HGNC_gene_symbol,
seq_name,
geneStart,
RANK() OVER (PARTITION BY a.HGNC_gene_symbol ORDER BY normalized_count ASC) AS expr_rank
FROM
`isb-cgc.TCGA_hg19_data_v0.RNAseq_Gene_Expression_UNC_RSEM` a
JOIN
geneAnnot
ON
geneAnnot.HGNC_gene_symbol = a.HGNC_gene_symbol
WHERE
project_short_name = 'TCGA-LUAD'
AND normalized_count IS NOT NULL
AND normalized_count > 10
AND ENDS_WITH(sample_barcode, '-01A') ),
--
--
--
cohortExpr2 AS (
SELECT
sample_barcode,
a.HGNC_gene_symbol,
seq_name,
geneStart,
RANK() OVER (PARTITION BY a.HGNC_gene_symbol ORDER BY normalized_count ASC) AS expr_rank
FROM
`isb-cgc.TCGA_hg19_data_v0.RNAseq_Gene_Expression_UNC_RSEM` a
JOIN
geneAnnot
ON
geneAnnot.HGNC_gene_symbol = a.HGNC_gene_symbol
WHERE
project_short_name = 'TCGA-LUAD'
AND normalized_count IS NOT NULL
AND normalized_count > 0
AND ENDS_WITH(sample_barcode, '-01A') ),
--
--
--
jtab AS (
SELECT
cohortExpr1.sample_barcode,
cohortExpr2.sample_barcode,
cohortExpr1.HGNC_gene_symbol AS geneA,
cohortExpr1.seq_name AS chrA,
cohortExpr1.geneStart AS startA,
cohortExpr2.HGNC_gene_symbol AS geneB,
cohortExpr2.seq_name AS chrB,
cohortExpr2.geneStart AS startB,
cohortExpr1.expr_rank AS e1,
cohortExpr2.expr_rank AS e2
FROM
cohortExpr1
JOIN
cohortExpr2
ON
cohortExpr1.sample_barcode = cohortExpr2.sample_barcode
WHERE
cohortExpr1.HGNC_gene_symbol <> cohortExpr2.HGNC_gene_symbol
AND cohortExpr1.HGNC_gene_symbol > cohortExpr2.HGNC_gene_symbol
GROUP BY
cohortExpr1.sample_barcode,
cohortExpr2.sample_barcode,
cohortExpr1.HGNC_gene_symbol,
cohortExpr2.HGNC_gene_symbol,
cohortExpr1.seq_name,
cohortExpr1.geneStart,
cohortExpr2.seq_name,
cohortExpr2.geneStart,
cohortExpr1.expr_rank,
cohortExpr2.expr_rank ),
--
--
finalResult AS (
SELECT
geneA,
chrA,
startA,
geneB,
chrB,
startB,
corr(e1,
e2) AS spearmans
FROM
jtab
GROUP BY
geneA,
chrA,
startA,
geneB,
chrB,
startB
ORDER BY
ABS(spearmans) DESC
LIMIT
100
)
select * from finalResult
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.