Skip to content

Instantly share code, notes, and snippets.

@smrgit
Created March 2, 2017 16:06
Show Gist options
  • Save smrgit/7650739aa89fcb258dfe890f0db93f96 to your computer and use it in GitHub Desktop.
Save smrgit/7650739aa89fcb258dfe890f0db93f96 to your computer and use it in GitHub Desktop.
extract data for MIMAT0000082 from hg19 and hg38 tables
WITH
hg38_d1 AS (
-- we start with a table at the aliquot level, in case there are multiple aliquots
-- for a single sample; the SUM() is to sum the isoforms since we're working
-- with the Isoform_Expression tables
SELECT
sample_barcode,
aliquot_barcode,
mirna_id,
mirna_accession,
mirna_transcript,
SUM(reads_per_million_miRNA_mapped) AS sumRPMM
FROM
`isb-cgc-04-0010.draft_new_data.TCGA_gdc_hg38_miRNA_Isoform_Expression`
WHERE
mirna_accession IS NOT NULL
GROUP BY
sample_barcode,
aliquot_barcode,
mirna_id,
mirna_accession,
mirna_transcript ),
hg38_d2 AS (
-- next we do a MAX() over the sumRPMM values from the aliquots and just
-- keep the sample_barcode; and we're also going to do a LOG()
SELECT
sample_barcode,
mirna_id,
mirna_accession,
mirna_transcript,
LOG(MAX(sumRPMM)) AS logRPMM
FROM
hg38_d1
GROUP BY
sample_barcode,
mirna_id,
mirna_accession,
mirna_transcript ),
hg19_d1 AS (
-- now we're going to do the same with the hg19 data ...
SELECT
sample_barcode,
aliquot_barcode,
mirna_id,
mirna_accession,
mirna_transcript,
SUM(reads_per_million_miRNA_mapped) AS sumRPMM
FROM
`isb-cgc-04-0010.draft_new_data.TCGA_gdc_hg19_miRNA_Isoform_Expression`
WHERE
mirna_accession IS NOT NULL
GROUP BY
sample_barcode,
aliquot_barcode,
mirna_id,
mirna_accession,
mirna_transcript ),
hg19_d2 AS (
SELECT
sample_barcode,
mirna_id,
mirna_accession,
mirna_transcript,
LOG(MAX(sumRPMM)) AS logRPMM
FROM
hg19_d1
GROUP BY
sample_barcode,
mirna_id,
mirna_accession,
mirna_transcript ),
j1 AS (
SELECT
a.logRPMM AS hg19_logRPMM,
b.logRPMM AS hg38_logRPMM,
b.sample_barcode AS sample_barcode,
b.mirna_id AS mirna_id,
b.mirna_accession AS mirna_accession,
b.mirna_transcript AS mirna_transcript
FROM
hg19_d2 a
JOIN
hg38_d2 b
ON
a.sample_barcode=b.sample_barcode
AND a.mirna_id=b.mirna_id
AND a.mirna_accession=b.mirna_accession
AND a.mirna_transcript=b.mirna_transcript )
SELECT
sample_barcode,
hg19_logRPMM,
hg38_logRPMM
FROM
j1
WHERE
mirna_id="hsa-mir-26a-1"
AND mirna_accession="MIMAT0000082"
AND mirna_transcript="mature"
ORDER BY
sample_barcode
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment