Skip to content

Instantly share code, notes, and snippets.

@smrgit
Created July 22, 2017 02:50
Show Gist options
  • Save smrgit/270b90a409d615782e5601f667a82b14 to your computer and use it in GitHub Desktop.
Save smrgit/270b90a409d615782e5601f667a82b14 to your computer and use it in GitHub Desktop.
compare hg19 and hg38 copy-number segments at miRNA locations
WITH
hg38t1 AS (
SELECT
sample_barcode,
mirna_id AS mirna_name,
LOG(reads_per_million_miRNA_mapped+1,2) AS logRPM
FROM
`isb-cgc.TCGA_hg38_data_v0.miRNAseq_Expression`
WHERE
reads_per_million_miRNA_mapped > 4 ),
hg38t2 AS (
SELECT
SUBSTR(seq_id,4,2) AS chr,
start_pos,
end_pos,
id AS mirna_id,
name
FROM
`isb-cgc.genome_reference.miRBase_v21_hsa_gff3` ),
hg38j1 AS (
SELECT
a.sample_barcode,
a.mirna_name,
a.logRPM,
b.chr,
b.start_pos,
b.end_pos,
b.mirna_id
FROM
hg38t1 a
JOIN
hg38t2 b
ON
a.mirna_name=b.name ),
hg38j2 AS (
SELECT
a.sample_barcode,
a.mirna_name,
a.logRPM,
a.chr,
a.start_pos AS mirna_start,
a.end_pos AS mirna_end,
b.start_pos AS seg_start,
b.end_pos AS seg_end,
b.num_probes,
b.segment_mean AS seg_mean
FROM
hg38j1 a
JOIN
`isb-cgc.TCGA_hg38_data_v0.Copy_Number_Segment_Masked` b
ON
a.sample_barcode=b.sample_barcode
AND a.chr=b.chromosome
AND ( ( a.start_pos >= b.start_pos )
AND ( a.end_pos <= b.end_pos ) ) ),
hg19t1 AS (
SELECT
sample_barcode,
mirna_id AS mirna_name,
LOG(reads_per_million_miRNA_mapped+1,2) AS logRPM
FROM
`isb-cgc.TCGA_hg19_data_v0.miRNAseq_Expression`
WHERE
reads_per_million_miRNA_mapped > 4 ),
hg19t2 AS (
SELECT
SUBSTR(seq_id,4,2) AS chr,
start_pos,
end_pos,
id AS mirna_id,
name
FROM
`isb-cgc.genome_reference.miRBase_v20_hsa_gff3` ),
hg19j1 AS (
SELECT
a.sample_barcode,
a.mirna_name,
a.logRPM,
b.chr,
b.start_pos,
b.end_pos,
b.mirna_id
FROM
hg19t1 a
JOIN
hg19t2 b
ON
a.mirna_name=b.name ),
hg19j2 AS (
SELECT
a.sample_barcode,
a.mirna_name,
a.logRPM,
a.chr,
a.start_pos AS mirna_start,
a.end_pos AS mirna_end,
b.start_pos AS seg_start,
b.end_pos AS seg_end,
b.num_probes,
b.segment_mean AS seg_mean
FROM
hg19j1 a
JOIN
`isb-cgc.TCGA_hg38_data_v0.Copy_Number_Segment_Masked` b
ON
a.sample_barcode=b.sample_barcode
AND a.chr=b.chromosome
AND ( ( a.start_pos >= b.start_pos )
AND ( a.end_pos <= b.end_pos ) ) ),
bigJoin AS (
SELECT
a.sample_barcode,
a.mirna_name,
a.chr AS chr38,
a.mirna_start AS mirna_start38,
a.mirna_end AS mirna_end38,
a.seg_start AS seg_start38,
a.seg_end AS seg_end38,
a.num_probes AS num_probes38,
a.seg_mean AS seg_mean38,
b.chr AS chr19,
b.mirna_start AS mirna_start19,
b.mirna_end AS mirna_end19,
b.seg_start AS seg_start19,
b.seg_end AS seg_end19,
b.num_probes AS num_probes19,
b.seg_mean AS seg_mean19
FROM
hg38j2 a
JOIN
hg19j2 b
ON
a.sample_barcode=b.sample_barcode
AND a.mirna_name=b.mirna_name ),
stats AS (
SELECT
num_probes38,
num_probes19,
(num_probes38-num_probes19) AS delta_num,
seg_mean38,
seg_mean19,
(seg_mean38-seg_mean19) AS delta_mean
FROM
bigJoin
WHERE
num_probes38 <> num_probes19 )
SELECT
APPROX_QUANTILES(delta_mean,10) AS meanQ,
APPROX_QUANTILES(delta_num,10) AS numQ
FROM
stats
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment