Created
July 22, 2017 02:50
-
-
Save smrgit/270b90a409d615782e5601f667a82b14 to your computer and use it in GitHub Desktop.
compare hg19 and hg38 copy-number segments at miRNA locations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH | |
hg38t1 AS ( | |
SELECT | |
sample_barcode, | |
mirna_id AS mirna_name, | |
LOG(reads_per_million_miRNA_mapped+1,2) AS logRPM | |
FROM | |
`isb-cgc.TCGA_hg38_data_v0.miRNAseq_Expression` | |
WHERE | |
reads_per_million_miRNA_mapped > 4 ), | |
hg38t2 AS ( | |
SELECT | |
SUBSTR(seq_id,4,2) AS chr, | |
start_pos, | |
end_pos, | |
id AS mirna_id, | |
name | |
FROM | |
`isb-cgc.genome_reference.miRBase_v21_hsa_gff3` ), | |
hg38j1 AS ( | |
SELECT | |
a.sample_barcode, | |
a.mirna_name, | |
a.logRPM, | |
b.chr, | |
b.start_pos, | |
b.end_pos, | |
b.mirna_id | |
FROM | |
hg38t1 a | |
JOIN | |
hg38t2 b | |
ON | |
a.mirna_name=b.name ), | |
hg38j2 AS ( | |
SELECT | |
a.sample_barcode, | |
a.mirna_name, | |
a.logRPM, | |
a.chr, | |
a.start_pos AS mirna_start, | |
a.end_pos AS mirna_end, | |
b.start_pos AS seg_start, | |
b.end_pos AS seg_end, | |
b.num_probes, | |
b.segment_mean AS seg_mean | |
FROM | |
hg38j1 a | |
JOIN | |
`isb-cgc.TCGA_hg38_data_v0.Copy_Number_Segment_Masked` b | |
ON | |
a.sample_barcode=b.sample_barcode | |
AND a.chr=b.chromosome | |
AND ( ( a.start_pos >= b.start_pos ) | |
AND ( a.end_pos <= b.end_pos ) ) ), | |
hg19t1 AS ( | |
SELECT | |
sample_barcode, | |
mirna_id AS mirna_name, | |
LOG(reads_per_million_miRNA_mapped+1,2) AS logRPM | |
FROM | |
`isb-cgc.TCGA_hg19_data_v0.miRNAseq_Expression` | |
WHERE | |
reads_per_million_miRNA_mapped > 4 ), | |
hg19t2 AS ( | |
SELECT | |
SUBSTR(seq_id,4,2) AS chr, | |
start_pos, | |
end_pos, | |
id AS mirna_id, | |
name | |
FROM | |
`isb-cgc.genome_reference.miRBase_v20_hsa_gff3` ), | |
hg19j1 AS ( | |
SELECT | |
a.sample_barcode, | |
a.mirna_name, | |
a.logRPM, | |
b.chr, | |
b.start_pos, | |
b.end_pos, | |
b.mirna_id | |
FROM | |
hg19t1 a | |
JOIN | |
hg19t2 b | |
ON | |
a.mirna_name=b.name ), | |
hg19j2 AS ( | |
SELECT | |
a.sample_barcode, | |
a.mirna_name, | |
a.logRPM, | |
a.chr, | |
a.start_pos AS mirna_start, | |
a.end_pos AS mirna_end, | |
b.start_pos AS seg_start, | |
b.end_pos AS seg_end, | |
b.num_probes, | |
b.segment_mean AS seg_mean | |
FROM | |
hg19j1 a | |
JOIN | |
`isb-cgc.TCGA_hg38_data_v0.Copy_Number_Segment_Masked` b | |
ON | |
a.sample_barcode=b.sample_barcode | |
AND a.chr=b.chromosome | |
AND ( ( a.start_pos >= b.start_pos ) | |
AND ( a.end_pos <= b.end_pos ) ) ), | |
bigJoin AS ( | |
SELECT | |
a.sample_barcode, | |
a.mirna_name, | |
a.chr AS chr38, | |
a.mirna_start AS mirna_start38, | |
a.mirna_end AS mirna_end38, | |
a.seg_start AS seg_start38, | |
a.seg_end AS seg_end38, | |
a.num_probes AS num_probes38, | |
a.seg_mean AS seg_mean38, | |
b.chr AS chr19, | |
b.mirna_start AS mirna_start19, | |
b.mirna_end AS mirna_end19, | |
b.seg_start AS seg_start19, | |
b.seg_end AS seg_end19, | |
b.num_probes AS num_probes19, | |
b.seg_mean AS seg_mean19 | |
FROM | |
hg38j2 a | |
JOIN | |
hg19j2 b | |
ON | |
a.sample_barcode=b.sample_barcode | |
AND a.mirna_name=b.mirna_name ), | |
stats AS ( | |
SELECT | |
num_probes38, | |
num_probes19, | |
(num_probes38-num_probes19) AS delta_num, | |
seg_mean38, | |
seg_mean19, | |
(seg_mean38-seg_mean19) AS delta_mean | |
FROM | |
bigJoin | |
WHERE | |
num_probes38 <> num_probes19 ) | |
SELECT | |
APPROX_QUANTILES(delta_mean,10) AS meanQ, | |
APPROX_QUANTILES(delta_num,10) AS numQ | |
FROM | |
stats |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment