Last active
October 26, 2017 02:23
-
-
Save smrgit/f55e69ad6e1284ed4af76a933916cf83 to your computer and use it in GitHub Desktop.
Identify GDC data available in ISB-CGC GCS buckets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH | |
t1 AS ( | |
SELECT | |
DISTINCT file_gdc_id | |
FROM | |
`isb-cgc.GDC_metadata.rel8_GDCfileID_to_GCSurl` ), | |
j1 AS ( | |
SELECT | |
a.dbName, | |
a.file_id, | |
a.acl, | |
a.cases__project__program__name AS program_name, | |
a.data_category, | |
a.data_type, | |
a.data_format, | |
a.experimental_strategy | |
FROM | |
`isb-cgc.GDC_metadata.rel8_fileData_legacy` a | |
JOIN | |
t1 b | |
ON | |
a.file_id=b.file_gdc_id ), | |
j2 AS ( | |
SELECT | |
a.dbName, | |
a.file_id, | |
a.acl, | |
a.cases__project__program__name AS program_name, | |
a.data_category, | |
a.data_type, | |
a.data_format, | |
a.experimental_strategy | |
FROM | |
`isb-cgc.GDC_metadata.rel8_fileData_current` a | |
JOIN | |
t1 b | |
ON | |
a.file_id=b.file_gdc_id ), | |
u1 AS ( | |
SELECT | |
* | |
FROM | |
j1 | |
UNION ALL | |
SELECT | |
* | |
FROM | |
j2 ) | |
SELECT | |
dbName, | |
acl, | |
program_name, | |
data_category, | |
data_format, | |
experimental_strategy, | |
COUNT(*) AS n | |
FROM | |
u1 | |
GROUP BY | |
1, | |
2, | |
3, | |
4, | |
5, | |
6 | |
ORDER BY | |
n DESC |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment