Last active
September 15, 2017 17:16
-
-
Save smrgit/f2eca7b6009598b543d6bfaf4205efa3 to your computer and use it in GitHub Desktop.
Take a look at the types of open-access TXT and TSV data files that exist at the GDC but are *not* available in Google Cloud Storage.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH | |
t1 AS ( | |
SELECT | |
dbName, | |
file_id AS file_gdc_id, | |
access, | |
cases__project__program__name AS program_name, | |
cases__project__project_id AS project_short_name, | |
experimental_strategy, | |
data_category, | |
data_format, | |
data_type, | |
file_size | |
FROM | |
`isb-cgc.GDC_metadata.rel8_fileData_current` | |
UNION ALL | |
SELECT | |
dbName, | |
file_id AS file_gdc_id, | |
access, | |
cases__project__program__name AS program_name, | |
cases__project__project_id AS project_short_name, | |
experimental_strategy, | |
data_category, | |
data_format, | |
data_type, | |
file_size | |
FROM | |
`isb-cgc.GDC_metadata.rel8_fileData_legacy` ), | |
j1 AS ( | |
SELECT | |
a.dbName, | |
a.access, | |
a.program_name, | |
a.project_short_name, | |
a.experimental_strategy, | |
a.data_category, | |
a.data_format, | |
a.data_type, | |
a.file_size, | |
a.file_gdc_id AS aFile_gdc_id, | |
b.file_gdc_id AS bFile_gdc_id, | |
b.file_gcs_url | |
FROM | |
t1 a | |
FULL JOIN | |
`isb-cgc.GDC_metadata.rel8_GDCfileID_to_GCSurl` b | |
ON | |
a.file_gdc_id=b.file_gdc_id ) | |
SELECT | |
dbName, | |
access, | |
program_name, | |
experimental_strategy, | |
data_category, | |
data_type, | |
COUNT(*) AS n, | |
SUM(file_size)/1000000000000 AS total_size_TB | |
FROM | |
j1 | |
WHERE | |
file_gcs_url IS NULL | |
AND access="open" | |
AND ( data_format="TXT" | |
OR data_format="TSV" ) | |
GROUP BY | |
dbName, | |
access, | |
program_name, | |
experimental_strategy, | |
data_category, | |
data_type | |
ORDER BY | |
n DESC |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
updated to release 8 tables