Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Total size of TCGA and NLST DICOM SM data in IDC v8 release
WITH
all_new_sm AS (
SELECT
collection_id,
COUNT(DISTINCT(SeriesInstanceUID)) AS num_series,
ROUND(SUM(instance_size)/(1024*1024*1024),2) AS size_GB
FROM
`bigquery-public-data.idc_v8.dicom_all`
WHERE
(collection_id LIKE "tcga%"
OR collection_id = "nlst")
AND Modality = "SM"
GROUP BY
collection_id
ORDER BY
collection_id)
SELECT
ROUND(SUM(size_GB)/1024,2) AS size_TB
FROM
all_new_sm
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment