Created
February 20, 2023 01:04
-
-
Save wwood/8c0c321ae1b189f3f09e8841b5edeedd to your computer and use it in GitHub Desktop.
Estimate the total amount of public metagenome sequence using Google BigQuery
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Loop counter | |
DECLARE counter int64 default 1; | |
-- Create intermediate table with organisms that are metagenomes | |
CREATE OR REPLACE TABLE test.emp | |
AS | |
WITH cte AS | |
( | |
SELECT 1 AS xlevel, tax_id, parent_id, sci_name | |
FROM `nih-sra-datastore.sra_tax_analysis_tool.taxonomy` e | |
WHERE e.sci_name = 'metagenomes' | |
) | |
SELECT * | |
FROM cte; | |
WHILE EXISTS ( | |
SELECT c.* | |
FROM test.emp p | |
INNER JOIN `nih-sra-datastore.sra_tax_analysis_tool.taxonomy` c ON p.tax_id = c.parent_id | |
WHERE p.xlevel = counter | |
) | |
DO | |
-- Insert next level | |
INSERT INTO test.emp ( xlevel, tax_id, parent_id, sci_name ) | |
SELECT counter + 1 AS xlevel, c.tax_id, c.parent_id, c.sci_name | |
FROM test.emp p | |
INNER JOIN `nih-sra-datastore.sra_tax_analysis_tool.taxonomy` c ON p.tax_id = c.parent_id | |
WHERE p.xlevel = counter; | |
SET counter = counter + 1; | |
-- Loop safely | |
IF counter > 10 | |
THEN | |
BREAK; | |
END IF; | |
END WHILE; | |
SELECT | |
sum(mbases) | |
FROM | |
`nih-sra-datastore.sra.metadata` | |
WHERE | |
( | |
librarysource = 'METAGENOMIC' | |
or | |
organism IN (SELECT sci_name FROM test.emp) | |
) | |
AND consent = 'public' | |
AND libraryselection = 'RANDOM' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment