Skip to content

Instantly share code, notes, and snippets.

@garystafford
Last active March 1, 2022 14:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save garystafford/40dcb36ab1b9fceb330acd64b8227627 to your computer and use it in GitHub Desktop.
Save garystafford/40dcb36ab1b9fceb330acd64b8227627 to your computer and use it in GitHub Desktop.
-- Purpose: Process data for sinusitis study using Amazon Athena
-- Author: Gary A. Stafford (January 2022)
CREATE TABLE "sinusitis_athena" WITH (
format = 'Parquet',
write_compression = 'SNAPPY',
external_location = 's3://databrew-demo-111222333444-us-east-1/sinusitis_athena/',
bucketed_by = ARRAY['patient'],
bucket_count = 1
) AS
SELECT DISTINCT
patient,
code,
description,
date_diff(
'day',
date(substr(birthdate, 1, 10)),
date(substr(start, 1, 10))
) as condition_age,
marital,
race,
ethnicity,
gender
FROM conditions AS c,
patients AS p
WHERE c.patient = p.id
AND gender <> ''
AND ethnicity <> ''
AND race <> ''
AND marital <> ''
AND description LIKE '%sinusitis%'
ORDER BY patient, code;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment