Last active
August 29, 2015 13:57
-
-
Save JimHaughwout/9456686 to your computer and use it in GitHub Desktop.
Re-creating some items from the past. Ways to ingest and control access to medical device data in C*
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--Example of nested (i.e., controlled access) to medical device data | |
--used as part of eSource for medical trials | |
CREATE KEYSPACE med_devices WITH replication = { | |
'class': 'SimpleStrategy', | |
'replication_factor': '3' | |
}; | |
USE med_devices; | |
-- This table captures highly sparse medical sensor device data | |
-- It is designed to easily control the SCOPE of selection: | |
-- by compound, trial or subject. It uses CDASH standard nomenclature. | |
-- but insets underscores and moves to lowecase to adhere to C* standards | |
-- It only uses a small subset of potenal vital signs data. | |
-- We could partition on (compound, trial) but this would restrict flexibility | |
create table med_device_data ( | |
compound text, | |
study_id text, | |
u_subj_id text, | |
visit text, | |
read_ts timestamp, | |
sensor_id text, | |
pulse_vsorres float, | |
temp_vsorres, float, | |
bp_sysbp_vsorres float, | |
bp_diabp_vsorres float, | |
nth_data_pt float, | |
PRIMARY KEY (compound, study_id, visit, u_subj_id, read_ts) ); | |
-- Swapped visit and subject to get all for visit N for trial arm | |
-- Load up some test data. Remember that all non PK values are optimized | |
-- So you have to re-order your CSV (in Excel or via Python output) | |
-- can also simply pass NOW() as a value if we used timeuuid vs timestampe | |
-- timeuuids have lower collision rates than simple incements for auto ID | |
COPY med_device_data (compound, study_id, visit, u_subj_id, read_ts, | |
bp_diapb_vsorres, bp_sysp_vsorres, nth_data_pt, pulse_vsorres, sensor_id, temp_vsorres) | |
FROM 'test-data.csv' WITH HEADER=TRUE; | |
--Lots of select options, all will be fast due to key traversal | |
SELECT * FROM med_device_data where compound='AMG-162' | |
SELECT * FROM med_device_data where compound='AMG-162' AND study_id='Dmab 216' | |
SELECT * FROM med_device_data where compound='AMG-162' AND study_id='Dmab 216' AND visit='VIS1D' | |
SELECT * FROM med_device_data where compound='AMG-162' AND study_id='Dmab 216' AND visit='ARM 4: Screening' AND u_subj_id='ATO-0043' | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment