Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Batch processing of health data using QRISK®2-2015 algorithm
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <cmath>
#include <cstring>
using std::string;
// source: https://qrisk.org/2015/QRISK2-2015-lgpl-source.tgz
/*static*/ double cvd_male_raw(
int age,int b_AF,int b_ra,int b_renal,int b_treatedhyp,int b_type1,int b_type2,double bmi,int ethrisk,int fh_cvd,double rati,double sbp,int smoke_cat,int surv,double town
)
{
surv = 10;
double survivor[16] = {
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0.978794217109680,
0,
0,
0,
0,
0
};
/* The conditional arrays */
double Iethrisk[10] = {
0,
0,
0.3173321430481919100000000,
0.4738590786081115500000000,
0.5171314655968145500000000,
0.1370301157366419200000000,
-0.3885522304972663900000000,
-0.3812495485312194500000000,
-0.4064461381650994500000000,
-0.2285715521377336100000000
};
double Ismoke[5] = {
0,
0.2684479158158020200000000,
0.6307674973877591700000000,
0.7178078883378695700000000,
0.8704172533465485100000000
};
/* Applying the fractional polynomial transforms */
/* (which includes scaling) */
double dage = age;
dage=dage/10;
double age_1 = pow(dage,-1);
double age_2 = pow(dage,2);
double dbmi = bmi;
dbmi=dbmi/10;
double bmi_2 = pow(dbmi,-2)*log(dbmi);
double bmi_1 = pow(dbmi,-2);
/* Centring the continuous variables */
age_1 = age_1 - 0.233734160661697;
age_2 = age_2 - 18.304403305053711;
bmi_1 = bmi_1 - 0.146269768476486;
bmi_2 = bmi_2 - 0.140587374567986;
rati = rati - 4.321151256561279;
sbp = sbp - 130.589752197265620;
town = town - 0.551009356975555;
/* Start of Sum */
double a=0;
/* The conditional sums */
a += Iethrisk[ethrisk];
a += Ismoke[smoke_cat];
/* Sum from continuous values */
a += age_1 * -18.0437312550377270000000000;
a += age_2 * 0.0236486454254306940000000;
a += bmi_1 * 2.5388084343581578000000000;
a += bmi_2 * -9.1034725871528597000000000;
a += rati * 0.1684397636136909500000000;
a += sbp * 0.0105003089380754820000000;
a += town * 0.0323801637634487590000000;
/* Sum from boolean values */
a += b_AF * 1.0363048000259454000000000;
a += b_ra * 0.2519953134791012600000000;
a += b_renal * 0.8359352886995286000000000;
a += b_treatedhyp * 0.6603459695917862600000000;
a += b_type1 * 1.3309170433446138000000000;
a += b_type2 * 0.9454348892774417900000000;
a += fh_cvd * 0.5986037897136281500000000;
/* Sum from interaction terms */
a += age_1 * (smoke_cat==1) * 0.6186864699379683900000000;
a += age_1 * (smoke_cat==2) * 1.5522017055600055000000000;
a += age_1 * (smoke_cat==3) * 2.4407210657517648000000000;
a += age_1 * (smoke_cat==4) * 3.5140494491884624000000000;
a += age_1 * b_AF * 8.0382925558108482000000000;
a += age_1 * b_renal * -1.6389521229064483000000000;
a += age_1 * b_treatedhyp * 8.4621771382346651000000000;
a += age_1 * b_type1 * 5.4977016563835504000000000;
a += age_1 * b_type2 * 3.3974747488766690000000000;
a += age_1 * bmi_1 * 33.8489881012767600000000000;
a += age_1 * bmi_2 * -140.6707025404897100000000000;
a += age_1 * fh_cvd * 2.0858333154353321000000000;
a += age_1 * sbp * 0.0501283668830720540000000;
a += age_1 * town * -0.1988268217186850700000000;
a += age_2 * (smoke_cat==1) * -0.0040893975066796338000000;
a += age_2 * (smoke_cat==2) * -0.0056065852346001768000000;
a += age_2 * (smoke_cat==3) * -0.0018261006189440492000000;
a += age_2 * (smoke_cat==4) * -0.0014997157296173290000000;
a += age_2 * b_AF * 0.0052471594895864343000000;
a += age_2 * b_renal * -0.0179663586193546390000000;
a += age_2 * b_treatedhyp * 0.0092088445323379176000000;
a += age_2 * b_type1 * 0.0047493510223424558000000;
a += age_2 * b_type2 * -0.0048113775783491563000000;
a += age_2 * bmi_1 * 0.0627410757513945650000000;
a += age_2 * bmi_2 * -0.2382914909385732100000000;
a += age_2 * fh_cvd * -0.0049971149213281010000000;
a += age_2 * sbp * -0.0000523700987951435090000;
a += age_2 * town * -0.0012518116569283104000000;
/* Calculate the score itself */
double score = 100.0 * (1 - pow(survivor[surv], exp(a)) );
return score;
}
/*static*/ double cvd_female_raw(
int age,int b_AF,int b_ra,int b_renal,int b_treatedhyp,int b_type1,int b_type2,double bmi,int ethrisk,int fh_cvd,double rati,double sbp,int smoke_cat,int surv,double town
)
{
surv = 10;
double survivor[16] = {
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0.989747583866119,
0,
0,
0,
0,
0
};
/* The conditional arrays */
double Iethrisk[10] = {
0,
0,
0.2574099349831925900000000,
0.6129795430571779400000000,
0.3362159841669621300000000,
0.1512517303224336400000000,
-0.1794156259657768100000000,
-0.3503423610057745400000000,
-0.2778372483233216800000000,
-0.1592734122665366000000000
};
double Ismoke[5] = {
0,
0.2119377108760385200000000,
0.6618634379685941500000000,
0.7570714587132305600000000,
0.9496298251457036000000000
};
/* Applying the fractional polynomial transforms */
/* (which includes scaling) */
double dage = age;
dage=dage/10;
double age_2 = dage;
double age_1 = pow(dage,.5);
double dbmi = bmi;
dbmi=dbmi/10;
double bmi_2 = pow(dbmi,-2)*log(dbmi);
double bmi_1 = pow(dbmi,-2);
/* Centring the continuous variables */
age_1 = age_1 - 2.086397409439087;
age_2 = age_2 - 4.353054523468018;
bmi_1 = bmi_1 - 0.152244374155998;
bmi_2 = bmi_2 - 0.143282383680344;
rati = rati - 3.506655454635620;
sbp = sbp - 125.040039062500000;
town = town - 0.416743695735931;
/* Start of Sum */
double a=0;
/* The conditional sums */
a += Iethrisk[ethrisk];
a += Ismoke[smoke_cat];
/* Sum from continuous values */
a += age_1 * 4.4417863976316578000000000;
a += age_2 * 0.0281637210672999180000000;
a += bmi_1 * 0.8942365304710663300000000;
a += bmi_2 * -6.5748047596104335000000000;
a += rati * 0.1433900561621420900000000;
a += sbp * 0.0128971795843613720000000;
a += town * 0.0664772630011438850000000;
/* Sum from boolean values */
a += b_AF * 1.6284780236484424000000000;
a += b_ra * 0.2901233104088770700000000;
a += b_renal * 1.0043796680368302000000000;
a += b_treatedhyp * 0.6180430562788129500000000;
a += b_type1 * 1.8400348250874599000000000;
a += b_type2 * 1.1711626412196512000000000;
a += fh_cvd * 0.5147261203665195500000000;
/* Sum from interaction terms */
a += age_1 * (smoke_cat==1) * 0.7464406144391666500000000;
a += age_1 * (smoke_cat==2) * 0.2568541711879666600000000;
a += age_1 * (smoke_cat==3) * -1.5452226707866523000000000;
a += age_1 * (smoke_cat==4) * -1.7113013709043405000000000;
a += age_1 * b_AF * -7.0177986441269269000000000;
a += age_1 * b_renal * -2.9684019256454390000000000;
a += age_1 * b_treatedhyp * -4.2219906452967848000000000;
a += age_1 * b_type1 * 1.6835769546040080000000000;
a += age_1 * b_type2 * -2.9371798540034648000000000;
a += age_1 * bmi_1 * 0.1797196207044682300000000;
a += age_1 * bmi_2 * 40.2428166760658140000000000;
a += age_1 * fh_cvd * 0.1439979240753906700000000;
a += age_1 * sbp * -0.0362575233899774460000000;
a += age_1 * town * 0.3735138031433442600000000;
a += age_2 * (smoke_cat==1) * -0.1927057741748231000000000;
a += age_2 * (smoke_cat==2) * -0.1526965063458932700000000;
a += age_2 * (smoke_cat==3) * 0.2313563976521429400000000;
a += age_2 * (smoke_cat==4) * 0.2307165013868296700000000;
a += age_2 * b_AF * 1.1395776028337732000000000;
a += age_2 * b_renal * 0.4356963208330940600000000;
a += age_2 * b_treatedhyp * 0.7265947108887239600000000;
a += age_2 * b_type1 * -0.6320977766275653900000000;
a += age_2 * b_type2 * 0.4023270434871086800000000;
a += age_2 * bmi_1 * 0.1319276622711877700000000;
a += age_2 * bmi_2 * -7.3211322435546409000000000;
a += age_2 * fh_cvd * -0.1330260018273720400000000;
a += age_2 * sbp * 0.0045842850495397955000000;
a += age_2 * town * -0.0952370300845990780000000;
/* Calculate the score itself */
double score = 100.0 * (1 - pow(survivor[surv], exp(a)) );
return score;
}
// handle any line endings
// https://stackoverflow.com/a/6089413/1971301
std::istream& safeGetline(std::istream& is, std::string& t)
{
t.clear();
// The characters in the stream are read one-by-one using a std::streambuf.
// That is faster than reading them one-by-one using the std::istream.
// Code that uses streambuf this way must be guarded by a sentry object.
// The sentry object performs various tasks,
// such as thread synchronization and updating the stream state.
std::istream::sentry se(is, true);
std::streambuf* sb = is.rdbuf();
for(;;) {
int c = sb->sbumpc();
switch (c) {
case '\n':
return is;
case '\r':
if(sb->sgetc() == '\n')
sb->sbumpc();
return is;
case std::streambuf::traits_type::eof():
// Also handle the case when the last line has no line ending
if(t.empty())
is.setstate(std::ios::eofbit);
return is;
default:
t += (char)c;
}
}
}
const char *formattedDouble(double v)
{
const char *fmt = "%.1f";
int sz = std::snprintf(nullptr, 0, fmt, v);
std::vector<char> buf(sz + 1);
std::snprintf(&buf[0], buf.size(), fmt, v);
return buf.data();
}
int main (int argc, char const *argv[])
{
const char delimeter = ';';
std::ifstream in(argc > 1 ? argv[1] : "data.csv", std::ios::in);
if(!in)
{
std::cerr << "error opening input CSV file\n";
return 1;
}
// skip first line (with column names)
string foo;
in >> foo;
std::ofstream out("out.csv", std::ios::out);
if(!out)
{
std::cerr << "error opening output file out.csv\n";
return 2;
}
while(!in.eof())
{
string s;
safeGetline(in, s);
if (s.empty())
continue;
// sex;age;AF;ra;renal;treatedhyp;type1;type2;bmi;ethrisk;fh_cvd;rati;sbp;smoke_cat
std::istringstream iss(s);
string sex;
std::getline(iss, sex, delimeter);
double(*scoreFunc)(int,int,int,int,int,int,int,double,int,int,double,double,int,int,double) = sex == "0" ? cvd_female_raw : cvd_male_raw;
string ageStr;
std::getline(iss, ageStr, delimeter);
int age = 0;
try {
age = std::stoi(ageStr);
} catch (std::exception e) {}
string afStr;
std::getline(iss, afStr, delimeter);
int af = 0;
try {
af = std::stoi(afStr);
} catch (std::exception e) {}
string raStr;
std::getline(iss, raStr, delimeter);
int ra = 0;
try {
ra = std::stoi(raStr);
} catch (std::exception e) {}
string renalStr;
std::getline(iss, renalStr, delimeter);
int renal = 0;
try {
renal = std::stoi(renalStr);
} catch (std::exception e) {}
string treatedhypStr;
std::getline(iss, treatedhypStr, delimeter);
int treatedhyp = 0;
try {
treatedhyp = std::stoi(treatedhypStr);
} catch (std::exception e) {}
string type1Str;
std::getline(iss, type1Str, delimeter);
int type1 = 0;
try {
type1 = std::stoi(type1Str);
} catch (std::exception e) {}
string type2Str;
std::getline(iss, type2Str, delimeter);
int type2 = 0;
try {
type2 = std::stoi(type2Str);
} catch (std::exception e) {}
string bmiStr;
std::getline(iss, bmiStr, delimeter);
double bmi = 0.0;
try {
bmi = std::stod(bmiStr);
} catch (std::exception e) {}
string ethriskStr;
std::getline(iss, ethriskStr, delimeter);
int ethrisk = 0;
try {
ethrisk = std::stoi(ethriskStr);
} catch (std::exception e) {}
string fh_cvdStr;
std::getline(iss, fh_cvdStr, delimeter);
int fh_cvd = 0;
try {
fh_cvd = std::stoi(fh_cvdStr);
} catch (std::exception e) {}
string ratiStr;
std::getline(iss, ratiStr, delimeter);
double rati = 4.0;
try {
rati = std::stod(ratiStr);
} catch (std::exception e) {}
string sbpStr;
std::getline(iss, sbpStr, delimeter);
double sbp = 0.0;
try {
sbp = std::stod(sbpStr);
} catch (std::exception e) {}
string smokeStr;
std::getline(iss, smokeStr, delimeter);
int smoke = 0;
try {
smoke = std::stoi(smokeStr);
} catch (std::exception e) {}
double score = scoreFunc(age, af, ra, renal, treatedhyp, type1, type2, bmi, ethrisk, fh_cvd, rati, sbp, smoke, 10, 0.0);
double healthyScore = scoreFunc(age, 0, 0, 0, 0, 0, 0, 25.0, ethrisk, 0, 4.0, 125.0, 0, 10, 0.0);
out << formattedDouble(score) << delimeter << formattedDouble(healthyScore) << delimeter << formattedDouble(score / healthyScore) << delimeter;
// heart age calculation
while (healthyScore < score && age <= 100) {
healthyScore = scoreFunc(++age, 0, 0, 0, 0, 0, 0, 25.0, ethrisk, 0, 4.0, 125.0, 0, 10, 0.0);
}
out << age << delimeter << formattedDouble(healthyScore) << std::endl;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.