Create a gist now

Instantly share code, notes, and snippets.

CREATE OR REPLACE FUNCTION kaplan_meier(text)
RETURNS TABLE (
time_period int,
pivot text,
exposed int,
events int,
marginal_probability double precision,
cumulative_probability double precision
) AS
$PROC$
DECLARE
tablename ALIAS FOR $1;
BEGIN
RETURN QUERY EXECUTE
$$
WITH
stacked_events as (
SELECT
pivot,
0 AS uncensored,
1 AS censored,
0 AS time_period
FROM $$ || tablename || $$
union all
SELECT
pivot,
CASE WHEN event::boolean THEN -1 ELSE 0 END AS uncensored,
CASE WHEN event::boolean THEN 0 ELSE -1 END AS censored,
time_period AS time_period
FROM $$ || tablename || $$
),
all_segment AS (
SELECT
time_period,
pivot,
SUM(uncensored) AS uncensored,
SUM(censored) AS censored
FROM stacked_events
GROUP BY time_period, pivot
),
relative_cumulative_probability AS (
SELECT time_period,
pivot,
1 + (uncensored)
/ COALESCE(
SUM(uncensored) OVER lag_w + SUM(censored) OVER lag_w,
censored
)::numeric AS marginal_probability,
-1 * uncensored AS events,
COALESCE(
SUM(uncensored) OVER lag_w + SUM(censored) OVER lag_w,
censored
) AS exposed
FROM all_segment
window lag_w AS (
PARTITION BY pivot ORDER BY time_period
rows BETWEEN unbounded preceding AND 1 preceding
)
),
cumulative_probability AS (
SELECT time_period,
pivot,
exposed,
events,
marginal_probability,
product_agg(marginal_probability) OVER w AS cumulative_probability
FROM relative_cumulative_probability
WINDOW w AS (
PARTITION BY pivot ORDER BY time_period
)
)
SELECT
time_period::int,
trim(pivot)::text,
exposed::int,
events::int,
marginal_probability::double precision,
cumulative_probability::double precision
FROM cumulative_probability
ORDER BY
pivot,
time_period
$$;
END
$PROC$
language plpgsql;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment