Last active
September 8, 2017 15:09
-
-
Save klpn/8a9939cf6ffb0a186eb244bc1c18f553 to your computer and use it in GitHub Desktop.
Analyze Swedish cause-of-death and population data that have been converted to CSV files (assumed to be located at data/2017-9-10-4Amod.csv and data/mfsv16.csv).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using DataFrames

# Cause-of-death table; later code reads its `icd10`, `sex`, `age` and `n` columns.
deaths = readtable(normpath("data", "2017-9-10-4Amod.csv"))

# Lookup table for the 21 age codes 0..20 used in `deaths`.
# `agest` looks like the starting age of each group and `age2` like a coarser
# grouping of the same codes -- NOTE(review): confirm against the data description.
ages = DataFrame(age = collect(0:20),
                 agest = vcat(0, 0, 1, collect(5:5:90)),
                 age2 = vcat(0, 1, fill(2, 3), fill(3, 6), fill(4, 4), fill(5, 2),
                             fill(6, 4)))

# One row per `age2` value, carrying the smallest `agest` found in that group.
ages2 = by(ages[:, [:agest, :age2]], :age2) do grp
    DataFrame(agest = minimum(grp[:agest]))
end
# Accumulator for the population totals; filled one sex at a time below.
pop = DataFrame()

# Population table; later code reads its `age1`, `sex` and `n` columns.
pop1 = readtable(normpath("data", "mfsv16.csv"))

# Map the 102 `age1` codes 0..101 onto the age codes used in `ages`:
# 0 -> 0, 1 -> 1, 2..5 -> 2, 6..90 -> fld(age1 - 1, 5) + 2 (i.e. 3..19), 91..101 -> 20.
ages1 = DataFrame(age1 = collect(0:101),
                  age = vcat(0, 1, fill(2, 4), [fld(a, 5) + 2 for a in 5:89],
                             fill(20, 11)))
pop1_age = join(pop1, ages1, on = :age1)

# For each sex code, sum `n` within every age code and stack the results.
for sexcode in ["Kv", "M"]
    rows = pop1_age[:sex] .== sexcode
    totals = by(pop1_age[rows, [:n, :age]], :age) do grp
        DataFrame(sex = sexcode, n = sum(grp[:n]))
    end
    pop = vcat(pop, totals)
end
"""
    df_reg(sex, caexpr, ageaggr = false)

Sum the `n` column per age group for one sex.

# Arguments
- `sex`: value compared for equality against the `sex` column.
- `caexpr`: the literal string `"pop"` selects rows of the global `pop` table; any
  other string is compiled with `Regex`, and rows of the global `deaths` table whose
  `icd10` value matches it (per `ismatch`) are selected.
- `ageaggr`: when `true`, the per-`age` totals are further summed per `age2` group.

# Returns
A `DataFrame`: the per-`age` totals joined with the global `ages` table, or, when
`ageaggr` is `true`, the per-`age2` totals joined with the global `ages2` table.
"""
function df_reg(sex, caexpr, ageaggr = false)
    if caexpr == "pop"
        df_sub = pop[pop[:sex] .== sex, [:age, :n]]
    else
        # Compile the pattern once rather than once per row.
        pattern = Regex(caexpr)
        # Filter by sex first, then by pattern: this avoids combining two masks with
        # the array-level `&` and only tests the codes of the requested sex.
        deaths_sex = deaths[deaths[:sex] .== sex, [:icd10, :age, :n]]
        codes = convert(Vector{AbstractString}, deaths_sex[:icd10])
        hits = Bool[ismatch(pattern, code) for code in codes]
        df_sub = deaths_sex[hits, [:age, :n]]
    end

    # Collapse to one total per age code and attach the age lookup columns.
    df_aggr = by(df_sub, :age, df -> DataFrame(n = sum(df[:n])))
    df_aggr_age = join(df_aggr, ages, on = :age)
    if !ageaggr
        return df_aggr_age
    end

    # Collapse further to the `age2` grouping and attach each group's `agest`.
    df_aggr_age2 = by(df_aggr_age[:, [:n, :age2]], :age2,
                      df -> DataFrame(n = sum(df[:n])))
    return join(df_aggr_age2, ages2, on = :age2)
end
"""
    propframe(sex, ca1, ca2 = "A00-Y98", ageaggr = false)

Compute, per age group, the ratio of the `n` totals for `ca1` to those for `ca2`.

Both `ca1` and `ca2` are passed to `df_reg` as its `caexpr` argument, together with
`sex` and `ageaggr`.

# Returns
A `Dict` with keys `:sex`, `:ca1`, `:ca2` (the arguments as given) and `:frame`, a
`DataFrame` with columns `age` (the first column of the `df_reg` result), `agest`
and `prop`. Age groups present for `ca1` but absent for `ca2` are left out, because
no ratio can be formed for them.
"""
function propframe(sex, ca1, ca2 = "A00-Y98", ageaggr = false)
    ca1frame = df_reg(sex, ca1, ageaggr)
    ca2frame = df_reg(sex, ca2, ageaggr)

    # Pair numerators and denominators by age key instead of by row position: the two
    # frames need not contain the same age groups, and a positional `./` would then
    # either fail on a length mismatch or silently divide the wrong groups.
    denom = Dict(zip(ca2frame[1], ca2frame[:n]))
    keep = Bool[haskey(denom, key) for key in ca1frame[1]]
    numer = ca1frame[keep, :]
    denoms = [denom[key] for key in numer[1]]

    frame = DataFrame(age = numer[1], agest = numer[:agest],
                      prop = numer[:n] ./ denoms)
    return Dict(:sex => sex, :ca1 => ca1, :ca2 => ca2, :frame => frame)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment