This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rm(list=ls()) | |
library(memisc) | |
library(assertthat) | |
library(sqldf) | |
library(magrittr) | |
library(dplyr) | |
library(reshape2) | |
library(ggplot2) | |
library(oz) | |
library(scatterpie) | |
library(rgdal) | |
library(maptools) | |
# Helper functions | |
# Load one raw CSV export from "./Data/" and reduce it to the six columns
# used by the rest of the script.
#
# Args:
#   csv_name: File name (not a full path) of a CSV located in "./Data/".
#
# Returns:
#   A data.frame with the columns country, sex, age, death_cause,
#   record_type and value, in that order. Text columns are kept as
#   character vectors, not factors.
#
# Stops with an error naming the absent columns if the file does not
# contain every required source column.
data_prep <- function(csv_name) {
  import <- read.csv(paste0("./Data/", csv_name), stringsAsFactors = FALSE)

  # Lower-case the header so the lookup below does not depend on the
  # capitalisation used in the export.
  names(import) <- tolower(names(import))

  # Source column name (as it appears after read.csv + tolower) mapped to
  # the short name used in the analysis.
  column_map <- c(
    "country.or.area"      = "country",
    "sex"                  = "sex",
    "age"                  = "age",
    "cause.of.death..who." = "death_cause",
    "record.type"          = "record_type",
    "value"                = "value"
  )

  missing_cols <- setdiff(names(column_map), names(import))
  if (length(missing_cols) > 0) {
    stop(
      "Missing required column(s) in ", csv_name, ": ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  import <- import[, names(column_map), drop = FALSE]
  names(import) <- unname(column_map)
  import
}
############### | |
# Total the `value` column per country and cause of death.
#
# Args:
#   data: data.frame with the columns country, sex, death_cause,
#     record_type and value.
#   sex_filter: Value of `sex` to EXCLUDE; rows whose `sex` equals it are
#     dropped before aggregating.
#   record_filter: Only rows whose `record_type` equals this value are kept.
#
# Returns:
#   A plain, ungrouped data.frame with the columns country, death_cause
#   and count, where count is the sum of `value` within each pair.
perform_groupby <- function(data, sex_filter, record_filter) {
  kept_rows <- filter(
    data,
    sex != sex_filter,
    record_type == record_filter
  )
  by_country_cause <- group_by(kept_rows, country, death_cause)
  totals <- summarise(by_country_cause, count = sum(value))
  as.data.frame(ungroup(totals))
}
# Reshape long per-country counts into one row per country with one column
# per cause of death, plus a row total.
#
# Args:
#   data: data.frame with the columns country, death_cause and count.
#
# Returns:
#   A data.frame whose first column is country, followed by one column per
#   distinct death_cause (counts summed within each country/cause pair) and
#   a final Total column holding the sum across the cause columns.
pivot_by_country <- function(data) {
  wide <- dcast(
    data,
    country ~ death_cause,
    fun.aggregate = sum,
    value.var = "count"
  )
  # Drop the country column by position and keep the result a data.frame
  # (drop = FALSE) so rowSums() also works when there is only one cause.
  wide$Total <- rowSums(wide[, -1, drop = FALSE])
  wide
}
# Main Logic
death_data <- data_prep("death2014.csv")

# The second argument is the `sex` value to exclude. The third selects the
# record type; the other value seen in the data is
# "Data tabulated by year of registration".
grouped_data <- perform_groupby(
  death_data,
  "",
  "Data tabulated by year of occurrence"
)

# Shorten the cause labels. Both replacements are literal text, so match
# them as fixed strings rather than as regular expressions.
grouped_data$death_cause <- gsub(
  ", ICD10", "", grouped_data$death_cause,
  fixed = TRUE
)
grouped_data$death_cause <- gsub(
  "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified",
  "Abnormal clinical and lab findings",
  grouped_data$death_cause,
  fixed = TRUE
)

# Leading causes overall
overall_data <- grouped_data %>%
  group_by(death_cause) %>%
  summarise(totalcount = sum(count)) %>%
  ungroup() %>%
  as.data.frame()
overall_data <- overall_data[order(-overall_data$totalcount), ]

# Exclude the "All causes" aggregate by name instead of assuming it is the
# first row, then keep the leading causes in descending order of count.
# NOTE(review): the original selected rows 2:10, i.e. nine causes, although
# the variable is called top_ten_causes. Nine is kept here so the output
# columns do not change; confirm whether ten were intended.
ranked_causes <- setdiff(overall_data$death_cause, "All causes")
top_ten_causes <- head(ranked_causes, 9)

# Drop the "All causes" rows and fold every cause outside the leading set
# into a single "Others" category.
is_all_causes <- grouped_data$death_cause %in% "All causes"
is_top_cause <- grouped_data$death_cause %in% top_ten_causes
grouped_data$death_cause[!is_top_cause] <- "Others"
grouped_data <- grouped_data[
  !is_all_causes,
  c("country", "count", "death_cause")
]
rownames(grouped_data) <- NULL

pivotted_data <- pivot_by_country(grouped_data)

# Getting the coordinates of each country
country_lookup <- read.csv(
  paste0("./Data/", "countries.csv"),
  stringsAsFactors = FALSE
)
names(country_lookup)[1] <- "country_code"

# Combining data: keep every country from the pivot even when the lookup
# has no matching name.
final_data <- merge(
  x = pivotted_data,
  y = country_lookup,
  by.x = "country",
  by.y = "name",
  all.x = TRUE
)

# Data cleaning for plotting
final_data <- unique(final_data)

# Log-scaled size factor relative to the country with the largest total.
multiplier <- log10(final_data$Total) / log10(max(final_data$Total))
final_data$multiplier <- multiplier
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment