Last active
October 25, 2018 12:07
-
-
Save mfmakahiya/94c97874d722714149c7fc432885e0c7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rm(list=ls()) | |
library(memisc) | |
library(assertthat) | |
library(sqldf) | |
library(magrittr) | |
library(dplyr) | |
library(reshape2) | |
library(ggplot2) | |
library(oz) | |
library(scatterpie) | |
library(rgdal) | |
library(maptools) | |
# Helper functions | |
# Load one cause-of-death CSV export and reduce it to the six columns the
# rest of the script works with.
#
# Args:
#   csv_name: File name of a CSV inside "./Data/" (resolved relative to the
#             current working directory).
#
# Returns:
#   A data.frame with exactly the columns country, sex, age, death_cause,
#   record_type and value, in that order.
#
# Raises:
#   An error naming the missing column(s) when the file lacks any required
#   column. Header names are compared after lower-casing.
data_prep <- function(csv_name) {
  # Lower-cased header as produced by read.csv -> short name used downstream.
  column_map <- c(
    "country.or.area" = "country",
    "sex" = "sex",
    "age" = "age",
    "cause.of.death..who." = "death_cause",
    "record.type" = "record_type",
    "value" = "value"
  )

  import <- read.csv(
    file.path(".", "Data", csv_name),
    stringsAsFactors = FALSE
  )
  names(import) <- tolower(names(import))

  # Fail early with a readable message instead of the generic
  # "undefined columns selected" raised by the subsetting below.
  missing_cols <- setdiff(names(column_map), names(import))
  if (length(missing_cols) > 0) {
    stop(
      "Missing expected column(s) in ", csv_name, ": ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  import <- import[, names(column_map)]
  names(import) <- unname(column_map)
  import
}
############### | |
# Total the `value` column for every (country, death_cause) pair.
#
# Args:
#   data: Data frame with the columns sex, record_type, country, death_cause
#         and value.
#   sex_filter: Sex label to EXCLUDE: rows whose `sex` equals it are dropped.
#   record_filter: Record type to KEEP: only rows whose `record_type` equals
#                  it are retained.
#
# Returns:
#   A plain data.frame with the columns country, death_cause and count, where
#   count is the sum of `value` within each pair.
perform_groupby <- function(data, sex_filter, record_filter) {
  kept_rows <- filter(
    data,
    sex != sex_filter,
    record_type == record_filter
  )
  by_country_cause <- group_by(kept_rows, country, death_cause)
  totals <- summarise(by_country_cause, count = sum(value))
  as.data.frame(ungroup(totals))
}
# Reshape per-(country, cause) counts into one row per country.
#
# Args:
#   data: Data frame with the columns country, death_cause and count.
#
# Returns:
#   A data.frame with a `country` column, one column per distinct death_cause
#   holding the summed count, and a `Total` column with the row-wise sum of
#   the cause columns.
pivot_by_country <- function(data) {
  long <- melt(
    data,
    id.vars = c("country", "death_cause"),
    measure.vars = "count"
  )
  wide <- dcast(long, country ~ death_cause, sum, value.var = "value")
  # Drop the country column by position and keep the data-frame shape, so a
  # single cause column is not collapsed to a vector (rowSums needs 2-D input).
  wide$Total <- rowSums(wide[, -1, drop = FALSE])
  wide
}
# Main Logic | |
# Load the 2014 export and total the counts per country and cause of death.
# perform_groupby drops rows whose sex equals its second argument, so ""
# is presumably a "drop nothing" placeholder (confirm against the data).
# Record types seen so far: "Data tabulated by year of occurrence" and
# "Data tabulated by year of registration".
death_data <- data_prep("death2014.csv")
grouped_data <- perform_groupby(
  death_data,
  "",
  "Data tabulated by year of occurrence"
)

# Shorten the cause labels. The suffix is matched literally (fixed = TRUE);
# the earlier pattern "*, ICD10" started with a dangling regex quantifier.
grouped_data$death_cause <- gsub(
  ", ICD10",
  "",
  grouped_data$death_cause,
  fixed = TRUE
)
grouped_data$death_cause <- gsub(
  "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified",
  "Abnormal clinical and lab findings",
  grouped_data$death_cause,
  fixed = TRUE
)

# Top 10 leading causes overall: rank causes by their total over all countries.
overall_data <- grouped_data %>%
  group_by(death_cause) %>%
  summarise(totalcount = sum(count)) %>%
  ungroup() %>%
  as.data.frame()
overall_data <- overall_data[order(-overall_data$totalcount), ]

# "All causes" is an aggregate, not a cause: exclude it by name instead of
# assuming it is the first row, then keep at most ten causes. head() also
# avoids NA entries when fewer than ten causes are present.
ranked_causes <- overall_data$death_cause
ranked_causes <- ranked_causes[!ranked_causes %in% "All causes"]
top_ten_causes <- head(ranked_causes, 10)

# Keep the top causes under their own label and fold every other specific
# cause into "Others"; "All causes" rows end up in neither subset.
grouped_data$death_cause2 <- grouped_data$death_cause
grouped_data1 <- grouped_data %>%
  filter(death_cause %in% top_ten_causes)
grouped_data2 <- grouped_data %>%
  filter(!death_cause %in% c(top_ten_causes, "All causes"))
# rep() keeps this assignment valid when no "other" rows exist.
grouped_data2$death_cause2 <- rep("Others", nrow(grouped_data2))

# Select by name rather than by position so a column reorder cannot silently
# pick the wrong data.
keep_cols <- c("country", "count", "death_cause2")
grouped_data <- rbind(grouped_data1[, keep_cols], grouped_data2[, keep_cols])
names(grouped_data) <- c("country", "count", "death_cause")
pivotted_data <- pivot_by_country(grouped_data)

# Getting the coordinates of each country
country_lookup <- read.csv(
  file.path(".", "Data", "countries.csv"),
  stringsAsFactors = FALSE
)
names(country_lookup)[1] <- "country_code"

# Combining data: left join, so countries without a lookup match are kept
# with NA in the lookup columns.
final_data <- merge(
  x = pivotted_data,
  y = country_lookup,
  by.x = "country",
  by.y = "name",
  all.x = TRUE
)

# Data cleaning for plotting
final_data <- unique(final_data)
# Log-scaled size of each country's total relative to the largest total
# (the largest total maps to 1).
# NOTE(review): a Total of 0 yields -Inf here; confirm whether that can occur.
multiplier <- log10(final_data$Total) / log10(max(final_data$Total))
final_data$multiplier <- multiplier
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment