# RHDZMOTA/alcohol_data_download.R

## alcohol_data_download.R
# Radar Plots
# Example: consumption of pure alcohol by type of beverage
library(tibble)


# Download data -----------------------------------------------------------
# Data by country and type of beverage, restricted by the REGION:EUR filter.
url <- "http://apps.who.int/gho/athena/data/data-text.csv?target=GHO/SA_0000001398&profile=text&filter=COUNTRY:*;REGION:EUR;ALCOHOLTYPE:*"

# Save the comma-separated export next to the script (relative path),
# then parse it into a data frame with a header row.
file_name <- "consum_type"
download.file(url = url, destfile = file_name)

# NOTE(review): `na.strings` is documented as a character vector; FALSE is
# presumably coerced to the string "FALSE" -- confirm that is the intent.
dataset <- read.table(
  file_name,
  sep = ",",
  header = TRUE,
  na.strings = FALSE,
  stringsAsFactors = FALSE
)

## data_cleaning.R
# Clean data --------------------------------------------------------------
library(tibble)

# Work with a tibble and drop every column except the country name,
# the beverage type and the `Numeric` value.
# as_tibble() replaces the deprecated as_data_frame().
dataset <- as_tibble(dataset)
dataset <- dataset[, c("Country", "Beverage.Types", "Numeric")]

# Row-position index, created to help filter relevant data.
# seq_len() stays empty for a zero-row dataset, where 1:nrow() gives c(1, 0).
index <- seq_len(nrow(dataset))
dataset$Index <- index

# Unique countries, problem: some names are too long.
# Each entry of countries_problem is replaced by the entry at the same
# position in countries_solution.
countries_unique <- unique(dataset$Country)
countries_problem <- c("Russian Federation",
                       "United Kingdom of Great Britain and Northern Ireland",
                       "The former Yugoslav republic of Macedonia",
                       "Republic of Moldova")
countries_solution <- c("Russia", "UK", "Macedonia", "Moldova")

# Vectorised recode: match() gives, for every row, the position of its
# country in countries_problem (NA when the name needs no change). This works
# even when the problem countries have different row counts or are absent
# from the data, cases in which a sapply()-built index matrix breaks.
recode_position <- match(dataset$Country, countries_problem)
rows_to_rename <- !is.na(recode_position)
dataset$Country[rows_to_rename] <-
  countries_solution[recode_position[rows_to_rename]]

# Other problem: the "Other alcoholic beverages" label is too long,
# so it is shortened to "Other".
# Rows are located by position with which(), which does not depend on the
# Index column matching row numbers and skips NA beverage types.
other_inconvinient <- which(
  dataset$Beverage.Types == "Other alcoholic beverages"
)
dataset$Beverage.Types[other_inconvinient] <- "Other"

# Add rows to the data frame that contain the average consumption:
# one "Average Country" row per beverage type.
beverage_unique <- unique(dataset$Beverage.Types)

# Mean of `Numeric` per beverage type, ignoring missing values.
# vapply() always yields a plain numeric vector (also for empty input) and
# does not rely on sapply() simplifying one-column tibbles.
beverage_average <- vapply(
  beverage_unique,
  function(x) mean(dataset$Numeric[dataset$Beverage.Types == x], na.rm = TRUE),
  numeric(1)
)
names(beverage_average) <- beverage_unique

# Columns of the auxiliary table, named after the dataset columns.
Numeric <- beverage_average
Beverage.Types <- beverage_unique
Country <- rep("Average Country", length(beverage_unique))
# NOTE(review): Index is built as character while dataset$Index holds row
# numbers; rbind() below presumably coerces the whole column to character.
# Kept as-is in case later code relies on it -- confirm.
Index <- rep(as.character(nrow(dataset) + 1), length(beverage_unique))
# tibble() replaces the deprecated data_frame().
df_aux <- tibble(Country = Country, Beverage.Types = Beverage.Types,
                 Numeric = Numeric, Index = Index)

# Append the average rows, then order both tables by beverage type.
dataset <- rbind(dataset, df_aux)
dataset <- dataset[order(dataset$Beverage.Types), ]
df_aux  <- df_aux[order(df_aux$Beverage.Types), ]
avrg_df <- df_aux

# Identify position (index) of the average country within the sorted
# unique country names, for future use in the plot.
countries_unique <- unique(dataset$Country)
countries_unique <- countries_unique[order(countries_unique)]
countries_unique_index <- seq_along(countries_unique)
# tibble() replaces the deprecated data_frame().
df_aux <- tibble(countries = countries_unique,
                 index = countries_unique_index)
avr_country <- "Average Country"
# %in% never returns NA, so a missing country name cannot leak into the
# lookup.
avr_index <- df_aux$index[df_aux$countries %in% avr_country]
	# Radar Plots
	# Example: consumption of pure alcohol by type of beverage
	library(tibble)


	# Download data -----------------------------------------------------------
	# Data by country and type of beverage, restricted by the REGION:EUR filter.
	# The COUNTRY and ALCOHOLTYPE filters carry a `*` wildcard (presumably
	# "all values" -- confirm against the API documentation); without it the
	# filter expression is left with empty values.
	url <- "http://apps.who.int/gho/athena/data/data-text.csv?target=GHO/SA_0000001398&profile=text&filter=COUNTRY:*;REGION:EUR;ALCOHOLTYPE:*"

	# Download and read files
	file_name <- "consum_type"
	download.file(url, file_name)
	# NOTE(review): `na.strings` is documented as a character vector; FALSE is
	# presumably coerced to the string "FALSE" -- confirm that is the intent.
	dataset <- read.table(file = file_name, header = TRUE, sep = ",",
	                      stringsAsFactors = FALSE, na.strings = FALSE)
	# Clean data --------------------------------------------------------------
	library(tibble)

	# Work with a tibble and drop every column except the country name,
	# the beverage type and the `Numeric` value.
	# as_tibble() replaces the deprecated as_data_frame().
	dataset <- as_tibble(dataset)
	dataset <- dataset[, c("Country", "Beverage.Types", "Numeric")]

	# Row-position index, created to help filter relevant data.
	# seq_len() stays empty for a zero-row dataset, where 1:nrow() gives c(1, 0).
	index <- seq_len(nrow(dataset))
	dataset$Index <- index

	# Unique countries, problem: some names are too long.
	# Each entry of countries_problem is replaced by the entry at the same
	# position in countries_solution.
	countries_unique <- unique(dataset$Country)
	countries_problem <- c("Russian Federation",
	                       "United Kingdom of Great Britain and Northern Ireland",
	                       "The former Yugoslav republic of Macedonia",
	                       "Republic of Moldova")
	countries_solution <- c("Russia", "UK", "Macedonia", "Moldova")

	# Vectorised recode: match() gives, for every row, the position of its
	# country in countries_problem (NA when the name needs no change). This works
	# even when the problem countries have different row counts or are absent
	# from the data, cases in which a sapply()-built index matrix breaks.
	recode_position <- match(dataset$Country, countries_problem)
	rows_to_rename <- !is.na(recode_position)
	dataset$Country[rows_to_rename] <-
	  countries_solution[recode_position[rows_to_rename]]

	# Other problem: the "Other alcoholic beverages" label is too long,
	# so it is shortened to "Other".
	# Rows are located by position with which(), which does not depend on the
	# Index column matching row numbers and skips NA beverage types.
	other_inconvinient <- which(
	  dataset$Beverage.Types == "Other alcoholic beverages"
	)
	dataset$Beverage.Types[other_inconvinient] <- "Other"

	# Add rows to the data frame that contain the average consumption:
	# one "Average Country" row per beverage type.
	beverage_unique <- unique(dataset$Beverage.Types)

	# Mean of `Numeric` per beverage type, ignoring missing values.
	# vapply() always yields a plain numeric vector (also for empty input) and
	# does not rely on sapply() simplifying one-column tibbles.
	beverage_average <- vapply(
	  beverage_unique,
	  function(x) mean(dataset$Numeric[dataset$Beverage.Types == x], na.rm = TRUE),
	  numeric(1)
	)
	names(beverage_average) <- beverage_unique

	# Columns of the auxiliary table, named after the dataset columns.
	Numeric <- beverage_average
	Beverage.Types <- beverage_unique
	Country <- rep("Average Country", length(beverage_unique))
	# NOTE(review): Index is built as character while dataset$Index holds row
	# numbers; rbind() below presumably coerces the whole column to character.
	# Kept as-is in case later code relies on it -- confirm.
	Index <- rep(as.character(nrow(dataset) + 1), length(beverage_unique))
	# tibble() replaces the deprecated data_frame().
	df_aux <- tibble(Country = Country, Beverage.Types = Beverage.Types,
	                 Numeric = Numeric, Index = Index)

	# Append the average rows, then order both tables by beverage type.
	dataset <- rbind(dataset, df_aux)
	dataset <- dataset[order(dataset$Beverage.Types), ]
	df_aux <- df_aux[order(df_aux$Beverage.Types), ]
	avrg_df <- df_aux

	# Identify position (index) of the average country within the sorted
	# unique country names, for future use in the plot.
	countries_unique <- unique(dataset$Country)
	countries_unique <- countries_unique[order(countries_unique)]
	countries_unique_index <- seq_along(countries_unique)
	# tibble() replaces the deprecated data_frame().
	df_aux <- tibble(countries = countries_unique,
	                 index = countries_unique_index)
	avr_country <- "Average Country"
	# %in% never returns NA, so a missing country name cannot leak into the
	# lookup.
	avr_index <- df_aux$index[df_aux$countries %in% avr_country]