Last active
January 21, 2019 19:32
-
-
Save cutendorf/3e53972a32033730ce1ce1f0cbee5647 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
title: "DataViz with ggplot"
subtitle: "IE School of Human Sciences & Technology"
author: "by MBD O1 Group 1"
date: "`r Sys.Date()`"
output:
  prettydoc::html_pretty:
    theme: leonids
    highlight: github
---
The following project focuses on **data visualization** with the R library **ggplot2**. For this purpose, we selected the **Tour de France dataset**, which can be found under the following <span style="color:steelblue">[link](https://gist.githubusercontent.com/cutendorf/105806d901b4d51fc1f7bdc278d66f6e/raw/cd855a6f034aab0744857cfd9b9eb7f275627b5b/Tour_de_France.csv)</span>.
The R code that was used to create this markdown is available under the following <span style="color:steelblue">[link](https://gist.githubusercontent.com/cutendorf/3e53972a32033730ce1ce1f0cbee5647/raw/2b5ee18ffd0efa27b9df6dc23044509eeafa0b7f/DataViz_Assignment1_ggplot_Group1.Rmd)</span>. | |
This project was created by **group 1**: | |
*Alfonso Bucag, Vaibhav Jaitly, Alberto Lombatti, Gabriella Riber, Alvaro | |
Romero, Jonathan Serrano, Christine Utendorf* | |
<summary>**Importing Libraries** | |
First, all libraries need to be installed and loaded. | |
For this project, the following libraries were used: *prettydoc, highcharter, dplyr, tidyr, ggalt, scales, evaluate, ggplot2, ggthemes, RColorBrewer, psych, reshape2, plotly, ggmap, maps, mapdata, extrafont, grid, rworldmap, mapproj, emojifont, waffle*
</summary> | |
```{r results='hide', message=FALSE, warning=FALSE, echo=FALSE} | |
# Install (when missing) and attach every package this document uses.
# One loop replaces a copy-pasted if/else stanza per package; the vector
# order is the attach order, so function masking between packages is the
# same as when each package was loaded by its own library() call.
required_packages <- c(
  "prettydoc", "highcharter", "dplyr", "tidyr", "ggalt", "scales",
  "evaluate", "ggplot2", "ggthemes", "RColorBrewer", "psych", "reshape2",
  "plotly", "ggmap", "maps", "mapdata", "extrafont", "grid", "rworldmap",
  "mapproj", "emojifont", "waffle"
)

for (pkg in required_packages) {
  # requireNamespace() looks the package up by name; testing the name with
  # %in% against installed.packages() compares it to every cell of that
  # matrix (versions, dependency lists, ...), not just the package names.
  if (requireNamespace(pkg, quietly = TRUE)) {
    print(paste(pkg, "package already installed"))
  } else {
    install.packages(pkg)
  }
  # character.only = TRUE makes library() use the value of `pkg`
  library(pkg, character.only = TRUE)
}
``` | |
<summary>**Dataset - Tour de France** | |
To start with the data visualization we chose the data set of the Tour de France, which includes information about all the races since the start in 1903. First, the data set needs to be loaded and afterwards some further categories as well as datatables were created for the following charts. The charts shown below include basic ggplot charts, tufte charts, a waffle chart, a dumbbell chart and a map chart. Each category with the specific chart type and the title of each graph can be found above the respective graphic.</summary> | |
```{r, echo=FALSE} | |
# Load the Tour de France dataset (comma-separated) from its gist
tour <- read.csv(
  "https://gist.githubusercontent.com/cutendorf/105806d901b4d51fc1f7bdc278d66f6e/raw/cd855a6f034aab0744857cfd9b9eb7f275627b5b/Tour_de_France.csv",
  sep = ","
)
# Load the min/max speed table (semicolon-separated) from its gist
min_max_speed <- read.csv(
  "https://gist.githubusercontent.com/cutendorf/b3fdf0e53b3f116010dc79558e534024/raw/78c49df1693ef8246914b3feee6109aba60aa621/Min_Max_speed.csv",
  sep = ";"
)

# Bucket each race by distance. Every row starts in the lowest bucket and is
# then promoted once per threshold it reaches, in ascending order, so the
# highest threshold that applies wins.
distance_thresholds <- c(
  "3000-4000" = 3000,
  "4000-5000" = 4000,
  "5000-6000" = 5000
)
tour$Categories <- "2000-3000"
for (bucket in names(distance_thresholds)) {
  tour$Categories[tour$Distance >= distance_thresholds[[bucket]]] <- bucket
}
``` | |
<summary>**Basic Chart - Line Chart - Total Entrants and Total Finishers**</summary> | |
```{r, echo=FALSE} | |
# Two lines over the years: finishers (goldenrod) and entrants (grey).
# Both layers share the data set and the x mapping, so those are declared
# once on ggplot(); each layer only adds its own y column.
EntrantsvsFinishers <- ggplot(data = tour, mapping = aes(x = Year)) +
  geom_line(mapping = aes(y = Finishers), colour = "goldenrod") +
  geom_line(mapping = aes(y = Entrants), colour = "grey") +
  theme_bw() +
  # Strip border, grid, axis lines and ticks for a minimal look
  theme(
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  # Empty strings blank out the title and both axis titles
  labs(title = "", x = "", y = "")

# Render as an interactive plotly widget
ggplotly(EntrantsvsFinishers)
``` | |
<summary>Not all participants who start the Tour de France actually finish it. <span style="color:goldenrod">Finishing this race is a huge success</span>, and the rider who finishes it in the shortest amount of time wears the <span style="color:goldenrod">yellow jersey</span>.</summary>
<summary>**Basic Chart - Scatter Plot - Finisher Ratio vs Distance (km)**</summary> | |
```{r,echo=FALSE} | |
# Share of entrants who finished each race, rounded to two decimals
tour[["FinisherRatio"]] <- round(
  tour[["Finishers"]] / tour[["Entrants"]],
  digits = 2
)

# Scatter of finisher ratio against distance. Year is mapped to colour, but
# both ends of the gradient are grey, so every point is drawn in the same
# grey.
ratio <- ggplot(
  data = tour,
  mapping = aes(x = Distance, y = FinisherRatio, colour = Year)
) +
  geom_point(size = 1.5) +
  scale_colour_gradient(low = "grey", high = "grey") +
  labs(title = "", x = "", y = "") +
  theme_bw() +
  # Minimal look: no border, grid, axis lines, ticks or legend
  theme(
    legend.position = "none",
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Render as an interactive plotly widget
ggplotly(ratio)
``` | |
<summary>The finisher ratio reflects the proportion of participants that finished the race from start to finish.</summary> | |
<summary>**Basic Chart - Scatter Plot - Average Winner Speed vs Distance**</summary> | |
```{r, echo=FALSE} | |
# Scatter of the winner's average speed against race distance; points are
# shaded from light grey to black by Year.
speed <- ggplot(tour, aes(x = Distance, y = AvgSpeed, colour = Year)) +
  geom_point(size = 1.5) +
  scale_colour_gradient(low = "lightgrey", high = "black") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank()
  ) +
  labs(title = "", y = "", x = "") +
  # The colourbar belongs to the colour scale. Addressing it as `fill` had no
  # effect because this plot maps no fill aesthetic.
  guides(colour = guide_colourbar(barwidth = 0.5, barheight = 10))

# Render as an interactive plotly widget
ggplotly(speed)
``` | |
<summary>**Tufte - Line Chart - Distance over the years**</summary> | |
```{r, echo=FALSE} | |
# Tufte-style line chart of race distance per year.
# Columns are referenced by bare name inside aes() so they are looked up in
# the `data` argument; writing tour$Year there bypasses it.
ggplot(tour, aes(x = Year, y = Distance)) +
  geom_line(size = 0.5, color = "goldenrod") +
  theme_tufte(ticks = FALSE, base_size = 8) +
  theme(axis.title = element_blank()) +
  # Two dashed horizontal reference lines at fixed distances (km)
  geom_hline(yintercept = c(3267, 3677), lty = 2, size = 0.2)
``` | |
<summary>The dashed lines represent the distance range since the year 2000.</summary>
<summary>**Tufte - Boxplot - Average Winner Speed per Distance**</summary> | |
```{r,echo=FALSE} | |
# Tufte-style box plot of the winner's average speed, one box per distance
# category.
ggplot(data = tour, mapping = aes(x = factor(Categories), y = AvgSpeed)) +
  geom_tufteboxplot(size = 0.8, color = "goldenrod") +
  theme_tufte() +
  # All theme tweaks in a single call: title styling, no axis titles or ticks
  theme(
    plot.title = element_text(
      family = "serif",
      color = "goldenrod",
      size = 18,
      hjust = 0
    ),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )
``` | |
<summary>**Tufte - Barchart - Number of Wins per Country**</summary> | |
```{r,echo=FALSE} | |
# Order the countries by number of wins, largest first, so the bars are
# drawn in descending order.
min_max_speed$Nationality <- factor(
  min_max_speed$Nationality,
  levels = min_max_speed$Nationality[order(-min_max_speed$Number.of.winnings)]
)

# Tufte-style bar chart of wins per country
ggplot(min_max_speed, aes(x = Nationality, y = Number.of.winnings)) +
  theme_tufte(base_size = 15, ticks = FALSE) +
  # geom_col() plots the y values as given (same as stat = "identity")
  geom_col(width = 0.25, fill = "goldenrod") +
  theme(axis.title = element_blank()) +
  scale_y_continuous(breaks = seq(0, 40, 10)) +
  # White lines drawn over the bars every 5 wins act as the grid
  geom_hline(yintercept = seq(0, 40, 5), col = "white", lwd = 1) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
``` | |
<summary>**Waffle Chart - <span style="color:steelblue">France</span> vs <span style="color:grey">Other Countries</span>**</summary> | |
```{r results="hide", message=FALSE, warning=FALSE, echo=FALSE} | |
# Font setup for the glyph-based waffle chart.
# Font Awesome has to be installed on the machine first:
# https://fontawesome.com/v4.7.0/
load.fontawesome()
font_import()
fonts()[grep("Awesome", fonts())]
loadfonts()
# The "win" device only exists on Windows, so register fonts for it only
# there; other platforms skip this step.
if (.Platform$OS.type == "windows") {
  loadfonts(device = "win")
}

# Count wins by French riders versus riders of every other nationality
nationality <- tour$Nationality
nationality_count <- c(
  "France" = sum(nationality == "France"),
  "Others" = sum(nationality != "France")
)

# Stored as `waffle_plot` so the object does not shadow the waffle() function
waffle_plot <- waffle(
  nationality_count,
  rows = 5,
  size = 0.5,
  legend_pos = "none",
  colors = c("steelblue", "grey", "white"),
  use_glyph = "bicycle",
  glyph_size = 5
)
print(waffle_plot)
``` | |
<summary>**Dumbbell - <span style="color:darkgrey">Min</span><span style="color:lightgrey">-</span><span style="color:goldenrod">Max</span> Speed per Country**</summary> | |
```{r, echo=FALSE} | |
# Order the countries by number of wins (ascending) for the y axis
min_max_speed$Nationality <- factor(
  min_max_speed$Nationality,
  levels = min_max_speed$Nationality[order(min_max_speed$Number.of.winnings)]
)

# Dumbbell chart: one row per country, spanning Min.speed (dark grey dot) to
# Max.speed (goldenrod dot).
ggplot(
  min_max_speed,
  aes(x = Min.speed, xend = Max.speed, y = Nationality, group = Nationality)
) +
  geom_dumbbell(
    colour = "lightgrey", size = 0.5,
    colour_x = "darkgrey", size_x = 1.5,
    colour_xend = "goldenrod", size_xend = 1.5,
    dot_guide = FALSE
  ) +
  # Argument spelled out in full instead of relying on partial matching
  scale_x_continuous(labels = number) +
  ggtitle("") +
  # Drops both axis titles; the earlier xlab("")/ylab("") calls were
  # overridden by this and have been removed.
  labs(x = NULL, y = NULL) +
  theme_tufte() +
  theme(axis.ticks.y = element_blank(), axis.ticks.x = element_blank())
``` | |
<summary>**Extra Chart - Map - Start and Finish City**</summary> | |
```{r, echo=FALSE} | |
# Return the border coordinates of the named countries as one data frame
# with columns long, lat and region.
#   world:         map object returned by getMap()
#   country_names: character vector matched against world$NAME
country_outline <- function(world, country_names) {
  matches <- which(world$NAME %in% country_names)
  outlines <- lapply(matches, function(i) {
    # Only the first polygon stored for each country is read
    coords <- data.frame(world@polygons[[i]]@Polygons[[1]]@coords)
    coords$region <- as.character(world$NAME[i])
    colnames(coords) <- c("long", "lat", "region")
    coords
  })
  do.call("rbind", outlines)
}

# Get the world map
worldMap <- getMap()

### Starting countries
startCountry <- c(
  "Belgium", "Germany", "Ireland", "Italy", "Luxembourg", "Netherlands",
  "Spain", "United Kingdom"
)
startCoords <- country_outline(worldMap, startCountry)

### France
country <- "France"
franceCoords <- country_outline(worldMap, country)

# Plot the map: France in grey, the other start countries in whitesmoke,
# cropped to the longitude/latitude window given to coord_map().
P <- ggplot() +
  geom_polygon(
    data = franceCoords, aes(x = long, y = lat, group = region),
    fill = "grey", colour = "white", size = 0.5
  ) +
  geom_polygon(
    data = startCoords, aes(x = long, y = lat, group = region),
    fill = "whitesmoke", colour = "white", size = 0.5
  ) +
  coord_map(xlim = c(-11, 19), ylim = c(35, 59))

# theme_bw() is a complete theme and replaces any theme settings made before
# it, so all tweaks are applied once, after it.
P <- P +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    # 0 * c(...) evaluates to zero margins on all four sides
    plot.margin = unit(0 * c(-1.5, -1.5, -1.5, -1.5), "lines")
  )

# Start cities are semi-transparent blue, so repeated locations look darker;
# finish cities are solid goldenrod.
P <- P +
  geom_point(
    data = tour, aes(x = StartCityLong, y = StartCityLat),
    color = "blue", size = 2, alpha = 0.2
  ) +
  geom_point(
    data = tour, aes(x = FinCityLong, y = FinCityLat),
    color = "goldenrod", size = 2
  )
print(P)
``` | |
Many roads lead to Paris. While Paris is always the <span style="color:goldenrod">finishing city</span>, the <span style="color:blue">starting cities</span> change regularly and are also sometimes outside of France. The frequency of a city or region as starting point of the Tour de France is reflected by the colour intensity. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment