Skip to content

Instantly share code, notes, and snippets.

@RunyangWang
Created April 28, 2020 22:46
Show Gist options
  • Save RunyangWang/39ab3a36b902e8dfac9d90131f50fb27 to your computer and use it in GitHub Desktop.
Save RunyangWang/39ab3a36b902e8dfac9d90131f50fb27 to your computer and use it in GitHub Desktop.
R feminism in Olympic
# Load packages
library("plotly")
library("tidyverse")
library("data.table")
library("gridExtra")
library("knitr")
data <- read_csv("athlete_events.csv",
col_types = cols(
ID = col_character(),
Name = col_character(),
Sex = col_factor(levels = c("M","F")),
Age = col_integer(),
Height = col_double(),
Weight = col_double(),
Team = col_character(),
NOC = col_character(),
Games = col_character(),
Year = col_integer(),
Season = col_factor(levels = c("Summer","Winter")),
City = col_character(),
Sport = col_character(),
Event = col_character(),
Medal = col_factor(levels = c("Gold","Silver","Bronze"))
)
)
## Number of men and women over time
# Exclude art competitions from data (I won't use them again in the kernel)
data <- data %>% filter(Sport != "Art Competitions")
# Recode year of Winter Games after 1992 to match the next Summer Games
# Thus, "Year" now applies to the Olympiad in which each Olympics occurred
original <- c(1994,1998,2002,2006,2010,2014)
new <- c(1996,2000,2004,2008,2012,2016)
for (i in 1:length(original)) {
data$Year <- gsub(original[i], new[i], data$Year)
}
data$Year <- as.integer(data$Year)
# Table counting number of athletes by Year and Sex
counts_sex <- data %>% group_by(Year, Sex) %>%
summarize(Athletes = length(unique(ID)))
counts_sex$Year <- as.integer(counts_sex$Year)
# Plot number of male/female athletes vs time
ggplot(counts_sex, aes(x=Year, y=Athletes, group=Sex, color=Sex)) +
geom_point(size=2) +
geom_line() +
scale_color_manual(values=c("darkblue","red")) +
labs(title = "Number of male and female Olympians over time") +
theme(plot.title = element_text(hjust = 0.5))
#Growth in the number of female athletes largely mirrored growth in the number of male athletes up until 1996, when growth in the number of male athletes leveled off at ~8000, while the number of female athletes continued to grow at a high rate. The participation of female athletes reached its highest point during the most recent Olympiad (Sochi 2014 and Rio 2016), in which slightly more than 44% of Olympians were women.
#But not all nations have invested equally in their female athletes: some have embraced the opportunity to win more medals in women's events, while others have been slow to include women on their Olympic teams. The following chart shows the number of female athletes versus the number of male athletes from 5 select Olympic Games (1936, 1956, 1976, 1996, and 2012), with each data point representing a National Olympic Committee (NOC) and separate best-fit regression lines for each of the 5 Games. Only NOCs represented by at least 50 athletes are included in the plot and regression line fitting. The dashed line represents the ideal of NOCs sending teams comprised of 50% women.
## Number of women relative to men across countries
# Count M/F/Total per country per Olympics
# Keep only country-years with at least 30 athletes
counts_NOC <- data %>% filter(Year %in% c(1936,1956,1976,1996,2012)) %>%
group_by(Year, NOC, Sex) %>%
summarize(Count = length(unique(ID))) %>%
spread(Sex, Count) %>%
mutate(Total = sum(M,F,na.rm=T)) %>%
filter(Total > 49)
names(counts_NOC)[3:4] <- c("Male","Female")
counts_NOC$Male[is.na(counts_NOC$Male)] <- 0
counts_NOC$Female[is.na(counts_NOC$Female)] <- 0
counts_NOC$Year <- as.factor(counts_NOC$Year)
# Plot female vs. male athletes by NOC / Year
ggplot(counts_NOC, aes(x=Male, y=Female, group=Year, color=Year)) +
geom_point(alpha=0.6) +
geom_abline(intercept=0, slope=1, linetype="dashed") +
geom_smooth(method="lm", se=FALSE) +
labs(title = "Female vs. Male Olympians from participating NOCs") +
theme(plot.title = element_text(hjust = 0.5)) +
guides(color=guide_legend(reverse=TRUE))
#The chart shows that although there wasn't much change from 1936 to 1956, there was dramatic improvement in female participation from 1956 to 2012. In 1996 and 2012, some NOCs even sent a majority of female athletes to the Games (these are represented by points above the dashed line).
#So which NOCs are leading the way for gender equality in the Olympics? The following charts rank nations by the proportion of female athletes on their Olympic Teams. In addition to showing the proportion of female athletes on each team, I show the proportion of each nations' medals that were won by females. I highlight data from 3 Olympiads: 1936 (Garmisch-Partenkirchen and Berlin), 1976 (Innsbruck and Montreal), and 2012 (Sochi and Rio). Like the previous chart, an NOC must have sent at least 50 athletes to the Games to be included.
## Proportion of women on Olympic teams: 1936
# Proportions of athletes/medals won by women from select NOCs/Years
props <- data %>% filter(Year %in% c(1936,1976,2012)) %>%
group_by(Year, NOC, Sex) %>%
summarize(Athletes = length(unique(ID)),
Medals = sum(!is.na(Medal)))
props <- dcast(setDT(props),
Year + NOC ~ Sex,
fun.aggregate = sum,
value.var = c("Athletes","Medals"))
props <- props %>%
mutate(Prop_F_athletes = Athletes_F/(Athletes_F + Athletes_M),
Prop_F_medals = Medals_F/(Medals_F + Medals_M)) %>%
filter(Athletes_F + Athletes_M > 49)
props$Prop_F_medals[props$Medals_M + props$Medals_F == 0] <- NA
# Data for 1936 only
props_1936 <- props %>%
filter(Year == 1936) %>%
gather(Prop_F_athletes, Prop_F_medals, key="type", value="value")
levs <- props_1936 %>%
filter(type == "Prop_F_athletes") %>%
arrange(value) %>% select(NOC)
props_1936$NOC <- factor(props_1936$NOC, levels=c(levs$NOC))
# Plot 1936
ggplot(props_1936, aes(x=value, y=NOC, color=type)) +
geom_point(na.rm=FALSE, alpha=0.8) +
scale_color_manual(name="",
values=c("black","goldenrod"),
labels=c("Athletes","Medals")) +
labs(title="1936 Olympics (Garmisch-Partenkirchen and Berlin)", x="Proportion female") +
theme(plot.title = element_text(hjust = 0.5)) +
xlim(0,1)
# Data for 2010/2012 only
props_2012 <- props %>%
filter(Year == 2012) %>%
gather(Prop_F_athletes, Prop_F_medals, key="type", value="value")
levs <- props_2012 %>%
filter(type == "Prop_F_athletes") %>%
arrange(value) %>% select(NOC)
props_2012$NOC <- factor(props_2012$NOC, levels=c(levs$NOC))
# Plot 2010/2012
ggplot(props_2012, aes(x=value, y=NOC, color=type)) +
geom_point(na.rm=FALSE, alpha=0.8) +
scale_color_manual(name="",
values=c("black","goldenrod"),
labels=c("Athletes","Medals")) +
labs(title="2010/2012 Olympics (Vancouver and London)",
x="Proportion female") +
theme(plot.title = element_text(hjust = 0.5),
axis.text.y = element_text(size=6)) +
xlim(0,1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment