Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save psobczyk/0861173760753da9c5de5917d8237ed2 to your computer and use it in GitHub Desktop.
Save psobczyk/0861173760753da9c5de5917d8237ed2 to your computer and use it in GitHub Desktop.
Analiza wyników 5. PKO Nocnego Wrocław Półmaratonu
library(XML)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggthemes)
# Load the race results. "wyniki.txt" is a locally saved copy of the HTML
# results table downloaded from http://wyniki.datasport.pl/results2200/
url_address <- "wyniki.txt"
doc.html <- htmlTreeParse(url_address, useInternalNodes = TRUE,
                          encoding = "UTF-8")

# Flatten the text of every <td> cell into one character vector, in document
# order.
wyniki <- unlist(xpathSApply(doc.html, "//td", xmlValue))

# The table is reshaped assuming 8 cells per result row. Fail loudly when the
# cell count does not fit, instead of letting matrix() recycle values and
# silently shift the columns.
cells_per_row <- 8L
if (length(wyniki) == 0L || length(wyniki) %% cells_per_row != 0L) {
  stop("Expected a non-empty table with ", cells_per_row,
       " cells per row, but found ", length(wyniki), " <td> cells in '",
       url_address, "'.", call. = FALSE)
}
wyniki <- matrix(wyniki, ncol = cells_per_row, byrow = TRUE)

# Keep the runner name (column 2) and the five split-time columns (4-8).
# stringsAsFactors = FALSE keeps the columns as character on R < 4.0 as well.
wyniki <- data.frame(wyniki, stringsAsFactors = FALSE) %>%
  select(name = X2, km5 = X4, km10 = X5, km15 = X6, km20 = X7, km21.1 = X8)
# Convert clock times to seconds.
#
# Accepts "hh:mm:ss", "mm:ss" or plain "ss" strings. Anything after a " / "
# separator in the cell is dropped before parsing.
#
# x: character vector (or factor) of times; may have any length.
# Returns a numeric vector of the same length as x. Missing or empty values,
# and values with more than three ":"-separated fields, give NA. Non-numeric
# fields give NA with the usual as.numeric() coercion warning.
to_seconds <- function(x) {
  cleaned <- trimws(gsub("(.*) / .*", "\\1", as.character(x)))
  # Weights for the fields counted from the right: seconds, minutes, hours.
  unit_secs <- c(1, 60, 3600)
  vapply(
    strsplit(cleaned, ":", fixed = TRUE),
    function(parts) {
      n_parts <- length(parts)
      # strsplit() yields character(0) for an empty string: treat as missing.
      if (n_parts == 0L || n_parts > length(unit_secs)) {
        return(NA_real_)
      }
      # Align from the right so that "mm:ss" is not read as "hh:mm".
      sum(rev(as.numeric(parts)) * unit_secs[seq_len(n_parts)])
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}
# Convert every split column (everything except `name`) from clock time to
# seconds. Plain column-wise base R replaces the deprecated
# rowwise() + mutate_each(funs(...)) combination and does not leave the result
# in dplyr's rowwise mode, which every later pipeline would otherwise inherit.
# to_seconds() is called one value at a time, so it works whether or not the
# function itself is vectorised.
split_cols <- setdiff(names(wyniki), "name")
wyniki_seconds <- wyniki
wyniki_seconds[split_cols] <- lapply(
  wyniki[split_cols],
  function(column) {
    vapply(as.character(column), to_seconds, numeric(1), USE.NAMES = FALSE)
  }
)
# "Manual" pace computation: per-segment pace in seconds per km, derived from
# the cumulative split times and reshaped to long format
# (one row per runner and split distance).
# mutate() evaluates its arguments in order, so the paces are computed from the
# last split backwards: each expression still sees the untouched cumulative
# time of the preceding split.
dane <- wyniki_seconds %>%
  ungroup() %>%  # drop rowwise mode if the input still carries it
  mutate(
    name = as.character(name),  # avoid factor/character clashes in bind_rows()
    km21.1 = (km21.1 - km20) / 1.1,
    km20 = (km20 - km15) / 5,
    km15 = (km15 - km10) / 5,
    km10 = (km10 - km5) / 5,
    km5 = km5 / 5
  ) %>%
  gather(dystans, tempo, -name) %>%
  # "km21.1" -> 21.1, so the distance can be used as a numeric x axis
  mutate(dystans = as.numeric(gsub("km", "", dystans))) %>%
  arrange(name, dystans)

# Pace profile of the highlighted runner, taken from the paces computed above
# instead of repeating the whole computation.
danePS <- dane %>%
  filter(grepl("SOBCZYK Piotr", name, fixed = TRUE))

# Quartiles of the pace at each split distance, in long format with the
# quantile label ("q25", "q50", "q75") stored in `name`, followed by the
# highlighted runner's rows so both can be drawn as one set of coloured lines.
daneKwantyle <- dane %>%
  group_by(dystans) %>%
  summarise(
    q25 = quantile(tempo, 0.25, na.rm = TRUE),
    q50 = quantile(tempo, 0.5, na.rm = TRUE),
    q75 = quantile(tempo, 0.75, na.rm = TRUE)
  ) %>%
  gather(name, tempo, -dystans) %>%
  bind_rows(danePS)
# Pace plot: one faint line per runner, overlaid with the pace quartiles and
# the highlighted runner's own line.

# y axis: a tick every 30 s from 3:00 to 11:30 min/km. The labels are derived
# from the breaks themselves, so the two can never get out of step.
pace_breaks <- seq(180, 690, 30)
pace_labels <- sprintf("%d:%02d", pace_breaks %/% 60, pace_breaks %% 60)

# Legend text per series. Pace is in seconds per km, so lower is faster: the
# 25th-percentile pace (q25) is better than 75 % of the runners, and the
# 75th-percentile pace (q75) is better than only 25 %.
quantile_labels <- c(
  q25 = "75 % zawodników",
  q50 = "50 % zawodników",
  q75 = "25 % zawodników"
)

# Label the series by key rather than by position, so the legend does not
# depend on locale sort order. Any series that is not a quantile is the
# highlighted runner.
label_series <- function(series) {
  out <- unname(quantile_labels[series])
  out[is.na(out)] <- "PS"
  out
}

p <- ggplot(dane, aes(x = dystans, y = tempo)) +
  # All runners: near-transparent lines, so density shows up as darker bands.
  geom_line(aes(group = name), colour = "#431600", alpha = 0.01) +
  scale_y_continuous(labels = pace_labels, breaks = pace_breaks,
                     limits = c(NA, 660)) +
  scale_x_continuous(name = "km") +
  ylab("tempo (min/km)") +
  # Quartiles and the highlighted runner, fully opaque and coloured by series.
  geom_line(data = daneKwantyle, aes(group = name, colour = name), alpha = 1,
            size = 1.5) +
  # override.aes takes a plain list of aesthetic values, not an aes() mapping.
  guides(color = guide_legend(override.aes = list(size = 5))) +
  scale_color_discrete("Tempo lepsze niż", labels = label_series) +
  theme_fivethirtyeight(base_size = 18) +
  theme(panel.grid.minor.y = element_blank(),
        axis.text = element_text(color = "black"),
        # angle = 90 is meant for the y title; the x title is reset just below.
        axis.title = element_text(angle = 90, color = "black"),
        axis.title.x = element_text(angle = 0, color = "black"),
        plot.title = element_text(size = 26, hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "white"),
        panel.background = element_rect(fill = "white"),
        legend.background = element_rect(fill = "white"),
        legend.direction = "vertical",
        legend.key = element_rect(fill = "white"),
        legend.position = "right") +
  labs(title = "Wyniki 5. PKO Nocnego Wrocław Półmaratonu")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment