Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Analiza wyników 5. PKO Wrocław Półmaratonu (analysis of the 5th PKO Wrocław Half Marathon results)
library(XML)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggthemes)
# Path to a locally saved copy of the results table downloaded from
# http://wyniki.datasport.pl/results2200/ (despite its name, this variable
# holds a file path, not a URL).
url_address <- "wyniki.txt"

# Parse the saved HTML into an internal document so XPath queries can be run.
doc.html <- htmlTreeParse(
  url_address,
  useInternalNodes = TRUE,
  encoding = "UTF-8"
)

# Text content of every <td> cell, in document order.
wyniki <- unlist(xpathSApply(doc.html, "//td", xmlValue))

# The cells are folded into rows of 8. matrix() would silently recycle values
# (shifting every following row) if the count were not a multiple of 8, so
# fail loudly instead.
if (length(wyniki) == 0 || length(wyniki) %% 8 != 0) {
  stop(
    "Expected a non-zero number of <td> cells that is a multiple of 8, got ",
    length(wyniki),
    call. = FALSE
  )
}
wyniki <- matrix(wyniki, ncol = 8, byrow = TRUE)

# Keep column 2 as `name` and columns 4-8 as the split columns km5 ... km21.1.
# Strings are kept as character (not factor) regardless of the R version.
wyniki <- data.frame(wyniki, stringsAsFactors = FALSE) %>%
  select(name = X2, km5 = X4, km10 = X5, km15 = X6, km20 = X7, km21.1 = X8)
# Convert clock times written as "hh:mm:ss" into seconds.
#
# Anything following " / " in the text is discarded before parsing.
# Shorter forms are accepted too: "mm:ss" and plain "ss".
#
# x: character (or factor) vector of times.
# Returns a numeric vector of the same length as `x`. Missing, empty or
# unparseable entries give NA, so that a runner without a recorded split is
# not treated as having reached it in 0 seconds.
to_seconds <- function(x) {
  # Drop the " / ..." suffix, keeping only the clock part.
  clock <- gsub("(.*) / .*", "\\1", as.character(x))

  vapply(
    strsplit(clock, ":", fixed = TRUE),
    function(parts) {
      # Non-numeric fields become NA here; they are reported as NA below,
      # so the coercion warning carries no extra information.
      fields <- suppressWarnings(as.numeric(parts))
      if (length(fields) == 0L || length(fields) > 3L || anyNA(fields)) {
        return(NA_real_)
      }
      # Align the multipliers to the right: the last field is always seconds.
      multipliers <- rev(c(1, 60, 3600)[seq_along(fields)])
      sum(fields * multipliers)
    },
    numeric(1)
  )
}
# Convert every split column (everything except `name`) from clock text to
# seconds. The conversion runs row by row, so to_seconds() always receives a
# single value. The row-wise grouping is dropped afterwards so that later
# steps operate on whole columns again.
wyniki_seconds <- wyniki %>%
  rowwise() %>%
  mutate(across(-name, to_seconds)) %>%
  ungroup()
# "Manual" pace calculation: turn the cumulative split times into the pace
# (seconds per km) of each segment, then reshape to one row per runner and
# distance.
dane <- local({
  # Columns are overwritten from the last split backwards, so every
  # difference is still taken between the untouched cumulative times.
  segment_pace <- mutate(
    wyniki_seconds,
    km21.1 = (km21.1 - km20) / 1.1,
    km20 = (km20 - km15) / 5,
    km15 = (km15 - km10) / 5,
    km10 = (km10 - km5) / 5,
    km5 = km5 / 5
  )

  # Long format: the former column name goes to `dystans`, its value to `tempo`.
  long_form <- gather(segment_pace, dystans, tempo, -name)

  # "km21.1" -> 21.1, so the distance can be used as a numeric axis.
  long_form <- mutate(long_form, dystans = as.numeric(gsub("km", "", dystans)))

  arrange(long_form, name, dystans)
})
# Quartiles of the segment pace at each distance, in long format with the
# columns dystans, name ("q25", "q50", "q75") and tempo.
#
# `dane` already holds the per-segment pace for every runner, so it is reused
# here instead of repeating the whole pace calculation.
daneKwantyle <- dane %>%
  group_by(dystans) %>%
  summarise(
    q25 = quantile(tempo, 0.25, na.rm = TRUE),
    q50 = quantile(tempo, 0.5, na.rm = TRUE),
    q75 = quantile(tempo, 0.75, na.rm = TRUE)
  ) %>%
  # One row per (distance, quantile); `name` mirrors the column of the same
  # name in `dane` so both data sets can be drawn with the same aesthetics.
  gather(name, tempo, -dystans)
# Segment paces of the highlighted runner, taken from `dane` (which already
# contains the pace of every runner) rather than recomputed from scratch.
danePS <- dane %>%
  ungroup() %>%
  # The pattern is a literal name, so match it as a fixed string.
  filter(grepl("SOBCZYK Piotr", name, fixed = TRUE)) %>%
  # Keep `name` as character so it binds cleanly with the quantile labels.
  mutate(name = as.character(name))

# Add the runner's line to the quantile lines; both are drawn together as the
# highlighted layer of the plot.
daneKwantyle <- bind_rows(daneKwantyle, danePS)
# Pace profile plot: one nearly transparent line per runner from `dane`, with
# the lines from `daneKwantyle` (pace quartiles plus the highlighted runner)
# drawn on top in colour.
p <- local({
  quantile_keys <- c("q25", "q50", "q75")
  # Every other name in daneKwantyle is a highlighted runner (possibly none,
  # possibly several if the name filter matched more than one entry).
  runner_keys <- setdiff(unique(as.character(daneKwantyle$name)), quantile_keys)

  # Tick labels "3:00", "3:30", ..., "11:30" for the breaks 180, 210, ..., 690
  # (pace is stored in seconds per km but shown as min:ss).
  pace_labels <- c(t(gsub(" ", ":", outer(3:11, c("00", "30"), paste))))

  ggplot(dane, aes(x = dystans, y = tempo)) +
    geom_line(aes(group = name), colour = "#431600", alpha = 0.01) +
    scale_y_continuous(
      labels = pace_labels,
      breaks = seq(180, 690, 30),
      limits = c(NA, 660)
    ) +
    scale_x_continuous(name = "km") +
    ylab("tempo (min/km)") +
    geom_line(
      data = daneKwantyle,
      aes(group = name, colour = name),
      alpha = 1,
      size = 1.5
    ) +
    # override.aes takes a plain list of aesthetic values.
    guides(color = guide_legend(override.aes = list(size = 5))) +
    # Breaks are given explicitly so each label is tied to its key instead of
    # to the locale-dependent sort order of the names. A lower pace value is
    # faster, so the q25 line is better than 75 % of runners, q50 than 50 %
    # and q75 than 25 %.
    scale_color_discrete(
      "Tempo lepsze niż",
      breaks = c(quantile_keys, runner_keys),
      labels = c(
        paste(c(75, 50, 25), "% zawodników"),
        rep("PS", length(runner_keys))
      )
    ) +
    theme_fivethirtyeight(base_size = 18) +
    theme(
      panel.grid.minor.y = element_blank(),
      axis.text = element_text(color = "black"),
      axis.title = element_text(angle = 90, color = "black"),
      axis.title.x = element_text(angle = 0, color = "black"),
      plot.title = element_text(size = 26, hjust = 0.5),
      plot.subtitle = element_text(hjust = 0.5),
      plot.background = element_rect(fill = "white"),
      panel.background = element_rect(fill = "white"),
      legend.background = element_rect(fill = "white"),
      legend.direction = "vertical",
      legend.key = element_rect(fill = "white"),
      legend.position = "right"
    ) +
    labs(title = "Wyniki 5. PKO Nocnego Wrocław Półmaratonu")
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.