Skip to content

Instantly share code, notes, and snippets.

@kpq
Created February 5, 2015 22:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kpq/c2e1fdae6b8076f99431 to your computer and use it in GitHub Desktop.
Save kpq/c2e1fdae6b8076f99431 to your computer and use it in GitHub Desktop.
# NOTE(review): absolute, machine-specific working directory. setwd() errors
# if this path does not exist, so the script only runs as-is on the author's
# machine -- confirm whether anything below actually needs it.
setwd("/Users/kevin/NYT/working/2015-02-02-womens-skiing/r-work")
# XML supplies readHTMLTable(), which the scraper uses to parse result pages.
library(XML)
# Fragments of the FIS statistics query URL. The scraper pastes them together
# in order, inserting the season after url_1, the gender code after url_2 and
# the discipline code after url_3.
url_1 <- "http://data.fis-ski.com/global-links/statistics/overview-top-ranked-in-all-competitions.html?place=&season="
url_2 <- "&sector=AL&nation_place=&gender="
url_3 <- "&category=WC&nbr=13&nation_comp=USA&discipline="
url_4 <- "&Submit=Search"

# Values substituted into the `discipline=` query parameter.
disciplines <- c(
  "ALL", "DH", "SL", "GS", "SG", "K", "SC",
  "CAR", "IND", "TE", "KOS", "KOG", "CE"
)

# Values substituted into the `gender=` query parameter.
gender <- c("M", "L", "T", "G")

# Values substituted into the `season=` query parameter, one query per year.
years <- seq(from = 1967L, to = 2015L)
#' Fetch one FIS results table and tidy it into a data frame.
#'
#' Builds the query URL from the file-level fragments `url_1`..`url_4`,
#' reads the first HTML table on the page, drops its first four rows, names
#' the seven columns, forward-fills rows whose `date` is blank from the row
#' above, and keeps only rows whose parsed date falls in `this_year`.
#'
#' @param this_year Value for the `season=` query parameter. Rows are kept
#'   only when the year parsed from `date` (format `%d-%m-%Y`) equals it.
#' @param gender Value for the `gender=` query parameter; also stored in the
#'   `gender` column of the result.
#' @param discipline Value for the `discipline=` query parameter; also stored
#'   in the `discipline` column of the result.
#' @param tables Optional list of already-parsed tables in the shape returned
#'   by `readHTMLTable()`. When `NULL` (the default) the page is downloaded.
#' @return A data frame with columns `date`, `event_name`, `nat`, `disc`,
#'   `rank1`, `rank2`, `rank3`, `gender`, `discipline`, `date2` (Date) and
#'   `year` (character). It has zero rows when the page has no usable table
#'   or no row matches `this_year`.
get_data_for_year_discipline_gender <- function(this_year, gender, discipline,
                                                tables = NULL) {
  result_cols <- c("date", "event_name", "nat", "disc",
                   "rank1", "rank2", "rank3")
  fill_cols <- c("date", "event_name", "nat", "disc")

  # The arguments are used as passed. Earlier code overwrote gender and
  # discipline with fixed values here, so every call returned the same page.
  if (is.null(tables)) {
    url <- paste0(url_1, this_year, url_2, gender, url_3, discipline, url_4)
    tables <- readHTMLTable(url)
  }

  df <- NULL
  if (length(tables) > 0 && !is.null(tables[[1]])) {
    df <- data.frame(tables[1])
    # Drop the first four rows (presumably header/filter rows of the page --
    # TODO confirm). A logical mask is safe when the table has < 5 rows,
    # unlike 5:nrow(df), which would count backwards.
    df <- df[seq_len(nrow(df)) > 4, , drop = FALSE]
    if (ncol(df) != length(result_cols)) {
      warning(
        "Unexpected table layout (", ncol(df), " columns) for ",
        paste(this_year, gender, discipline), "; returning no rows",
        call. = FALSE
      )
      df <- NULL
    }
  }
  if (is.null(df)) {
    # No usable table: continue with an empty frame so the result still has
    # the full set of columns and can be rbind()-ed with other results.
    df <- as.data.frame(
      matrix(character(0), nrow = 0, ncol = length(result_cols)),
      stringsAsFactors = FALSE
    )
  }

  names(df) <- result_cols
  df <- df[!is.na(df$nat), , drop = FALSE]
  # rep() keeps the assignment valid when df has zero rows.
  df$gender <- rep(gender, nrow(df))
  df$discipline <- rep(discipline, nrow(df))

  # Rows with a blank date inherit date/event/nation/discipline from the
  # nearest preceding row that has a date. `src` is the index of that row
  # (0 when there is none, in which case the row is left as it is).
  blank <- is.na(df$date) | df$date == ""
  src <- cummax(ifelse(blank, 0L, seq_along(blank)))
  fillable <- blank & src > 0
  if (any(fillable)) {
    for (col in fill_cols) {
      df[[col]][fillable] <- df[[col]][src[fillable]]
    }
  }

  df$date2 <- as.Date(as.character(df$date), format = "%d-%m-%Y")
  df$year <- format(df$date2, "%Y")
  # Rows whose date could not be parsed have an NA year and are dropped.
  df[!is.na(df$year) & df$year == this_year, , drop = FALSE]
}
# Scrape every year / gender / discipline combination and stack the results
# into `data`. Each result is stored in a preallocated list and bound once at
# the end, instead of rbind()-ing inside the loop (which re-copies the whole
# accumulated frame on every iteration). If a request fails part-way, the
# results gathered so far remain available in `chunks`.
chunks <- vector("list", length(years) * length(gender) * length(disciplines))
k <- 0L
for (y in years) {
  for (g in gender) {
    for (d in disciplines) {
      k <- k + 1L
      # list() wrapper: assigning a bare NULL would delete the slot.
      chunks[k] <- list(get_data_for_year_discipline_gender(y, g, d))
      print(paste(y, g, d))
    }
  }
}
# rev() keeps the most recently fetched rows first, matching the row order
# that repeated rbind(new, accumulated) calls would produce.
data <- do.call(rbind, rev(chunks))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment