Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save HughParsonage/ec598ebe30b73cd56c651219d5bec834 to your computer and use it in GitHub Desktop.
Save HughParsonage/ec598ebe30b73cd56c651219d5bec834 to your computer and use it in GitHub Desktop.
library(rvest)
library(dplyr)
library(tidyr)
library(xml2)
library(data.table)
# Outright-market pages for the ACT and NT groups, kept as ready-made URLs.
url_act_electorates_outrights <- paste0(
  "http://www.sportsbet.com.au/betting/politics/",
  "australian-federal-politics/outrights?ev_oc_grp_id=1971028"
)
url_nt <- paste0(
  "http://www.sportsbet.com.au/betting/politics/",
  "australian-federal-politics/outrights?ev_oc_grp_id=1971052"
)

# Group id for each state/territory. Despite the name, these are not XPath
# expressions: each number is appended to the `ev_oc_grp_id=` query parameter
# of the outrights URL by state_odds_table().
xpaths <- list(
  ACT = 1971028,
  NT  = 1971052,
  NSW = 1956097,
  QLD = 1971071,
  SA  = 1971079,
  TAS = 1971095,
  VIC = 1971122,
  WA  = 1971123
)
# Scrape the outright odds for every division listed on one state's (or
# territory's) Sportsbet page and return them in long form.
#
# Args:
#   i: integer position of the state within the global `xpaths` list. It must
#      be a position rather than a name, because `names(xpaths)[i]` is also
#      used to strip the state suffix from the division headers.
#
# Returns:
#   A data.table with one row per division/runner and the columns
#   `rowNumber` (position of the runner in the page listing, as character),
#   `Division`, `char` (the raw scraped text), `Party` and `Odds`.
#
# Stops with an error when the page cannot be split into division sections
# (for example because the page layout has changed).
state_odds_table <- function(i) {
  state_url <- paste0(
    "http://www.sportsbet.com.au/betting/politics/",
    "australian-federal-politics/outrights?ev_oc_grp_id=",
    xpaths[[i]]
  )

  # Each division section starts with a header made of an hh:mm time, some
  # non-digit text, the word "Markets" and a single-digit count in brackets.
  header_pattern <- "[0-9][0-9]:[0-9][0-9][^0-9]+Markets\\s+\\([0-9]\\)\n+"

  # Download and parse the page once; everything below works on its text.
  accordion_text <-
    state_url %>%
    read_html() %>%
    html_nodes(xpath = '//*[@id="accordion"]/ul') %>%
    xml_text()

  if (length(accordion_text) == 0L) {
    stop("No odds accordion found at ", state_url, call. = FALSE)
  }

  # Text between consecutive headers; the first piece precedes the first
  # header and is therefore not a division.
  tbl_odds_char <- strsplit(accordion_text, split = header_pattern)[[1]][-1]

  divisions <-
    accordion_text %>%
    stringr::str_extract_all(pattern = header_pattern) %>%
    unlist() %>%
    gsub("\n+", "", .) %>%
    gsub("[^A-Za-z]", "", .) %>%
    # Drop the trailing state label and "Markets". La Trobe is 'Vic'.
    gsub(paste0("^([A-Za-z]+)((Vic)|(", names(xpaths)[i], "))Markets$"), "\\1", .)

  # Normalise any spelling variant of Gellibrand.
  divisions[grepl("GEL+IBRAND", divisions, ignore.case = TRUE)] <- "GELLIBRAND"

  if (length(divisions) == 0L || length(divisions) != length(tbl_odds_char)) {
    stop(
      "Could not match division headers to odds sections at ", state_url,
      " (", length(divisions), " headers, ",
      length(tbl_odds_char), " sections)",
      call. = FALSE
    )
  }

  # One character vector of candidate runner strings per division.
  odds_nsw_list <- stringr::str_split(tbl_odds_char, pattern = "\n{3,}")
  names(odds_nsw_list) <- divisions

  # Keep only the strings that end in a decimal price.
  include_only_party_indices <- function(x) {
    x[grepl("\\.[0-9][0-9][0-9]*$", x)] # need to include 1.001
  }
  odds_nsw_list <- lapply(odds_nsw_list, include_only_party_indices)

  # Pad every division to the same number of rows so they can be bound as
  # columns; the NA padding is removed again after reshaping.
  max_rows <- max(lengths(odds_nsw_list))
  cbind_asis <- function(...) {
    cbind.data.frame(..., stringsAsFactors = FALSE)
  }
  padded <- lapply(odds_nsw_list, function(x) {
    length(x) <- max_rows
    x
  })

  odds_nsw_df <-
    do.call(cbind_asis, padded) %>%
    as.data.frame(., stringsAsFactors = FALSE) %>%
    mutate(rowNumber = as.character(rownames(.))) %>%
    gather(Division, char, -rowNumber, na.rm = TRUE) %>%
    filter(complete.cases(.)) %>%
    mutate(Party = stringr::str_extract(char, pattern = "((Labor)|(Xenophon)|(Liberal)|(Coalition)|(National)|(Green)|(Other)|(Independent))"),
           # The price is whatever follows the last run of newlines.
           Odds = as.numeric(gsub("^.*\n+(.*)$", "\\1", char))) %>%
    # Liberal and National runners are reported together as the Coalition.
    mutate(Party = ifelse(Party %in% c("National", "Liberal"), "Coalition", Party))

  as.data.table(odds_nsw_df)
}
# Scrape every state/territory in `xpaths` and stack the per-state tables.
national_odds <-
  lapply(seq_along(xpaths), state_odds_table) %>%
  rbindlist(use.names = TRUE, fill = TRUE)

# One timestamp for the whole run, so the `Time` column and both output file
# names agree with each other.
run_time <- Sys.time()

# Plain assignment rather than `%<>%`: that operator is only available when
# magrittr itself is attached, which this script does not do.
national_odds <-
  national_odds %>%
  group_by(Division) %>%
  arrange(Odds) %>%
  # Implied probability: inverse odds, normalised to sum to one per division.
  mutate(Probability = (1 / Odds) / sum(1 / Odds),
         Favourite = first(Party)) %>%
  mutate(Time = run_time) %>%
  group_by(Division, Party) %>%
  # Total probability of a party when it has several runners in a division.
  mutate(
    Probability2 = sum(Probability, na.rm = TRUE)
  ) %>%
  # this will exclude some where the odds are even
  ungroup()

# Keep the first-listed runner of every division.
national_favourites <-
  national_odds %>%
  select(-char) %>%
  group_by(Division) %>%
  # rowNumber is stored as character, so compare against a string.
  filter(rowNumber == "1") %>%
  ungroup() %>%
  mutate(ELECT_DIV2 = toupper(Division))

# Explicit format keeps the stamp as "YYYY-MM-DD HHMMSS" (no colons, no
# fractional seconds) regardless of how the R version prints date-times.
file_stamp <- format(run_time, "%Y-%m-%d %H%M%S")

if (!dir.exists("data")) {
  dir.create("data")
}

# fwrite(national_odds, file.path = paste0("data/", gsub(":", "", Sys.time(), fixed = TRUE), "national_favourites.csv"))
# The output path is passed positionally because the name of that argument
# differs between readr versions (`path` was deprecated in favour of `file`).
readr::write_csv(
  national_odds,
  file.path("data", paste0(file_stamp, "national_favourites.csv"))
)
readr::write_csv(
  national_favourites,
  file.path("data", paste0(file_stamp, "the_national_favourites.csv"))
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment