Skip to content

Instantly share code, notes, and snippets.

@wush978
Forked from anonymous/lol.R
Created November 19, 2013 14:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wush978/7546522 to your computer and use it in GitHub Desktop.
Save wush978/7546522 to your computer and use it in GitHub Desktop.
library(XML)
library(RCurl)
# Monthly-stats page to scrape; the fragment matches the id of the <div>
# queried below.
url <- "http://loltw.gamebase.com.tw/summoner/TW/AZB_TPS_MiSTakE/monthly_stats#mstat-201311-RANKED_SOLO_5x5"
# Download the page body as a single string.
src <- getURL(url)
# `src` holds markup, not a file name or URL, so say so explicitly instead of
# letting htmlParse() guess from the string's content.
doc <- htmlParse(src, asText = TRUE)
# All <td> cells of the table(s) inside the mstat-201311-RANKED_SOLO_5x5 div.
div <- doc['//div[@id="mstat-201311-RANKED_SOLO_5x5"]//table/tbody/tr/td']
# Positions of the cells that contain at least one <br> child. vapply() keeps
# the count vector an integer even when `div` is empty.
row.index.start <- which(
  vapply(div, function(a) length(a["br"]), integer(1)) > 0
)
# Extract the text line that sits between a "<br/><br/>" pair and the next
# "<br/>" in the printed form of `a`.
#
# Args:
#   a: any object whose print() output is markup text (the script passes
#      XML nodes).
# Returns:
#   A character vector with one element per regex match; "" when nothing
#   matches.
extract_br <- function(a) {
  # Work on the printed representation, joined back into one string so the
  # pattern can span line breaks.
  rendered <- paste(capture.output(print(a)), collapse = "\n")
  hits <- gregexpr(
    pattern = "<br/><br/>\\n\\s+(?<name>.*)\\n\\s+<br/>",
    text = rendered,
    perl = TRUE
  )[[1]]
  # Start/end offsets of the named capture group for every match.
  first <- attr(hits, "capture.start")
  last <- first + attr(hits, "capture.length") - 1
  substring(rendered, first, last)
}
# Pull the trailing "(<digits>)" count out of each string.
#
# Args:
#   src: character vector (or something as.character() can flatten); each
#        element is searched for a parenthesised run of digits at its end.
# Returns:
#   An integer vector as long as `src`, NA where an element does not end in
#   "(<digits>)". Naming follows the earlier Vectorize()-based version:
#   names of `src` are kept, otherwise a character `src` supplies the names.
#   Zero-length input gives integer(0) (the Vectorize() wrapper returned an
#   empty list there).
extract_count <- function(src) {
  if (length(src) == 0L) {
    return(integer(0))
  }
  text <- as.character(src)
  # The pattern is anchored at the end, so each element has at most one
  # match and a single vectorised regexpr() pass is enough.
  hit <- regexpr("\\((?<count>\\d+)\\)$", text, perl = TRUE)
  first <- attr(hit, "capture.start")[, 1L]
  width <- attr(hit, "capture.length")[, 1L]
  digits <- substring(text, first, first + width - 1L)
  # No match (or NA input) must yield NA rather than a stray substring.
  digits[is.na(hit) | hit < 0L] <- NA_character_
  counts <- as.integer(digits)
  names(counts) <- if (!is.null(names(src))) {
    names(src)
  } else if (is.character(src)) {
    src
  }
  counts
}
# Build one row per <br>-bearing cell found above: the name comes from that
# cell, and the two counts come from the "(n)" suffix of the next two cells
# (presumably games played and games won -- confirm against the page layout).
# vapply() makes a cell that yields zero or several values fail here instead
# of silently producing list columns.
df <- data.frame(
  name = vapply(div[row.index.start], extract_br, character(1)),
  use = extract_count(
    vapply(div[row.index.start + 1], xmlValue, character(1))
  ),
  win = extract_count(
    vapply(div[row.index.start + 2], xmlValue, character(1))
  ),
  # extract_count() returns vectors named by the raw cell text; without this
  # those strings would become the data frame's row names.
  row.names = NULL,
  # Keep `name` a character column regardless of the R version's default.
  stringsAsFactors = FALSE
)
df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment