Skip to content

Instantly share code, notes, and snippets.

View jalapic's full-sized avatar

James Curley jalapic

View GitHub Profile
library(engsoccerdata)
library(dplyr)
library(ggplot2)
library(curleylab) #only used for curleytheme() ggplot theme - could make your own or use theme_bw()
# Keep every season except 1939 (presumably the abandoned wartime season -- confirm).
df <- filter(engsoccerdata2, Season != 1939)
df <-
rbind(df %>% filter(home=="Arsenal") %>% select(Season, Date, team=home, opp=visitor, gf=hgoal, ga=vgoal, tier),
### 2014-15 English League Data
library(XML)
library(dplyr)
library(tidyr)
#scraping function
getresults2 <- function(z1){
z1<-as.matrix(z1)
z1<-rbind(colnames(z1), z1)
#### Note - this isn't the cleanest code, but it works fine
#### install packages
# Install the data package and the theme package from GitHub, then load them.
# The repository owner is already part of the "owner/repo" spec, so the
# separate `username` argument (deprecated in devtools, and not a formal
# parameter of the current install_github()) is dropped.
devtools::install_github("jalapic/engsoccerdata")
library(engsoccerdata)
devtools::install_github("jalapic/curleylab")
library(curleylab) # just for a ggplot theme
# Preview the first rows of the match data (presumably provided by
# engsoccerdata -- confirm).
head(engsoccerdata2)
library(engsoccerdata)
library(dplyr)
library(magrittr)
tmp <- engsoccerdata2 %>%
filter(tier==4) %$%
split(.,Season) %>%
lapply(., maketable) %>%
lapply(.,data.frame) %>%
@jalapic
jalapic / markovs
Last active August 29, 2015 14:23
Markov Chains in DiagrammeR
### Markov Chain Diagrams Using DiagrammeR
### Introduction and Sample Data
#' Imagine a sequence of behaviors like the one below, where each letter (A, I, O, R, S, X, Y)
#' refers to a distinct behavior.
#' AOXXYXXXXXXYXXYXXXXXYXXXXXYXSXXXXAXAOOOXAAAOYXXXXXXSXXXXSXXYXYXXYXXYXXXXXXXXXYXXAAAAAAOAA
#' AOAAAOAAAAAOAAAAAAAAAAAOAAAOAAAOOAAAOAAAAAOOIAOAOAOIAOOOAAARSAAOOOAAAAOAAAOOAOOOAOAAAISAA
@jalapic
jalapic / timebin
Last active August 29, 2015 14:23
# Binning Timed Event Data
# Requires data.table version 1.9.5 or later
df_bin<-function(df,b=0,e=1200,interval=60){
library(data.table)
colnames(df)<-c("start", "end")
dt<-data.table(df)
lookup = data.table(start = seq(b, (e-interval), by = interval), end = seq(interval, e, by = interval))
ans = foverlaps(lookup, setkey(dt, start, end))
tmp<-ans[, sum(pmin(i.end, end) - pmax(i.start, start)), by=.(i.start,i.end)]
<!DOCTYPE html>
<meta charset="utf-8">
<style> /* set the CSS */
body { font: 12px Arial;}
path {
stroke: steelblue;
stroke-width: 2;
fill: none;
@jalapic
jalapic / slams
Created July 12, 2015 18:33
grand slam data
library("rvest")
# Download and parse the Wikipedia page. read_html() replaces rvest's html(),
# which was deprecated and later removed from the package.
html <- read_html("https://en.wikipedia.org/wiki/List_of_Grand_Slam_women%27s_singles_champions")
# Convert the first <table> on the page to a data frame, padding short rows.
tmp <- html_table(html_nodes(html, "table")[[1]], fill = TRUE)
library(tidyr)
# Stack columns 2 to 5 into key/value pairs (one row per original cell).
tmp <- tmp %>% gather(key, value, 2:5)
# Drop rows whose value mentions "tournament" or "started"
# (presumably placeholder cells rather than champion names -- confirm).
tmp <- tmp[!grepl("tournament|started", tmp$value), ]
# Drop rows whose Year column contains a player name instead of a year.
tmp <- tmp[!grepl("Evonne Goolagong Cawley", tmp$Year), ]
### Open Championship Round 1 scores
# Convert the scores in columns 2:19 of `openr1` to values relative to par.
# NOTE(review): `openr1` is created outside this excerpt; columns 2:19 are
# assumed to be the 18 per-hole scores -- confirm.

# Copy the row names into column X.
openr1$X<-rownames(openr1)
# Print the first rows as a quick check.
head(openr1)
# One par value per column of 2:19 (18 values; presumably in hole order -- confirm).
# NOTE(review): this variable shares its name with graphics::par().
par<-c(4,4,4,4,5,4,4,3,4,4,3,4,4,5,4,4,4,4)
# Subtract par from each row. apply() over rows returns the per-row results as
# columns, so t() turns them back into one row per original row.
openr1[,2:19] <- t(apply(openr1[,2:19],1,function(x) x-par))
### Getting golf major winners
library(dplyr)
library("rvest")
# Scrape one table from each tournament's Wikipedia page, selected by XPath.
# read_html() replaces rvest's html(), which was deprecated and later removed
# from the package.
# NOTE(review): the table positions in the XPath expressions depend on the
# current page layout -- confirm they still select the intended tables.
url <- "http://en.wikipedia.org/wiki/Masters_Tournament"
masters <- url %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[4]') %>%
  html_table()
url1 <- "http://en.wikipedia.org/wiki/U.S._Open_(golf)"
usopen <- url1 %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/table[2]') %>%
  html_table()