Skip to content

Instantly share code, notes, and snippets.

@gghatano
gghatano / gist:8344453
Created January 9, 2014 23:58
plyr vs doBy speed test
# Category labels used for grouping
types <- c("A", "B", "C", "D", "E")
# Number of rows
obs <- 4e6
# Build the data frame: a single factor column sampled (with replacement)
# from `types`
dat <- data.frame(
  type = factor(sample(types, size = obs, replace = TRUE))
)
# Timings are measured while the number of columns is increased
Nmax <- 10
plyr_time <- 0
@gghatano
gghatano / gist:8344475
Created January 10, 2014 00:01
plyr vs doBy speed test and visualize
# Reference:
# http://d.hatena.ne.jp/dichika/20140103/p1
library(devtools)
#install_github("dplyr") # does not work
library(plyr)
library(doBy)
library(reshape2)
library(ggplot2)
# Fix the RNG seed
set.seed(42)
@gghatano
gghatano / gist:8344512
Created January 10, 2014 00:05
Surrogate data test for MLB at-bat data
# Surrogate data method experiment using Major League Baseball data
library(plyr)
library(MASS)
# Load the data
# NOTE(review): read.csv() defaults to header = TRUE, so the first line of
# all2011.csv is consumed as column names before they are overwritten below.
# Confirm the file really has a header row; otherwise one record is lost.
data2011 <- read.csv("all2011.csv")
fields <- read.csv("fields.csv")
# Replace the column names with the "Header" column of fields.csv
names(data2011) <- fields[,"Header"]
# In the 0/1 data, count the lengths of the runs of 1s
@gghatano
gghatano / gist:8344555
Created January 10, 2014 00:08
Create a pitching animation of Hisashi Iwakuma (4/2/2013)
library(ggplot2)
library(lattice)
library(pitchRx)
library(plyr)

# Scrape the data for the single day 2013-04-02 with scrapeFX()
# (presumably provided by pitchRx, attached above).
dat0402 <- scrapeFX(start = "2013-04-02", end = "2013-04-02")

# Inner-join the `pitch` table to the `atbat` table on the (num, url) keys.
pitches0402 <- plyr::join(
  x = dat0402$pitch,
  y = dat0402$atbat,
  by = c("num", "url"),
  type = "inner"
)

# Keep only the rows whose pitcher_name is "Hisashi Iwakuma".
iwakuma0402 <- subset(pitches0402, pitcher_name == "Hisashi Iwakuma")
@gghatano
gghatano / gist:8344614
Created January 10, 2014 00:14
Scored or not when runner is on third-base (MLB, 2013)
library(plyr)
# Load the data
# NOTE(review): read.csv() defaults to header = TRUE, so the first line of
# all2013.csv is consumed as column names before they are overwritten below.
# Confirm the file really has a header row; otherwise one record is lost.
data2013 <- read.csv("all2013.csv")
fields <- read.csv("fields.csv")
# Replace the column names with the "Header" column of fields.csv
names(data2013) <- fields[,"Header"]
# Keep only the rows where BAT_EVENT_FL is TRUE (batting results only)
data2013 <- subset(data2013, BAT_EVENT_FL == TRUE)
# Store in SuperChance whether the situation is a big scoring chance
@gghatano
gghatano / gist:8344753
Created January 10, 2014 00:26
Career K/BB ranking in MLB data (21st century)
# ddply() and .() used below come from plyr; attach it explicitly so the
# script does not depend on the package already being loaded in the session.
library(plyr)
# Load the data
Pitching <- read.csv("Pitching.csv")
# The full table is heavy, so keep only seasons after 2000 (yearID > 2000)
Pitching <- subset(Pitching, yearID > 2000)
# plyrを使ってデータを要約
Pitching.KBB <- ddply(Pitching, .(playerID), summarize,
Career.SO = sum(SO, na.rm = TRUE), Career.BB = sum(BB, na.rm=TRUE),
Career.IPouts = sum(IPouts, na.rm = TRUE),
@gghatano
gghatano / gist:8399642
Last active January 3, 2016 02:59
STL sort with Rcpp
# install.packages(c("Rcpp", "inline"))
library(Rcpp)
library(inline)
# sort_stl: C++ body compiled by inline::cxxfunction with the Rcpp plugin.
# `xx` arrives as the raw R object. Wrapping a double vector in a
# NumericVector does not copy it, so sorting that wrapper directly would
# reorder the caller's vector in place (and any later timing run would then
# sort already-sorted data). clone() makes a deep copy before sorting.
src <- "
NumericVector x = clone(NumericVector(xx));
std::sort(x.begin(), x.end());
return x;
"
# stlsort(xx): returns a sorted copy of the numeric vector `xx`,
# leaving `xx` itself untouched.
stlsort <- cxxfunction(signature(xx="numeric"), src, plugin = "Rcpp")
@gghatano
gghatano / gist:8400006
Last active January 3, 2016 03:09
R vs Cpp (sorting)
# R vs Cpp (sorting)
library(Rcpp)
library(inline)
library(ggplot2)
library(reshape2)
# sort_stl
src <- "
NumericVector x(xx);
std::sort(x.begin(), x.end());
@gghatano
gghatano / gist:8473615
Last active January 3, 2016 13:59
relation between pitching rhythm and assist score
library(pitchRx)
library(chron)
library(doBy)
library(plyr)
library(ggplot2)
# Scraping call, commented out; the data is read from 2013.csv instead
#data2013 <- scrapeFX(start="2013-02-01", end="2013-10-01")
# Load the data from a local CSV file
data2013 <- read.csv("2013.csv")
# Filter on sv_id, commented out
#data2013 <- subset(data2013, sv_id != "NA")
@gghatano
gghatano / gist:8502210
Created January 19, 2014 08:58
for makky
library(ggplot2)

# Sample grid from 0 to 10 in steps of 0.1
x <- seq(from = 0, to = 10, by = 0.1)
# A sine curve and the same curve shifted up by one
y <- sin(x)
z <- y + 1
xyz <- data.frame(x = x, y = y, z = z)

# Draw both curves as separate line layers on one plot
p <- ggplot() +
  geom_line(data = xyz, mapping = aes(x = x, y = y)) +
  geom_line(data = xyz, mapping = aes(x = x, y = z))