# Outliers
# Gist ac00std/1f1c2e7bb0a564cf3cf51d5894fddaa2 (last active July 8, 2018 16:28).
# Note: this file contains bidirectional Unicode text that may be interpreted
# or compiled differently than it appears. To review it, open the file in an
# editor that reveals hidden Unicode characters.
# Load dependencies and the data used by the outlier-detection demo.
library(MASS)     # provides mvrnorm(), used for the synthetic example
library(kernlab)  # provides ksvm(), used for the one-class SVM

# Read the data set.
# NOTE(review): "sbnote.csv" is resolved relative to the working directory.
sb <- read.csv("sbnote.csv")

# Training subset: the first 100 rows. head() returns fewer rows when the file
# is shorter, instead of padding the result with all-NA rows like sb[1:100, ].
# (The subset is already a data frame, so no data.frame() re-wrap is needed.)
sb.g <- head(sb, 100)

# Feature table: every column except column 7.
# NOTE(review): column 7 is presumably the `class` label column used by the
# one-class SVM formula on the same data - confirm against the CSV header.
sb.t <- sb.g[, -7]
# MT (Mahalanobis-Taguchi) method on the training features in `sb.t`.
# The training rows themselves serve as the reference group ("unit space").
n <- nrow(sb.t)                  # number of samples in the unit space
Ave <- colMeans(sb.t)            # per-variable mean of the unit space
# var() divides by (n - 1); rescale to the divide-by-n covariance matrix.
Var <- var(sb.t) * (n - 1) / n
k <- ncol(sb.t)                  # number of variables

# mahalanobis() returns the squared distance; dividing by k gives the scaled
# squared Mahalanobis distance of every unit-space sample.
MD <- mahalanobis(sb.t, Ave, Var) / k
plot(MD)

# Scatterplot matrix: points with MD > 3 are filled blue, all others red.
pairs(sb.t, pch = 21, bg = c("red", "blue")[(MD > 3) + 1])
# One-class SVM on the training subset `sb.g`, using an RBF kernel.
sb.ksvm <- ksvm(class ~ ., data = sb.g, type = "one-svc", kernel = "rbfdot",
                kpar = list(sigma = 0.01), nu = 0.001)

# Without `newdata`, predict() is evaluated on the training data.
# NOTE(review): for type "one-svc" the predictions are presumably logical
# (TRUE = inside the learned region) - confirm in the kernlab documentation.
sb.ksvm.predict <- predict(sb.ksvm)

# Colour index 2 - prediction: a value of 1/TRUE maps to "red",
# a value of 0/FALSE maps to "green".
pairs(sb.t, pch = 21, bg = c("red", "green")[2 - sb.ksvm.predict])
# Difference between the MT method and the one-class SVM:
# build a synthetic 2-D data set made of three groups.
set.seed(3383)  # fixed seed so the simulated data are reproducible

# Group 1: 1000 bivariate-normal points around (100, 100), covariance 5.
mu1 <- c(100, 100)
sigma1 <- matrix(c(10, 5, 5, 10), nrow = 2, ncol = 2)
data1 <- mvrnorm(1000, mu1, sigma1)
plot(data1)

# Group 2: 10 bivariate-normal points around (150, 150), zero covariance.
mu2 <- c(150, 150)
sigma2 <- matrix(c(5, 0, 0, 5), nrow = 2, ncol = 2)
data2 <- mvrnorm(10, mu2, sigma2)
plot(data2)

# Group 3: 50 points on the line x + y = 250, x drawn uniformly on [100, 150].
x <- runif(50, 100, 150)
y <- 250 - x
data3 <- cbind(x, y)

# Stack the three groups row-wise into a single two-column matrix.
data <- rbind(data1, data2, data3)
# Name the columns explicitly: code that predicts on a grid with columns
# "x" and "y" needs these names, and they should not depend on rbind()
# happening to pick them up from `data3`.
colnames(data) <- c("x", "y")
plot(data)
# MT (Mahalanobis-Taguchi) method on the synthetic matrix `data`.
# All rows of `data` are used as the reference group ("unit space").
n <- nrow(data)                  # number of samples in the unit space
Ave <- colMeans(data)            # per-variable mean of the unit space
# var() divides by (n - 1); rescale to the divide-by-n covariance matrix.
Var <- var(data) * (n - 1) / n
k <- ncol(data)                  # number of variables

# mahalanobis() returns the squared distance; dividing by k gives the scaled
# squared Mahalanobis distance of every sample.
MD <- mahalanobis(data, Ave, Var) / k
plot(MD)

# Scatterplot: points with MD > 5 are filled blue, all others red.
plot(data, pch = 21, bg = c("red", "blue")[(MD > 5) + 1])
# One-class SVM on the synthetic matrix `data`, using an RBF kernel.
# The model is fitted through the formula interface, so a constant dummy
# response column `type` is placed in front of the two coordinate columns.
data.t <- data.frame(type = 1, data)
data.ksvm <- ksvm(type ~ ., data = data.t, type = "one-svc", kernel = "rbfdot",
                  kpar = list(sigma = 0.1), nu = 0.001)

# Without `newdata`, predict() is evaluated on the training data.
# NOTE(review): for type "one-svc" the predictions are presumably logical
# (TRUE = inside the learned region) - confirm in the kernlab documentation.
data.ksvm.predict <- predict(data.ksvm)

# Colour index 2 - prediction: a value of 1/TRUE maps to "red",
# a value of 0/FALSE maps to "green".
plot(data, pch = 21, bg = c("red", "green")[2 - data.ksvm.predict])
# Decision-boundary plot for the one-class SVM `data.ksvm`.

# Build a mesh with 1-unit spacing covering [80, 180] x [80, 180].
px <- seq(80, 180, 1)
py <- seq(80, 180, 1)
pgrid <- expand.grid(px, py)
# Column names are set to "x" and "y" for predict().
# NOTE(review): these presumably have to match the predictor names the model
# was fitted with - confirm against the training data frame.
names(pgrid) <- c("x", "y")
#pgrid <-data.frame(type=1, pgrid)

# Predict the class of every mesh point with the fitted model.
pred.pgrid.svm <- predict(data.ksvm, newdata = pgrid)

# Reshape the predictions once into a length(px) x length(py) matrix.
# expand.grid() varies its first argument fastest, so column-major filling
# puts px along the rows and py along the columns, as image()/contour() expect.
pred.grid.z <- matrix(as.numeric(pred.pgrid.svm) - 1,
                      nrow = length(px), ncol = length(py))

# Fill the mesh cells: lower value -> first colour, higher value -> second.
# Axis labels are given explicitly so the figure is annotated exactly once.
my.colors <- c("#FFCCCC", "#CCFFCC")
image(px, py, pred.grid.z,
      xlim = c(80, 180), ylim = c(80, 180), col = my.colors,
      xlab = "x", ylab = "y")

# Draw contour lines of the predicted values on top (the decision boundary).
contour(px, py, pred.grid.z,
        xlim = c(80, 180), ylim = c(80, 180),
        col = "orange", lwd = 2, drawlabels = FALSE, add = TRUE)

# Overlay the training points in the existing coordinate system. points()
# replaces the former par(new = ...) + plot() overlay, which printed a second
# set of axis labels on top of the ones drawn by image().
points(data, pch = 21, bg = c("red", "green")[2 - data.ksvm.predict])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment