Last active
October 8, 2020 16:23
-
-
Save geojackass/eeda4bdc6603dd52de92ea338f4957fa to your computer and use it in GitHub Desktop.
製品データの異常度に基づく異常値の算出
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##### Initial setup #####
# Load the packages this script declares as dependencies.
pacman::p_load(tidyverse, magrittr, stringr)

# Read the input CSV. '{dir_path}' is a template placeholder that must be
# replaced with the actual file path before running.
df <- read.csv('{dir_path}')

# Inspect the first rows of the data.
head(df)
# Check the data size (rows, columns).
dim(df)

# Extract the columns of interest. `start` and `end` are template
# placeholders: supply each as a numeric column index.
dat <- df[{start}:{end}]
X <- dat
X

# Log transform, log(1 + x). Only the second extracted column is used, so X
# becomes an n x 1 matrix.
# NOTE(review): the per-variable normalisation below suggests a multivariate
# input may have been intended (e.g. all of `dat`) -- confirm with the author.
X <- as.matrix(log1p(X[, 2]))

# Column means.
mx <- colMeans(X)
# Centred data: subtract each column's mean from that column.
Xc <- sweep(X, 2, mx)
# Covariance matrix (divides by n, not n - 1).
Sx <- crossprod(Xc) / nrow(X)
# Squared Mahalanobis distance of each row, divided by the number of
# variables, i.e. the anomaly score per variable.
a <- rowSums((Xc %*% solve(Sx)) * Xc) / ncol(X)

# Plot the anomaly score of every observation.
plot(a, xlab = "index", ylab = "anomaly_score", ylim = c(-1, 11) / ncol(X))
# Draw the threshold as a dashed red horizontal line at height 2. abline()
# spans the whole plot regardless of how many observations there are.
abline(h = 2, col = "red", lty = 2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment