Last active
June 17, 2019 02:46
-
-
Save bellbind/52354fad1cd58844053809a48982569a to your computer and use it in GitHub Desktop.
[ruby] PCA(Principal Component Analysis) with ruby builtins (>= 1.9)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ブランド名 | プロっぽい | カッコいい | 好き | かわいい | 一般的 | ミーハー | 初心者 | ダサイ | 嫌い | わからない | 欲しい | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
ロシニョール | 33 | 15 | 38 | 11 | 71 | 94 | 7 | 4 | 8 | 5 | 37 | |
ヤマハ | 10 | 6 | 24 | 18 | 75 | 46 | 29 | 5 | 5 | 12 | 10 | |
オガサカ | 67 | 5 | 18 | 1 | 32 | 4 | 12 | 25 | 24 | 11 | 14 | |
カザマ | 25 | 5 | 12 | 2 | 33 | 4 | 18 | 40 | 23 | 31 | 8 | |
アトミック | 52 | 32 | 24 | 5 | 32 | 32 | 3 | 11 | 5 | 17 | 35 | |
ニシザワ | 23 | 7 | 12 | 1 | 8 | 1 | 3 | 32 | 15 | 28 | 11 | |
クナイスル | 32 | 13 | 12 | 0 | 37 | 2 | 6 | 23 | 16 | 33 | 9 | |
K2 | 48 | 26 | 24 | 3 | 20 | 32 | 2 | 10 | 21 | 20 | 23 | |
フィッシャー | 27 | 14 | 18 | 2 | 20 | 7 | 3 | 9 | 12 | 37 | 11 | |
ブリザード | 41 | 25 | 20 | 4 | 24 | 8 | 2 | 12 | 12 | 28 | 17 | |
ミズノ | 5 | 3 | 1 | 0 | 48 | 6 | 27 | 74 | 27 | 30 | 3 | |
オーリン | 19 | 17 | 22 | 13 | 22 | 37 | 8 | 17 | 10 | 21 | 16 | |
スワロー | 5 | 1 | 0 | 2 | 18 | 1 | 43 | 69 | 26 | 27 | 2 | |
ケスレー | 47 | 19 | 12 | 1 | 17 | 2 | 2 | 14 | 11 | 44 | 14 | |
エラン | 36 | 8 | 13 | 2 | 7 | 11 | 0 | 10 | 12 | 36 | 8 | |
フォルクル | 32 | 7 | 10 | 0 | 1 | 4 | 0 | 11 | 8 | 28 | 8 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PCA(Principal Component Analysis) with ruby builtins (>= 1.9) | |
require "csv" | |
require "matrix" | |
require "pp" | |
# data from CSV | |
title, *entries = CSV.read("./data.csv").to_a | |
name, *props = title | |
ids = entries.map(&:first) | |
values = entries.map{|line| line[1..-1].map(&:to_f)} | |
# [PCA] data as matrix | |
m = Matrix.rows(values) | |
col_means = m.column_vectors.map{|col| col.reduce(0.0, &:+) / col.size} | |
means_m = Matrix.rows([col_means] * m.row_size) | |
x = m - means_m | |
cxx = (x.t * x) / x.row_size # aka. covariance matrix | |
# [PCA] (descending ordered) eigenvectors of cov matrix as components | |
v, d, vinv = cxx.eigen # (>= ruby-1.9) | |
diags = d.each(:diagonal).to_a | |
desc_index = diags.map.with_index.sort{|a, b| b <=> a}.map(&:last) | |
eigvals = desc_index.map{|i| diags[i]} | |
eigvecs = desc_index.map{|i| v.column(i)} | |
# [PCA] rate of components | |
total = eigvals.reduce(0.0, &:+) | |
rates = eigvals.map{|eigval| eigval / total} | |
pp rates | |
# [PCA] principal 2 components | |
v1 = eigvecs[0] | |
v2 = eigvecs[1] | |
pp props.zip(v1, v2) | |
# [PCA] positions of entries by component axis | |
f1 = x * v1 | |
f2 = x * v2 | |
pp ids.zip(f1, f2) | |
# [PCA] plot 2D | |
def svg_plot(points) | |
width = height = "600px" | |
lcolor, pcolor = "gray", "blue" | |
hw, ps, fs = 100, 1, 3 | |
w = 2 * hw | |
xs = points.map{|e| e[1]} | |
ys = points.map{|e| e[2]} | |
screen = [xs.max - xs.min, ys.max - ys.min].max * 1.5 | |
scale = w / screen | |
svg = "<svg xmlns='http://www.w3.org/2000/svg' version='1.1' | |
width='#{width}' height='#{height}' preserveAspectRatio='xMidYMid meet' | |
viewBox='#{-hw}, #{-hw}, #{w}, #{w}'>" | |
xline = "<g stroke='#{lcolor}'> | |
<line x1='#{-hw}' y1='0' x2='#{hw}' y2='0' stroke-with='#{ps}' /></g>" | |
yline = "<g stroke='#{lcolor}'> | |
<line y1='#{-hw}' x1='0' y2='#{hw}' x2='0' stroke-with='#{ps}' /></g>" | |
plots = points.map{|point| | |
x = point[1] * scale | |
y = -point[2] * scale | |
title = "<title>#{point[0]}: (#{point[1]}, #{point[2]})</title>" | |
circle = "<circle cx='#{x}' cy='#{y}' r='#{ps}' fill='#{pcolor}' />" | |
text = "<text x='#{x}' y='#{y}' font-size='#{fs}'>#{point[0]}</text>" | |
"<g>" + title + circle + text + "</g>" | |
} | |
([svg, xline, yline] + plots + ["</svg>"]).join("\n") | |
end | |
File.open("./pca.svg", "w") do |f| | |
f.puts svg_plot(ids.zip(f1, f2)) | |
end | |
File.open("./vecs.svg", "w") do |f| | |
f.puts svg_plot(props.zip(v1, v2)) | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[0.5012942018488845, | |
0.31384743850346075, | |
0.07636620893889362, | |
0.05156489444914493, | |
0.026012399659040758, | |
0.014932437624557158, | |
0.007586464333765954, | |
0.005258078027204043, | |
0.0016256821356081788, | |
0.001431437894944284, | |
8.075658449602605e-05] | |
[["プロっぽい", 0.1277249626865704, -0.47443724657562575], | |
["カッコいい", 0.12278381069493152, -0.18095405382304142], | |
["好き", 0.25699876226050034, -0.06674225683602523], | |
["かわいい", 0.10998844269347584, 0.06828567443365412], | |
["一般的", 0.3771905520351377, 0.49688945214841307], | |
["ミーハー", 0.6680314691663672, 0.234909238025338], | |
["初心者", -0.0930811355467427, 0.39857127302398926], | |
["ダサイ", -0.41887501475186795, 0.4904015841422266], | |
["嫌い", -0.13026042519444647, 0.09587215238982853], | |
["わからない", -0.21274507124447287, -0.11749668917820728], | |
["欲しい", 0.2357483246568909, -0.09282388991772578]] | |
[["ロシニョール", 92.1475537045644, 25.54762239692057], | |
["ヤマハ", 44.789714396236924, 40.86769258971635], | |
["オガサカ", -3.855388782739114, -13.387021708722454], | |
["カザマ", -22.655393776604292, 15.363560118881761], | |
["アトミック", 31.081900042189865, -19.63469099583756], | |
["ニシザワ", -27.074341991182838, -7.8394637776679], | |
["クナイスル", -11.866396131946242, -2.142569595764945], | |
["K2", 20.048608055036745, -21.344084268695944], | |
["フィッシャー", -7.40729803621966, -16.58825324842577], | |
["ブリザード", 0.8078931266841838, -21.422155068223535], | |
["ミズノ", -38.075021677420175, 54.96086695902395], | |
["オーリン", 15.999224609940356, 2.329764309933965], | |
["スワロー", -51.87566086328713, 43.71943317052951], | |
["ケスレー", -13.01559378496611, -28.458199127746237], | |
["エラン", -11.144952858141776, -25.2680226623144], | |
["フォルクル", -17.904846032145137, -26.70447909160736]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<svg xmlns='http://www.w3.org/2000/svg' version='1.1' | |
width='600px' height='600px' preserveAspectRatio='xMidYMid meet' | |
viewBox='-100, -100, 200, 200'> | |
<g stroke='gray'> | |
<line x1='-100' y1='0' x2='100' y2='0' stroke-with='1' /></g> | |
<g stroke='gray'> | |
<line y1='-100' x1='0' y2='100' x2='0' stroke-with='1' /></g> | |
<g><title>プロっぽい: (0.1277249626865704, -0.47443724657562575)</title><circle cx='15.668316710637244' cy='58.20031481951809' r='1' fill='blue' /><text x='15.668316710637244' y='58.20031481951809' font-size='3'>プロっぽい</text></g> | |
<g><title>カッコいい: (0.12278381069493152, -0.18095405382304142)</title><circle cx='15.062174162681469' cy='22.198052485093594' r='1' fill='blue' /><text x='15.062174162681469' y='22.198052485093594' font-size='3'>カッコいい</text></g> | |
<g><title>好き: (0.25699876226050034, -0.06674225683602523)</title><circle cx='31.526632826040945' cy='8.187427078414725' r='1' fill='blue' /><text x='31.526632826040945' y='8.187427078414725' font-size='3'>好き</text></g> | |
<g><title>かわいい: (0.10998844269347584, 0.06828567443365412)</title><circle cx='13.492536763233312' cy='-8.376761686370441' r='1' fill='blue' /><text x='13.492536763233312' y='-8.376761686370441' font-size='3'>かわいい</text></g> | |
<g><title>一般的: (0.3771905520351377, 0.49688945214841307)</title><circle cx='46.270837784852475' cy='-60.954578828426314' r='1' fill='blue' /><text x='46.270837784852475' y='-60.954578828426314' font-size='3'>一般的</text></g> | |
<g><title>ミーハー: (0.6680314691663672, 0.234909238025338)</title><circle cx='81.94896605494552' cy='-28.816859776013573' r='1' fill='blue' /><text x='81.94896605494552' y='-28.816859776013573' font-size='3'>ミーハー</text></g> | |
<g><title>初心者: (-0.0930811355467427, 0.39857127302398926)</title><circle cx='-11.418478274376232' cy='-48.893660300582354' r='1' fill='blue' /><text x='-11.418478274376232' y='-48.893660300582354' font-size='3'>初心者</text></g> | |
<g><title>ダサイ: (-0.41887501475186795, 0.4904015841422266)</title><circle cx='-51.38436727838783' cy='-60.15869704807933' r='1' fill='blue' /><text x='-51.38436727838783' y='-60.15869704807933' font-size='3'>ダサイ</text></g> | |
<g><title>嫌い: (-0.13026042519444647, 0.09587215238982853)</title><circle cx='-15.97934776318752' cy='-11.760858768544031' r='1' fill='blue' /><text x='-15.97934776318752' y='-11.760858768544031' font-size='3'>嫌い</text></g> | |
<g><title>わからない: (-0.21274507124447287, -0.11749668917820728)</title><circle cx='-26.097930152192323' cy='14.413590732557905' r='1' fill='blue' /><text x='-26.097930152192323' y='14.413590732557905' font-size='3'>わからない</text></g> | |
<g><title>欲しい: (0.2357483246568909, -0.09282388991772578)</title><circle cx='28.91979247463643' cy='11.386921357833964' r='1' fill='blue' /><text x='28.91979247463643' y='11.386921357833964' font-size='3'>欲しい</text></g> | |
</svg> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here is original python version: