# Gist badbye/4607b0808119204f0cc85dcc0ac6f878 (created November 25, 2018 10:03).
# Solve OLS in linear regression using MapReduce.
# Note: the hosting page warned that this file contains bidirectional Unicode
# text that may be interpreted or compiled differently than what appears below;
# open it in an editor that reveals hidden Unicode characters to review it.
# Reference: https://ieeexplore.ieee.org/document/7345473/
# Verify the distributed computation described in that paper.

#' Prepare the data: a 20 x 20 design matrix and a response of length 20,
#' both filled with standard-normal draws.
set.seed(20181125)  # fixed seed so the verification run is reproducible
X <- matrix(rnorm(400), 20, 20)
Y <- rnorm(20)
#' Compute the row-index boundaries used to split data into blocks.
#'
#' @param nrows Total number of rows to split.
#' @param size Number of consecutive, equally sized blocks.
#' @return A list of length `size`; element `i` is `c(first, last)`, the
#'   first and last row index of block `i`.
splitByRowIndex <- function(nrows, size) {
  stopifnot(
    is.numeric(nrows), length(nrows) == 1,
    is.numeric(size), length(size) == 1,
    size >= 1,
    # An uneven split would produce fractional indices and silently lose rows.
    nrows %% size == 0
  )
  block.len <- nrows / size
  lapply(seq_len(size), function(i) {
    c((i - 1) * block.len + 1, i * block.len)
  })
}
#' Split the matrix `mat` by rows into `size` equal parts.
#'
#' @param mat A matrix.
#' @param size Number of row blocks.
#' @return A list of `size` matrices, each holding a consecutive run of rows
#'   of `mat` with all of its columns.
splitMatByRow <- function(mat, size) {
  row.index <- splitByRowIndex(nrow(mat), size)
  # drop = FALSE keeps every block a matrix, even when a block has a single
  # row or `mat` has a single column.
  lapply(row.index, function(val) mat[val[1]:val[2], , drop = FALSE])
}
#' Algorithm 1: main function.
block.number <- 4  # split the data into 4 row blocks
x.block <- splitMatByRow(X, block.number)
y.block <- splitMatByRow(matrix(Y, ncol = 1), block.number)

# Algorithm 2: mapper function in step 1.
# QR-factorise every row block of X independently.
x.block.qr <- lapply(x.block, function(val) {
  qrresult <- qr(val)
  list(Q = qr.Q(qrresult), R = qr.R(qrresult))
})

# Algorithm 3: reducer function in step 1.
# Stack the per-block R factors and factorise the stack once more; the R of
# that second factorisation is the final R.
Rtemp <- do.call(rbind, lapply(x.block.qr, function(l) l$R))
qrresult <- qr(Rtemp)
Rtemp.qr <- list(Q = qr.Q(qrresult), R = qr.R(qrresult))
R.final <- Rtemp.qr$R
# Row blocks of the second-level Q, one per data block.
Rtemp.Q.divide <- splitMatByRow(Rtemp.qr$Q, block.number)
Q.result <- vector("list", block.number)
for (i in seq_len(block.number)) {
  # Transposed product of the block's own Q and its slice of the stacked Q.
  Q.result[[i]] <- t(x.block.qr[[i]]$Q %*% Rtemp.Q.divide[[i]])
}

# Algorithm 4: reduce function in step 2.
# Multiply each transposed Q block with the matching block of Y.
V <- vector("list", block.number)
for (i in seq_len(block.number)) {
  V[[i]] <- Q.result[[i]] %*% matrix(y.block[[i]], ncol = 1)
}

# Algorithm 5: reduce function in step 3.
# Sum the partial products and solve R.final %*% beta = V.sum. R.final is
# upper triangular, so back-substitution replaces an explicit inverse.
V.sum <- Reduce("+", V)
beta <- as.numeric(backsolve(R.final, V.sum))

# Reference answer: ordinary least squares without an intercept.
ans <- lm(Y ~ X - 1)
coefficients <- ans$coefficients

# Compare: squared distance between the two coefficient vectors.
sum((beta - coefficients)^2)
# Sign up for free or sign in on GitHub to join the conversation and comment on this gist.