Skip to content

Instantly share code, notes, and snippets.

@Quwarm
Last active April 27, 2021 17:25
Show Gist options
  • Save Quwarm/b3624e17fe9a200b24bf32831156ae7a to your computer and use it in GitHub Desktop.
Save Quwarm/b3624e17fe9a200b24bf32831156ae7a to your computer and use it in GitHub Desktop.
Prekopcsák — Lemire shrinkage
import numpy as np
def prekopcsac_lemire(class_):
m, n = class_.shape
r = np.corrcoef(class_, rowvar=False)
# CALC standardized class_
mean_vec = class_.mean(axis=0)
std_vec = class_.std(axis=0, ddof=1)
smx = (class_ - np.tile(mean_vec, (m, 1))) / np.tile(std_vec, (m, 1))
# CALC correlation variance
w = np.zeros((m, n, n))
for i in range(m):
w[i, :, :] = smx[i, 0].transpose() * smx[i, 0]
aw = np.zeros((n, n))
for i in range(n):
aw[i, :] = w[:, i, :].mean(axis=0)
var_r = np.zeros((n, n))
for i in range(n):
for j in range(n):
var_r[i, j] = (n / (n - 1) ** 3) * sum((w[:, i, j] - aw[i, j]) ** 2)
# CALC lambda
sum_var_r = sum(sum(var_r)) - sum(np.diag(var_r))
sum_rr = sum(sum(r ** 2)) - sum(np.diag(r) ** 2)
lambda_star = sum_var_r / sum_rr
lambda_ = max(0, min(1, lambda_star))
# CALC R*
multiple = 1 - lambda_
r_star = np.zeros((n, n))
for i in range(n):
for j in range(n):
r_star[i, j] = r[i, j] * multiple if i != j else 1
# CALC COV*
cov_star = np.zeros((n, n))
for i in range(n):
for j in range(n):
cov_star[i, j] = r_star[i, j] * std_vec[i] * std_vec[j] if i != j else std_vec[i] ** 2
return cov_star, lambda_
def mahalanobis(point_from, point_to, inverse_covariance_matrix):
delta = point_from - point_to
return max(np.float64(0), np.dot(np.dot(delta, inverse_covariance_matrix), delta)) ** 0.5
def approx(number: float, *, sign, epsilon=1e-4):
return number + np.sign(sign) * epsilon
test_point = np.array([1., 2.])
class_ = np.array([[1., 1.], [2., 2.]])
T = np.diag(class_.std(axis=0, ddof=1) ** 2)
pl_covariance_matrix, lambda_ = prekopcsac_lemire(class_)
print("T:", *T)
print("COV(*):", *pl_covariance_matrix)
print("Lambda:", lambda_)
# Первое условие - T является положительно определенной матрицей
# (достаточное условие: все собственные значения матрицы T положительны)
first_condition = (np.linalg.eig(T)[0] > approx(0., sign=+1)).all()
print("All(", np.linalg.eig(T)[0], ") > 0 ? -> ", first_condition, sep='')
# Второе условие - лямбда в полуинтервале (0, 1]
second_condition = approx(0., sign=+1) < lambda_ <= 1
print("Lambda =", lambda_, "in (0, 1] ? ->", second_condition)
# Третье условие - наименьшее собственное значение матрицы COV(*)
# должно быть не меньше lambda, умноженной на наименьшее собственное значение T
cov_eig = min(np.linalg.eig(pl_covariance_matrix)[0])
lambda_t_eig = lambda_ * min(np.linalg.eig(T)[0])
third_condition = cov_eig >= lambda_t_eig
print(cov_eig, ">=", lambda_t_eig, "? ->", third_condition)
conditions = [first_condition, second_condition, third_condition]
if all(conditions):
print("Все три условия выполнены")
# Обратная матрица
inverse_pl_covariance_matrix = np.linalg.inv(pl_covariance_matrix)
# Вычисление расстояний
for point_to in [class_.mean(axis=0), *class_]:
print("d_M(*) (", test_point, ", ", point_to, ", COV(*)) = ",
mahalanobis(test_point, point_to, inverse_pl_covariance_matrix), sep='')
else:
print("Невыполненные условия (1-3):", *[i for i, x in enumerate(conditions, 1) if not x])
# Вывод:
# T: [0.5 0. ] [0. 0.5]
# COV(*): [0.5 0.5] [0.5 0.5]
# Lambda: 0
# All([0.5 0.5]) > 0 ? -> True
# Lambda = 0 in (0, 1] ? -> False
# 2.220446049250313e-16 >= 0.0 ? -> True
# Невыполненные условия (1-3): 2
# Комментарий:
# Лямбда <= 0, условие 2 не выполнено, вычисление результата невозможно
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment