Skip to content

Instantly share code, notes, and snippets.

@p-geon
Last active July 18, 2022 04:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save p-geon/c8ddc4c7941d51f5a35f98848041bcf5 to your computer and use it in GitHub Desktop.
Save p-geon/c8ddc4c7941d51f5a35f98848041bcf5 to your computer and use it in GitHub Desktop.
import copy
import itertools
from typing import List, Callable
import numpy as np; np.random.seed(seed=42)
from sklearn.preprocessing import StandardScaler, minmax_scale
# https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html#sklearn.preprocessing.StandardScaler
# https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.minmax_scale.html#sklearn.preprocessing.minmax_scale
def check_result(y1, y2, eps=1e-11):
    """Print summary statistics of two arrays and verify that they agree.

    Args:
        y1: First array (1-D or 2-D), e.g. the output of a reference scaler.
        y2: Second array; must have exactly the same shape as ``y1``.
        eps: Exclusive upper bound on the 2-norm of ``y1 - y2``.

    Raises:
        AssertionError: If the shapes differ, or if the 2-norm of the
            difference is not strictly below ``eps`` (this includes NaN).
    """
    for label, arr in (('y1', y1), ('y2', y2)):
        print(
            f'[{label}] max: {np.round(np.max(arr), 4)}, '
            f'min: {np.round(np.min(arr), 4)}, '
            f'mean: {np.round(np.mean(arr), 4)}, '
            f'std: {np.round(np.std(arr), 4)}'
        )

    # Without this guard, broadcasting in ``y1 - y2`` would let arrays of
    # different shapes compare as "equal" (e.g. (2, 2) against (1, 2)).
    if np.shape(y1) != np.shape(y2):
        raise AssertionError(
            'Arrays inconsistency in normalizing: shapes differ, '
            f'{np.shape(y1)} vs {np.shape(y2)}'
        )

    # ord=2 is the Euclidean norm for 1-D input and the largest singular
    # value for 2-D input; either way it is already a scalar.
    deviation = np.linalg.norm(y1 - y2, ord=2)

    # Raised explicitly (rather than via ``assert``) so the check still runs
    # under ``python -O``. ``not (a < b)`` also rejects a NaN deviation.
    if not deviation < eps:
        raise AssertionError(f'Arrays inconsistency in normalizing. {y1}, {y2}')
def test_preprocessing(normalize: List[Callable], _y: np.ndarray) -> None:
    """Run two normalizers on the same data and check that their outputs agree.

    Args:
        normalize: Sequence whose first two entries are callables that take
            an array and return its normalized version.
        _y: Input data. It is never passed to the normalizers directly.

    Raises:
        AssertionError: Propagated from ``check_result`` when the two
            outputs are not consistent.
    """
    reference, candidate = normalize[0], normalize[1]

    # Each normalizer receives its own private copy: if the first one worked
    # in place, sharing a single copy would feed already-normalized data to
    # the second one and make the comparison meaningless.
    y1 = reference(copy.deepcopy(_y))
    y2 = candidate(copy.deepcopy(_y))

    check_result(y1, y2)
# Test inputs: one 5x5 identity matrix followed by five random 10x10 matrices.
ys = [np.identity(5)] + [np.random.rand(10, 10) for _ in range(5)]

# Each entry pairs a scikit-learn implementation (first) with an equivalent
# hand-written NumPy expression (second), both operating column-wise (axis=0).
# NOTE: the hand-written forms divide by the per-column std / value range, so
# a constant column would make them divide by zero.
methods = {
    'normalize_std': [
        lambda x: StandardScaler().fit(x).transform(x),
        lambda x: (x - np.mean(x, axis=0)) / np.std(x, axis=0),
    ],
    'normalize_minmax': [
        minmax_scale,
        lambda x: (x - np.min(x, axis=0)) / (np.max(x, axis=0) - np.min(x, axis=0)),
    ],
}

# Compare both implementations of every method on every input; cases are
# numbered from 1 within each method.
for name, pair in methods.items():
    for case_no, y in enumerate(ys, start=1):
        print(f"> {name}-{case_no}")
        test_preprocessing(pair, y)
@p-geon
Copy link
Author

p-geon commented Jul 18, 2022

normalize_std-1
[y1] max: 2.0, min: -0.5, mean: -0.0, std: 1.0
[y2] max: 2.0, min: -0.5, mean: -0.0, std: 1.0
normalize_std-2
[y1] max: 2.2014, min: -1.7005, mean: -0.0, std: 1.0
[y2] max: 2.2014, min: -1.7005, mean: -0.0, std: 1.0
normalize_std-3
[y1] max: 1.7333, min: -1.931, mean: -0.0, std: 1.0
[y2] max: 1.7333, min: -1.931, mean: -0.0, std: 1.0
normalize_std-4
[y1] max: 1.811, min: -2.2329, mean: 0.0, std: 1.0
[y2] max: 1.811, min: -2.2329, mean: 0.0, std: 1.0
normalize_std-5
[y1] max: 2.2626, min: -2.0856, mean: -0.0, std: 1.0
[y2] max: 2.2626, min: -2.0856, mean: -0.0, std: 1.0
normalize_std-6
[y1] max: 2.3221, min: -1.7309, mean: 0.0, std: 1.0
[y2] max: 2.3221, min: -1.7309, mean: 0.0, std: 1.0
normalize_minmax-1
[y1] max: 1.0, min: 0.0, mean: 0.2, std: 0.4
[y2] max: 1.0, min: 0.0, mean: 0.2, std: 0.4
normalize_minmax-2
[y1] max: 1.0, min: 0.0, mean: 0.466, std: 0.3438
[y2] max: 1.0, min: 0.0, mean: 0.466, std: 0.3438
normalize_minmax-3
[y1] max: 1.0, min: 0.0, mean: 0.4966, std: 0.3461
[y2] max: 1.0, min: 0.0, mean: 0.4966, std: 0.3461
normalize_minmax-4
[y1] max: 1.0, min: 0.0, mean: 0.5281, std: 0.3311
[y2] max: 1.0, min: 0.0, mean: 0.5281, std: 0.3311
normalize_minmax-5
[y1] max: 1.0, min: 0.0, mean: 0.5144, std: 0.3408
[y2] max: 1.0, min: 0.0, mean: 0.5144, std: 0.3408
normalize_minmax-6
[y1] max: 1.0, min: 0.0, mean: 0.4812, std: 0.3574
[y2] max: 1.0, min: 0.0, mean: 0.4812, std: 0.3574

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment