Last active
November 18, 2021 15:18
-
-
Save chrisliatas/d74eaa1062a091120001d395835cc367 to your computer and use it in GitHub Desktop.
Impute missing values with random selection
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
def imput_nan_rand(col, in_place=False, *, missing=-1, seed=None):
    """Impute missing entries of a column with random draws from its values.

    Entries equal to ``missing`` (``-1`` by default) are treated as missing.
    For floating-point data, NaN entries are treated as missing as well,
    because they cannot be cast to ``int`` and must not end up in the
    sampling pool. Each missing entry is replaced by a value drawn uniformly
    (with replacement) from the unique non-missing values of the column.

    Parameters
    ----------
    col : pd.Series or np.ndarray
        One-dimensional numeric column to impute.
    in_place : bool, default False
        If True, the replacement values are written into ``col`` itself;
        otherwise a copy is modified and ``col`` is left untouched.
    missing : scalar, default -1
        Sentinel value that marks a missing entry.
    seed : None, int or np.random.Generator, default None
        Forwarded to ``np.random.default_rng``; pass a fixed value to get
        reproducible imputations.

    Returns
    -------
    Same container type as ``col``
        The imputed column converted with ``astype(int)``.

    Raises
    ------
    ValueError
        If there are entries to impute but no observed values to draw from.
    """
    rng = np.random.default_rng(seed)
    c = col if in_place else col.copy()

    # Work on a plain ndarray so the mask is positional regardless of any
    # pandas index carried by the column.
    values = np.asarray(c)
    missing_mask = values == missing
    if values.dtype.kind == "f":
        # NaN never compares equal to the sentinel, so flag it explicitly.
        missing_mask |= np.isnan(values)

    n_missing = int(missing_mask.sum())
    if n_missing == 0:
        return c.astype(int)

    # Unique observed values form the pool the replacements are drawn from.
    pool = np.unique(values[~missing_mask]).astype(int)
    if pool.size == 0:
        raise ValueError(
            "cannot impute: column has no observed values to draw from"
        )

    c[missing_mask] = rng.choice(pool, size=n_missing, shuffle=False)
    return c.astype(int)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment