Skip to content

Instantly share code, notes, and snippets.

@Radagaisus
Last active November 5, 2022 16:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Radagaisus/deb303e032d79a32329e4fe2132b7fe8 to your computer and use it in GitHub Desktop.
Save Radagaisus/deb303e032d79a32329e4fe2132b7fe8 to your computer and use it in GitHub Desktop.
Helper module for deterministic PRNG seeding
# Deterministic Pseudo-Random Number Generator Seeding
# ------------------------------------------------------------------------------
# A helper script for setting a consistent deterministic PRNG seed for third-
# party libraries, supporting more reproducible experiment execution.
#
# Setting a deterministic seed:
#
# - By an environment variable: `PYTHON_SEED=12345 python script.py`
# - By calling `randomness.reseed(12345)`
#
# The seed is determined by the `PYTHON_SEED` environment variable and must
# be an integer. It is optional; if it is not present, the module’s interface
# remains the same, but results will be nondeterministic. It’s
# best to import this module early in execution, as it also seeds the PRNG
# of some global functionality.
#
# Note that any code using `np.random` should instead import `random` from this
# module; likewise, any code using Mimesis should import `mimesis` from this
# module. The API is the same, only using a fixed seed.
#
# The following seeds are set:
#
# - The standard library’s `random`.
# - Numpy’s global `np.random` (which is used by scikit-learn).
# - PyTorch’s seed (also configures settings for deterministic results).
#
# The following objects are exported:
#
# - `mimesis`, a `mimesis.Generic()` initialized with the given seed.
# - `random`, with the same interface as `np.random`, using Numpy’s PCG64
# generator.
#
# PyTorch and Mimesis configuration is done only if their respective packages
# are already installed.
#
# See: https://numpy.org/doc/stable/reference/random/index.html
# See: https://pytorch.org/docs/stable/notes/randomness.html
# See: https://mimesis.name/api.html#baseprovider
#
import os
from importlib.util import find_spec
import random as stdlib_random
import numpy as np
from numpy.random import Generator, PCG64
def is_package_installed(package: str) -> bool:
    """
    Returns a boolean indicating whether the given package is installed.

    Args:
        package: the importable name to look up, e.g. `'torch'`. Dotted names
            such as `'pkg.sub'` are accepted; note that `find_spec` imports
            the parent package in order to resolve them.

    Returns:
        `True` if an import spec was found for `package`, `False` otherwise.
    """
    try:
        return find_spec(package) is not None
    except ModuleNotFoundError:
        # For a dotted name, `find_spec` imports the parent package first and
        # raises if that import fails. A missing parent means the requested
        # package cannot be installed either, so report `False` rather than
        # letting the lookup crash.
        return False
# Detect the optional third-party packages once, up front. Every later step
# that touches PyTorch or Mimesis is guarded by one of these flags.
use_torch, use_mimesis = map(is_package_installed, ('torch', 'mimesis'))

# Import only what is actually available, so that this module still loads
# when an optional package is missing.
if use_torch:
    import torch

if use_mimesis:
    from mimesis import Generic
def reseed(seed: int) -> None:
    """
    Reseeds every randomness source managed by this module.

    - The standard library’s `random`.
    - The global `np.random`.
    - `randomness.random`, rebound to a new PCG64 generator built from `seed`.
    - `randomness.mimesis`, rebound to a new `mimesis.Generic(seed=seed)`
      (only when Mimesis is installed).
    - PyTorch, which is seeded and has its cuDNN flags set for deterministic
      results (only when PyTorch is installed).

    Args:
        seed: the seed to use for reseeding.
    """
    # Both module-level objects are rebound below, so declare them up front.
    global random, mimesis

    # Python’s built-in PRNG, then the global numpy PRNG (the one used by
    # scikit-learn, among others).
    stdlib_random.seed(seed)
    np.random.seed(seed)

    # The module’s own numpy generator.
    random = Generator(PCG64(seed))

    # The Mimesis generic instance used for fake data generation.
    if use_mimesis:
        mimesis = Generic(seed=seed)

    # Everything past this point is PyTorch-specific.
    if not use_torch:
        return

    torch.manual_seed(seed)

    # Settings for deterministic reproducible PyTorch results.
    # See: https://pytorch.org/docs/stable/notes/randomness.html
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
# Retrieve the PRNG seed from the environment. `None` if missing.
seed = os.environ.get('PYTHON_SEED')

# If the seed exists, convert it to an integer and reseed randomness in
# all the relevant services. Otherwise, expose an equivalent interface,
# though a nondeterministic one.
if seed is not None:
    try:
        seed = int(seed)
    except ValueError as error:
        # This runs at import time, so name the offending variable: the bare
        # `int()` message gives no hint about where the bad value came from.
        raise ValueError(
            'PYTHON_SEED must be an integer, got {!r}'.format(seed)
        ) from error
    reseed(seed)
else:
    # Expose a PCG64 PRNG generator using a nondeterministic seed.
    random = Generator(PCG64())
    # Expose a Mimesis instance using a nondeterministic seed. These are
    # plain module-level assignments, so no `global` declaration is needed.
    if use_mimesis:
        mimesis = Generic()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment