Source code for alien.sample_generation.random_generator

import numpy as np
from .generator import SampleGenerator
from ..data import TupleDataset, DictDataset


[docs]class UniformSampleGenerator(SampleGenerator):
    """
    Generates uniformly random samples

    :param low, high: Samples will be returned in the range(s)
        [low, high) (exclusive at the upper end). The remaining
        dimensions (of low or high) determine the shapes of the
        returned samples.
    :param dtype: the dtype of the returned values. Can be
        anything Numpy recognizes as a dtype.
    :param random_seed: a random seed to initialize the RNG.
    """

    def __init__(self, low=0, high=1, dtype=float, random_seed=None):
        self.mins = np.array(low)
        self.spans = np.array(high) - self.mins
        self.shape = self.mins.shape
        self.rng = np.random.default_rng(random_seed)
        self.dtype = dtype

[docs]    def generate_samples(self, N):
        return (self.rng.random((N, *self.shape)) * self.spans + self.mins).astype(
            self.dtype
        )


[docs]class RandomSampleGenerator(SampleGenerator):
    """
    :param distribution: a string indicating the type of
        distribution to sample from. Must be the name of one of
        the distributions of numpy.random.Generator. See

        https://numpy.org/doc/stable/reference/random/generator.html#distributions

    :param *args, **kwargs: arguments passed to the distribution
        during sample generation. Typically, these parametrize
        the distribution. (See the reference provided above.)

        We allow args (and kwargs) to be arrays, so that
        each corresponding term in the sample can be sampled
        with different parameters. (Note that numpy distributions
        don't normally allow this.)

    If the shape of each of the args is var_shape, then the
    shape of a single sample will be

        var_shape × shape

    with the latter dimension repeating the parameters
    given in the earlier dimensions.

    :param shape: as explained above, the shape of the
        latter dimensions of a sample, with repeated
        parameters.

    """

    def __init__(
        self,
        distribution="normal",
        *args,
        shape=(),
        dtype=float,
        random_seed=None,
        **kwargs
    ):
        self.rng = np.random.default_rng(random_seed)
        if not hasattr(self.rng, distribution):
            raise ValueError(
                "distribution must be a valid numpy.random.Generator distribution."
            )
        self.dist = getattr(self.rng, distribution)
        self.dtype = dtype
        self.shape = shape

        # Here we determine the shape of the parameter space,
        # i.e.,
        if len(args) > 0:
            self.var_shape = self.args[0].shape
        elif len(kwargs) > 0:
            self.var_shape = self.kwargs.values()[0].shape
        else:
            self.var_shape = ()

        if len(self.var_shape) > 0:
            self.args = TupleDataset((a.flatten() for a in args))
            self.kwargs = DictDataset({k: v.flatten() for k, v in kwargs.items()})
        else:
            self.var_shape = False
            self.args = args
            self.kwargs = kwargs

[docs]    def generate_samples(self, N):
        if self.var_shape:
            # This is some array-manipulation black magic, so I should explain it.

            # First, we iterate through each arg (and kwarg) in concert,
            # yielding sets of args for the distribution. Each arg-set
            # generates a sample of shape:
            #
            #        self.shape x N
            var_samples = [
                self.dist(*args, size=(*self.shape, N), **kwargs)
                for args, kwargs in zip(iter(self.args), iter(self.kwargs))
            ]

            # We then stack these samples (one for each arg-set) on a new axis 0,
            # yielding a single large sample of shape:
            #
            #     (flattened var_shape) x self.shape x N
            samples = np.stack(var_samples)

            # We then reshape this sample, reconstituting the initial dimensions
            # into self.var_shape.
            samples = samples.reshape((*self.var_shape, *self.shape, N))

            # Reshaping is sensitive to how the array is ordered in memory.
            # That the reshape works correctly depends on
            # 1. the fact that the reshaped dimensions come at the beginning, and
            # 2. the fact that these are standard C-ordered arrays.

            # But this leaves us with the batch dimension (size N) at the end,
            # and it needs to be at the beginning. Hence,
            return np.moveaxis(samples, -1, 0).astype(self.dtype)
        else:
            return self.dist(*self.args, size=(N, *self.shape), **self.kwargs).astype(
                self.dtype
            )