Source code for rocketpy.simulation.multivariate_rejection_sampler

"""
Multivariate Rejection Sampling Module for RocketPy

Notes
-----
This module is still under active development, and some features or attributes may
change in future versions. Users are encouraged to check for updates and read the
latest documentation.
"""

import json
from dataclasses import dataclass
from pathlib import Path
from random import random
from typing import Union

from rocketpy._encoders import RocketPyEncoder
from rocketpy.tools import flatten_dict


@dataclass
class SampleInformation:
    """Sample information used in the MRS

    Attributes
    ----------
    inputs_json : dict or None
        Dictionary containing the original information of the inputs file.
    flatted_inputs_json : dict or None
        Dictionary containing the information of the inputs file in a
        flat format to allow re-sampling based on nested data.
    outputs_json : dict or None
        Dictionary containing the original information of the outputs file.
    probability_ratio : float or None
        Probability ratio of the new proposed distribution and the previous
        distribution evaluated at the unit sample data.
    acceptance_probability : float or None
        The final acceptance probability that the given sample will be
        re-sampled. It is given by the ratio of the probability ratio
        and the supremum of all probability ratios.
    """

    inputs_json: Union[dict, None] = None
    flatted_inputs_json: Union[dict, None] = None
    outputs_json: Union[dict, None] = None
    probability_ratio: Union[float, None] = None
    acceptance_probability: Union[float, None] = None



[docs]
class MultivariateRejectionSampler:
    """Class that performs Multivariate Rejection Sampling (MRS) from MonteCarlo
    results.

    The class currently assumes that all input variables are sampled independently
    when performing the Monte Carlo Simulation.

    Attributes
    ----------
    """


[docs]
    def __init__(
        self,
        monte_carlo_filepath,
        mrs_filepath,
    ):
        """Initializes Multivariate Rejection Sampler (MRS) class

        Parameters
        ----------
        monte_carlo_filepath : str
            Filepath prefixes to the files created from a MonteCarlo simulation
            results and used as input for resampling.
        mrs_filepath : str
            Filepath prefix to MRS obtained samples. The files created follow the same
            structure as those created by the MonteCarlo class but now containing the
            selected sub-samples.

        Returns
        -------
        None
        """
        self.monte_carlo_filepath = Path(monte_carlo_filepath)
        self.mrs_filepath = Path(mrs_filepath)
        self.distribution_dict = None
        self.original_sample_size = 0
        self.sup_ratio = 1
        self.expected_sample_size = None
        self.final_sample_size = None
        self.input_variables_names = set()
        self.output_variables_names = set()
        self.all_sample_list = []
        self.accepted_sample_list = []
        self.__setup_input()
        self.__load_output()


    # pylint: disable=consider-using-with
    def __setup_input(self):
        """Loads input information from monte carlo in a SampleInformation
        object
        """
        input_filename = self.monte_carlo_filepath.with_suffix(".inputs.txt")

        try:
            input_file = open(input_filename, "r+", encoding="utf-8")
        except FileNotFoundError as e:
            raise FileNotFoundError(
                f"Input file from monte carlo {input_filename} not found!"
            ) from e

        try:
            for line in input_file.readlines():
                sample_info = SampleInformation()
                line_json = json.loads(line)
                flatted_line_json = flatten_dict(line_json)
                sample_info.inputs_json = line_json
                sample_info.flatted_inputs_json = flatted_line_json
                self.all_sample_list.append(sample_info)

                # sets and validates input variables names
                if not self.input_variables_names:
                    self.input_variables_names = set(flatted_line_json.keys())
                self.original_sample_size += 1
        except Exception as e:
            raise ValueError(
                "An error occurred while reading "
                f"the monte carlo input file {input_filename}!"
            ) from e

        finally:
            input_file.close()

    # pylint: disable=consider-using-with
    def __load_output(self):
        """Loads output information from monte carlo in a SampleInformation
        object.
        """
        output_filename = self.monte_carlo_filepath.with_suffix(".outputs.txt")
        sample_size_output = 0  # sanity check

        try:
            output_file = open(output_filename, "r+", encoding="utf-8")
        except FileNotFoundError as e:
            raise FileNotFoundError(
                f"Output file from monte carlo {output_filename} not found!"
            ) from e

        try:
            for line in output_file.readlines():
                if sample_size_output > self.original_sample_size:
                    raise ValueError(
                        "Monte carlo output has more lines than the input file!"
                    )
                line_json = json.loads(line)
                self.all_sample_list[sample_size_output].outputs_json = line_json
                sample_size_output += 1
        except Exception as e:
            raise ValueError(
                "An error occurred while reading "
                f"the monte carlo output file {output_filename}!"
            ) from e

        finally:
            output_file.close()

        if self.original_sample_size > sample_size_output:
            raise ValueError(
                "Monte carlo output file has fewer lines than the input file!"
            )

    def __validate_distribution_dict(self, distribution_dict):
        """Checks that the variables passed in the distribution dictionary were
        in the input file.

        """
        input_variables_names = set(distribution_dict.keys())
        for variable in input_variables_names:
            if variable not in self.input_variables_names:
                raise ValueError(
                    f"Variable key {variable} from 'distribution_dict' "
                    "not found in input file!"
                )

    # pylint: disable=consider-using-with

[docs]
    def sample(self, distribution_dict):
        """Performs rejection sampling and saves data

        Parameters
        ----------
        distribution : dict
            Dictionary whose keys contain the name whose distribution changed.
            The values are tuples or lists with two entries. The first entry is
            a probability density (mass) function for the old distribution,
            while the second entry is the probability density function for the
            new distribution.

        Returns
        -------
        None
        """

        self.__validate_distribution_dict(distribution_dict)

        mrs_input_file = open(
            self.mrs_filepath.with_suffix(".inputs.txt"), "w+", encoding="utf-8"
        )
        mrs_output_file = open(
            self.mrs_filepath.with_suffix(".outputs.txt"), "w+", encoding="utf-8"
        )
        mrs_error_file = open(
            self.mrs_filepath.with_suffix(".errors.txt"), "w+", encoding="utf-8"
        )

        self.__setup_probabilities(distribution_dict)

        try:
            for sample in self.all_sample_list:
                if random() < sample.acceptance_probability:
                    mrs_input_file.write(
                        json.dumps(sample.inputs_json, cls=RocketPyEncoder) + "\n"
                    )
                    mrs_output_file.write(
                        json.dumps(sample.outputs_json, cls=RocketPyEncoder) + "\n"
                    )
        except Exception as e:
            raise ValueError(
                "An error occurred while writing the selected sample to the "
                "output files"
            ) from e

        finally:
            mrs_input_file.close()
            mrs_output_file.close()
            mrs_error_file.close()


    def __setup_probabilities(self, distribution_dict):
        """Computes the probability ratio, probability ratio supremum and acceptance
        probability for each sample.

        Parameters
        ----------
        distribution : dict
            Dictionary whose keys contain the name whose distribution changed. The values
            are tuples or lists with two entries. The first entry is a probability
            density (mass) function for the old distribution, while the second entry
            is the probability density function for the new distribution.
        """
        self.sup_ratio = 1
        for sample in self.all_sample_list:
            sample.probability_ratio = self.__compute_probability_ratio(
                sample, distribution_dict
            )
            self.sup_ratio = max(self.sup_ratio, sample.probability_ratio)

        for sample in self.all_sample_list:
            sample.acceptance_probability = sample.probability_ratio / self.sup_ratio
        self.expected_sample_size = self.original_sample_size // self.sup_ratio

    def __compute_probability_ratio(self, sample, distribution_dict):
        """Computes the ratio of the new probability to the old probability
        for the given sample

        Parameters
        ----------
        sample: SampleInformation
            Sample information used to extract the values to evaluate the
            distributions pdf.
        distribution : dict
            Dictionary whose keys contain the name whose distribution changed. The values
            are tuples or lists with two entries. The first entry is a probability
            density (mass) function for the old distribution, while the second entry
            is the probability density function for the new distribution.

        Raises
        ------
        ValueError
            Raises exception if an error occurs when computing the ratio.
        """
        probability_ratio = 1
        try:
            for variable in distribution_dict.keys():
                value = sample.flatted_inputs_json[variable]
                old_pdf = distribution_dict[variable][0]
                new_pdf = distribution_dict[variable][1]
                probability_ratio *= new_pdf(value) / old_pdf(value)
        except Exception as e:
            raise ValueError(
                "An error occurred while evaluating the probability ratio"
            ) from e

        return probability_ratio