Source code for rocketpy.simulation.multivariate_rejection_sampler

"""
Multivariate Rejection Sampling Module for RocketPy

Notes
-----
This module is still under active development, and some features or attributes may
change in future versions. Users are encouraged to check for updates and read the
latest documentation.
"""

import json
from dataclasses import dataclass
from pathlib import Path
from random import random
from typing import Union

from rocketpy._encoders import RocketPyEncoder
from rocketpy.tools import flatten_dict


@dataclass
class SampleInformation:
    """Sample information used in the MRS

    Attributes
    ----------
    inputs_json : dict or None
        Dictionary containing the original information of the inputs file.
    flatted_inputs_json : dict or None
        Dictionary containing the information of the inputs file in a
        flat format to allow re-sampling based on nested data.
    outputs_json : dict or None
        Dictionary containing the original information of the outputs file.
    probability_ratio : float or None
        Probability ratio of the new proposed distribution and the previous
        distribution evaluated at the unit sample data.
    acceptance_probability : float or None
        The final acceptance probability that the given sample will be
        re-sampled. It is given by the ratio of the probability ratio
        and the supremum of all probability ratios.
    """

    inputs_json: Union[dict, None] = None
    flatted_inputs_json: Union[dict, None] = None
    outputs_json: Union[dict, None] = None
    probability_ratio: Union[float, None] = None
    acceptance_probability: Union[float, None] = None


[docs] class MultivariateRejectionSampler: """Class that performs Multivariate Rejection Sampling (MRS) from MonteCarlo results. The class currently assumes that all input variables are sampled independently when performing the Monte Carlo Simulation. Attributes ---------- """
[docs] def __init__( self, monte_carlo_filepath, mrs_filepath, ): """Initializes Multivariate Rejection Sampler (MRS) class Parameters ---------- monte_carlo_filepath : str Filepath prefixes to the files created from a MonteCarlo simulation results and used as input for resampling. mrs_filepath : str Filepath prefix to MRS obtained samples. The files created follow the same structure as those created by the MonteCarlo class but now containing the selected sub-samples. Returns ------- None """ self.monte_carlo_filepath = Path(monte_carlo_filepath) self.mrs_filepath = Path(mrs_filepath) self.distribution_dict = None self.original_sample_size = 0 self.sup_ratio = 1 self.expected_sample_size = None self.final_sample_size = None self.input_variables_names = set() self.output_variables_names = set() self.all_sample_list = [] self.accepted_sample_list = [] self.__setup_input() self.__load_output()
# pylint: disable=consider-using-with def __setup_input(self): """Loads input information from monte carlo in a SampleInformation object """ input_filename = self.monte_carlo_filepath.with_suffix(".inputs.txt") try: input_file = open(input_filename, "r+", encoding="utf-8") except FileNotFoundError as e: raise FileNotFoundError( f"Input file from monte carlo {input_filename} not found!" ) from e try: for line in input_file.readlines(): sample_info = SampleInformation() line_json = json.loads(line) flatted_line_json = flatten_dict(line_json) sample_info.inputs_json = line_json sample_info.flatted_inputs_json = flatted_line_json self.all_sample_list.append(sample_info) # sets and validates input variables names if not self.input_variables_names: self.input_variables_names = set(flatted_line_json.keys()) self.original_sample_size += 1 except Exception as e: raise ValueError( "An error occurred while reading " f"the monte carlo input file {input_filename}!" ) from e finally: input_file.close() # pylint: disable=consider-using-with def __load_output(self): """Loads output information from monte carlo in a SampleInformation object. """ output_filename = self.monte_carlo_filepath.with_suffix(".outputs.txt") sample_size_output = 0 # sanity check try: output_file = open(output_filename, "r+", encoding="utf-8") except FileNotFoundError as e: raise FileNotFoundError( f"Output file from monte carlo {output_filename} not found!" ) from e try: for line in output_file.readlines(): if sample_size_output > self.original_sample_size: raise ValueError( "Monte carlo output has more lines than the input file!" ) line_json = json.loads(line) self.all_sample_list[sample_size_output].outputs_json = line_json sample_size_output += 1 except Exception as e: raise ValueError( "An error occurred while reading " f"the monte carlo output file {output_filename}!" ) from e finally: output_file.close() if self.original_sample_size > sample_size_output: raise ValueError( "Monte carlo output file has fewer lines than the input file!" ) def __validate_distribution_dict(self, distribution_dict): """Checks that the variables passed in the distribution dictionary were in the input file. """ input_variables_names = set(distribution_dict.keys()) for variable in input_variables_names: if variable not in self.input_variables_names: raise ValueError( f"Variable key {variable} from 'distribution_dict' " "not found in input file!" ) # pylint: disable=consider-using-with
[docs] def sample(self, distribution_dict): """Performs rejection sampling and saves data Parameters ---------- distribution : dict Dictionary whose keys contain the name whose distribution changed. The values are tuples or lists with two entries. The first entry is a probability density (mass) function for the old distribution, while the second entry is the probability density function for the new distribution. Returns ------- None """ self.__validate_distribution_dict(distribution_dict) mrs_input_file = open( self.mrs_filepath.with_suffix(".inputs.txt"), "w+", encoding="utf-8" ) mrs_output_file = open( self.mrs_filepath.with_suffix(".outputs.txt"), "w+", encoding="utf-8" ) mrs_error_file = open( self.mrs_filepath.with_suffix(".errors.txt"), "w+", encoding="utf-8" ) self.__setup_probabilities(distribution_dict) try: for sample in self.all_sample_list: if random() < sample.acceptance_probability: mrs_input_file.write( json.dumps(sample.inputs_json, cls=RocketPyEncoder) + "\n" ) mrs_output_file.write( json.dumps(sample.outputs_json, cls=RocketPyEncoder) + "\n" ) except Exception as e: raise ValueError( "An error occurred while writing the selected sample to the " "output files" ) from e finally: mrs_input_file.close() mrs_output_file.close() mrs_error_file.close()
def __setup_probabilities(self, distribution_dict): """Computes the probability ratio, probability ratio supremum and acceptance probability for each sample. Parameters ---------- distribution : dict Dictionary whose keys contain the name whose distribution changed. The values are tuples or lists with two entries. The first entry is a probability density (mass) function for the old distribution, while the second entry is the probability density function for the new distribution. """ self.sup_ratio = 1 for sample in self.all_sample_list: sample.probability_ratio = self.__compute_probability_ratio( sample, distribution_dict ) self.sup_ratio = max(self.sup_ratio, sample.probability_ratio) for sample in self.all_sample_list: sample.acceptance_probability = sample.probability_ratio / self.sup_ratio self.expected_sample_size = self.original_sample_size // self.sup_ratio def __compute_probability_ratio(self, sample, distribution_dict): """Computes the ratio of the new probability to the old probability for the given sample Parameters ---------- sample: SampleInformation Sample information used to extract the values to evaluate the distributions pdf. distribution : dict Dictionary whose keys contain the name whose distribution changed. The values are tuples or lists with two entries. The first entry is a probability density (mass) function for the old distribution, while the second entry is the probability density function for the new distribution. Raises ------ ValueError Raises exception if an error occurs when computing the ratio. """ probability_ratio = 1 try: for variable in distribution_dict.keys(): value = sample.flatted_inputs_json[variable] old_pdf = distribution_dict[variable][0] new_pdf = distribution_dict[variable][1] probability_ratio *= new_pdf(value) / old_pdf(value) except Exception as e: raise ValueError( "An error occurred while evaluating the probability ratio" ) from e return probability_ratio