rothnic/Uncertainties.py

## calc_uncertainties.py
__author__ = 'Nick'

import scipy.stats as st
from scipy.interpolate import interpolate
import numpy as np

class Distribution:
    '''
    The Distribution class sets up a construct for loading uncertainty data that was generated as user input to
    define the probability distribution, interpolating between the given points to create the CDF, then sampling
    across the CDF to generate the PDF. The PDF is then used to generate a scipy :class:`~scipy.stats.rv_discrete`
    distribution object. The scipy distribution can then be later used for sampling the distribution.
    '''
    def __init__(self, probData, valueData, output):
        '''
        Initializes the Distribution object with the user created probability data and the name of the uncertainty
        variable that the distribution represents.

        :param probData: A :class:`numpy.ndarray` of probabilities for the uncertainty
        :param valueData: A :class:`numpy.ndarray` of values associated with the probabilities for the uncertainty
        :param output: The name of the uncertainty as a string
        :return: initialized Distribution object
        '''
        self.output = output
        self.probData = probData
        self.valueData = valueData
        self.make_cdf()
        self.make_pdf()

    def sample(self, prob):
        '''
        Samples the initialized distribution to retrieve the value with an occurrence of the provided probability.
        This is referred to as the Percent Point Function. See :meth:`scipy.stats.rv_discrete.ppf` for more
        information.

        :param prob: Value between 0 and 1, representing the probability of occurrence of the returned value
        :return: The value associated with the provided probability
        '''

        # Sample the Percent Point Function (PPF). Given a probability it returns a value
        # expected to occur at the rate of the probability.
        return self.pdf.ppf(prob)

    def make_cdf(self):
        '''
        Generates a CDF from user provided data by using the :func:`scipy.interpolate.splmake` function

        :return: representation of the spline interpolated CDF that was initialized into the Distribution object
        '''
        self.cdf = interpolate.splmake(self.valueData, self.probData, order=1)

    def make_pdf(self):
        '''
        Generates a PDF from the spline-interpolated CDF. This method crudely calculates the derivative of the CDF by
        calculating the change in likelihood across the distribution. Next, it provides the bins used for sampling
        the CDF, along with the the difference in probability between the bins to :class:`~scipy.stats.rv_discrete`,
        and in return receives the :class:`~scipy.stats.rv_discrete` object, then saves it into the
        :class:`~Uncertainties.calc_uncertainties.Distribution` object for later sampling using the
        :py:meth:`~Uncertainties.calc_uncertainties.Distribution.sample` method.

        :return: None
        '''

        # Divide up bins across value range to sample probability changes
        bins = np.linspace(min(self.valueData), max(self.valueData), 50)
        diff = np.zeros(len(bins))

        # Step through and calculate change in probability for values across CDF to yield PDF
        for i in range(1, len(bins) - 1):
            start = interpolate.spleval(self.cdf, bins[i])
            end = interpolate.spleval(self.cdf, bins[i + 1])
            diff[i] = end - start

        # Generate the PDF from interpolated and sampled CDF
        self.pdf = st.rv_discrete(name=self.output, values=(bins, diff))

## uncertainties.csv

          
            pedsPerHourOn_prob
            pedsPerHourOn
            pedsPerHourOff_prob
            pedsPerHourOff
            tileUnitCost_prob
            tileUnitCost
            elecUtilityRate_prob
            elecUtilityRate
            panelEff_prob
            panelEff
            circuitLoss_prob
            circuitLoss
            turbineEff_prob
            turbineEff
            baselineOceanVoyPowerUse_prob
            baselineOceanVoyPowerUse
            baselineTotalPowerUse_prob
            baselineTotalPowerUse

            
              0
              50
              0
              50
              0
              350
              0
              0.06
              0
              0.02
              0
              0.2
              0
              0.3
              0
              10800000
              0
              26900000

            
              0.02
              150
              0.05
              150
              0.03
              450
              0.0005
              0.07
              0.0005
              0.063
              0.0005
              0.225
              0.0005
              0.31
              0.009
              12000000
              0.009
              27000000

            
              0.05
              250
              0.12
              250
              0.09
              550
              0.005
              0.08
              0.005
              0.106
              0.005
              0.255
              0.005
              0.32
              0.05
              13200000
              0.05
              27100000

            
              0.1
              350
              0.22
              350
              0.2
              650
              0.05
              0.09
              0.05
              0.149
              0.05
              0.275
              0.05
              0.33
              0.1
              14400000
              0.1
              27200000

            
              0.19
              450
              0.42
              450
              0.35
              750
              0.25
              0.1
              0.3
              0.205
              0.3
              0.295
              0.3
              0.355
              0.17
              15600000
              0.17
              27300000

            
              0.33
              550
              0.64
              550
              0.55
              850
              0.55
              0.11
              0.5
              0.235
              0.5
              0.305
              0.5
              0.37
              0.35
              16800000
              0.35
              27400000

            
              0.5
              650
              0.77
              650
              0.75
              950
              0.9
              0.12
              0.9
              0.285
              0.9
              0.335
              0.9
              0.39
              0.65
              18000000
              0.65
              27500000

            
              0.64
              750
              0.83
              750
              0.84
              1050
              0.95
              0.13
              0.99
              0.321
              0.99
              0.355
              0.99
              0.41
              0.86
              19200000
              0.86
              27600000

            
              0.75
              850
              0.88
              850
              0.9
              1150
              0.99
              0.15
              0.999
              0.364
              0.999
              0.375
              0.999
              0.43
              0.94
              20400000
              0.94
              27700000

            
              0.83
              950
              0.93
              950
              0.94
              1250
              1
              0.16
              1
              0.45
              1
              0.4
              1
              0.45
              0.99
              21600000
              0.99
              27800000

            
              0.89
              1050
              0.95
              1050
              0.97
              1350
              
              
              0.999
              22800000
              0.999
              27900000

            
              0.93
              1150
              0.97
              1150
              0.99
              1450
              
              
              1
              24000000
              1
              28000000

            
              0.96
              1250
              0.98
              1250
              0.999
              1550

            
              0.98
              1350
              0.99
              1350
              1
              1650

            
              0.999
              1450
              0.999
              1450

            
              1
              1550
              1
              1550

## Uncertainties.py
__author__ = 'Nick'

import os

from openmdao.main.api import Component
from openmdao.lib.datatypes.api import Float
import pandas as pd

from calc_uncertainties import Distribution
from Common.AttributeTools.io import get_outputs


class UncertaintiesModel(Component):
    # set up inputs
    pedsPerHourOn_prob = Float(0.5, iotype='in', desc='avg peds per hour on season distribution sample point')
    pedsPerHourOff_prob = Float(0.5, iotype='in', desc='avg peds per hour off season distribution sample point')
    tileUnitCost_prob = Float(0.5, iotype='in', desc='cost of a tribo tile distribution sample point')
    elecUtilityRate_prob = Float(0.5, iotype='in', desc='cost of electricity distribution sample point')
    panelEff_prob = Float(0.5, iotype='in', desc='eff in conversion of sun energy distribution sample point')
    circuitLoss_prob = Float(0.5, iotype='in', desc='eff in collecting electrical energy distribution sample point')
    turbineEff_prob = Float(0.5, iotype='in', desc='eff in collecting electrical energy distribution sample point')
    baselineOceanVoyPowerUse_prob = Float(0.5, iotype='in',desc='current power use per year for OV distribution sample point')
    baselineTotalPowerUse_prob = Float(0.5, iotype='in', desc='current power use per year for total aquarium '
                                                              'distribution sample point')

    # set up outputs
    pedsPerHourOn = Float(600.0, iotype='out', desc='avg peds per hour on season distribution sample value')
    pedsPerHourOff = Float(500.0, iotype='out', desc='avg peds per hour off season distribution sample value')
    tileUnitCost = Float(800.0, iotype='out', desc='cost of a tribo tile distribution sample value')
    elecUtilityRate = Float(0.1, iotype='out', desc='cost of electricity distribution sample value')
    panelEff = Float(0.23, iotype='out', desc='eff in conversion of sun energy distribution sample value')
    circuitLoss = Float(0.30, iotype='out', desc='eff in collecting electrical energy distribution sample value')
    turbineEff = Float(0.367, iotype='out', desc='eff in collecting electrical energy distribution sample value')
    baselineOceanVoyPowerUse = Float(17265306.1224, iotype='out', desc='current power use per year for OV '
                                                                       'distribution sample value')
    baselineTotalPowerUse = Float(27465306.1224, iotype='out', desc='current power use per year for total '
                                                                    'aquarium distribution sample value')

    # set up constants
    # None defined

    # initialization
    def __init__(self):
        '''
        The constructor of the OpenMDAO component is extended to initialize a :class:`list` to contain the
        distributions, then it loads the CSV file containing the uncertainties data, then initializes the
        :class:`Uncertainties.calc_uncertainties.Distribution` objects, and stores them in the list.

        :return: None
        '''
        super(UncertaintiesModel, self).__init__()
        # set up constants
        self.distributions = []
        path = os.path.dirname(os.path.realpath(__file__))
        self.my_outputs = get_outputs(self)
        self.filename = path + '\\uncertainties.csv'
        self.init_distributions(self.filename)

    # primary component method
    def execute(self):
        '''
        The primary method that OpenMDAO requires that you populate for any component with the behavior you want the
        component to have. In this case, the component is just a grouping of uncertainty variable distributions. It
        is possible to integrate this capability into the each model, but this provides a way to consolidate them
        into one location.

        On each execution, this component loops through all saved distributions, then samples them with a probability
        value (Percent Point Function) to retrieve the value that occurs at the given probability. The values are
        stored into the output attributes of the UncertaintiesModel component. See
        :class:`Uncertainties.calc_uncertainties.Distribution` for more information.

        :return: None
        '''
        # ToDo: Determine if it would be better to integrate this capability into individual model components

        # Loop through and sample all of my uncertainties
        for dist in self.distributions:
            # Get latest value from input probabilities
            probInput = getattr(self, dist.output + "_prob")
            setattr(self, dist.output, dist.sample(probInput))

    def init_distributions(self, filename):
        '''
        Inits probability and associated values from a provided CSV file. This enables you to defined the uncertain
        variables by cumulative distribution functions in two columns for each variable. One defines the variable
        probabilities and must match the Uncertainties component configured input name with an appended '_prob'.
        Second is a column with the associated values, with a column name that matches the output name configured on
        the Uncertainties component. Example table of a normal-like distribution below:

        ================== =============
        myUncertainty_prob myUncertainty
        ================== =============
        0                  10
        0.25               15
        0.5                30
        0.75               45
        1                  50
        ================== =============

        .. note:: The CSV file can include many uncertainties input as pairs of columns, and each does not need to \
        have equal rows.

        :param filename: Location of the CSV file to load
        :return: None, saves distributions into a python :class:`list`
        '''

        # Read in the CSV file
        table = pd.read_csv(filename)

        # Create distributions for each prob,value pair in the CSV file
        for output in self.my_outputs:
            probData, valueData = [], []
            for col in table.columns.values.tolist():
                if output in col:
                    if "prob" in col:
                        probData = table[col]  # Probability series
                    else:
                        valueData = table[col]  # Value series

            # Make sure we found the columns before creating distribution, otherwise raise error
            if isinstance(probData, pd.Series) and isinstance(valueData, pd.Series):
                self.distributions.append(Distribution(probData, valueData, output))
            else:
                raise NameError

if __name__ == "__main__":
    # Module test routine, executes when this python file is ran independently
    # For example, using Pycharm, right click while editing and select Run
    from test_uncertainties import test_uncertainties_component
    test_uncertainties_component()
	__author__ = 'Nick'

	import scipy.stats as st
	from scipy.interpolate import interpolate
	import numpy as np

	class Distribution:
	'''
	The Distribution class sets up a construct for loading uncertainty data that was generated as user input to
	define the probability distribution, interpolating between the given points to create the CDF, then sampling
	across the CDF to generate the PDF. The PDF is then used to generate a scipy :class:`~scipy.stats.rv_discrete`
	distribution object. The scipy distribution can then be later used for sampling the distribution.
	'''
	def __init__(self, probData, valueData, output):
	'''
	Initializes the Distribution object with the user created probability data and the name of the uncertainty
	variable that the distribution represents.

	:param probData: A :class:`numpy.ndarray` of probabilities for the uncertainty
	:param valueData: A :class:`numpy.ndarray` of values associated with the probabilities for the uncertainty
	:param output: The name of the uncertainty as a string
	:return: initialized Distribution object
	'''
	self.output = output
	self.probData = probData
	self.valueData = valueData
	self.make_cdf()
	self.make_pdf()

	def sample(self, prob):
	'''
	Samples the initialized distribution to retrieve the value with an occurrence of the provided probability.
	This is referred to as the Percent Point Function. See :meth:`scipy.stats.rv_discrete.ppf` for more
	information.

	:param prob: Value between 0 and 1, representing the probability of occurrence of the returned value
	:return: The value associated with the provided probability
	'''

	# Sample the Percent Point Function (PPF). Given a probability it returns a value
	# expected to occur at the rate of the probability.
	return self.pdf.ppf(prob)

	def make_cdf(self):
	'''
	Generates a CDF from user provided data by using the :func:`scipy.interpolate.splmake` function

	:return: representation of the spline interpolated CDF that was initialized into the Distribution object
	'''
	self.cdf = interpolate.splmake(self.valueData, self.probData, order=1)

	def make_pdf(self):
	'''
	Generates a PDF from the spline-interpolated CDF. This method crudely calculates the derivative of the CDF by
	calculating the change in likelihood across the distribution. Next, it provides the bins used for sampling
	the CDF, along with the the difference in probability between the bins to :class:`~scipy.stats.rv_discrete`,
	and in return receives the :class:`~scipy.stats.rv_discrete` object, then saves it into the
	:class:`~Uncertainties.calc_uncertainties.Distribution` object for later sampling using the
	:py:meth:`~Uncertainties.calc_uncertainties.Distribution.sample` method.

	:return: None
	'''

	# Divide up bins across value range to sample probability changes
	bins = np.linspace(min(self.valueData), max(self.valueData), 50)
	diff = np.zeros(len(bins))

	# Step through and calculate change in probability for values across CDF to yield PDF
	for i in range(1, len(bins) - 1):
	start = interpolate.spleval(self.cdf, bins[i])
	end = interpolate.spleval(self.cdf, bins[i + 1])
	diff[i] = end - start

	# Generate the PDF from interpolated and sampled CDF
	self.pdf = st.rv_discrete(name=self.output, values=(bins, diff))
pedsPerHourOn_prob	pedsPerHourOn	pedsPerHourOff_prob	pedsPerHourOff	tileUnitCost_prob	tileUnitCost	elecUtilityRate_prob	elecUtilityRate	panelEff_prob	panelEff	circuitLoss_prob	circuitLoss	turbineEff_prob	turbineEff	baselineOceanVoyPowerUse_prob	baselineOceanVoyPowerUse	baselineTotalPowerUse_prob	baselineTotalPowerUse
0	50	0	50	0	350	0	0.06	0	0.02	0	0.2	0	0.3	0	10800000	0	26900000
0.02	150	0.05	150	0.03	450	0.0005	0.07	0.0005	0.063	0.0005	0.225	0.0005	0.31	0.009	12000000	0.009	27000000
0.05	250	0.12	250	0.09	550	0.005	0.08	0.005	0.106	0.005	0.255	0.005	0.32	0.05	13200000	0.05	27100000
0.1	350	0.22	350	0.2	650	0.05	0.09	0.05	0.149	0.05	0.275	0.05	0.33	0.1	14400000	0.1	27200000
0.19	450	0.42	450	0.35	750	0.25	0.1	0.3	0.205	0.3	0.295	0.3	0.355	0.17	15600000	0.17	27300000
0.33	550	0.64	550	0.55	850	0.55	0.11	0.5	0.235	0.5	0.305	0.5	0.37	0.35	16800000	0.35	27400000
0.5	650	0.77	650	0.75	950	0.9	0.12	0.9	0.285	0.9	0.335	0.9	0.39	0.65	18000000	0.65	27500000
0.64	750	0.83	750	0.84	1050	0.95	0.13	0.99	0.321	0.99	0.355	0.99	0.41	0.86	19200000	0.86	27600000
0.75	850	0.88	850	0.9	1150	0.99	0.15	0.999	0.364	0.999	0.375	0.999	0.43	0.94	20400000	0.94	27700000
0.83	950	0.93	950	0.94	1250	1	0.16	1	0.45	1	0.4	1	0.45	0.99	21600000	0.99	27800000
0.89	1050	0.95	1050	0.97	1350									0.999	22800000	0.999	27900000
0.93	1150	0.97	1150	0.99	1450									1	24000000	1	28000000
0.96	1250	0.98	1250	0.999	1550
0.98	1350	0.99	1350	1	1650
0.999	1450	0.999	1450
1	1550	1	1550
	__author__ = 'Nick'

	import os

	from openmdao.main.api import Component
	from openmdao.lib.datatypes.api import Float
	import pandas as pd

	from calc_uncertainties import Distribution
	from Common.AttributeTools.io import get_outputs


	class UncertaintiesModel(Component):
	# set up inputs
	pedsPerHourOn_prob = Float(0.5, iotype='in', desc='avg peds per hour on season distribution sample point')
	pedsPerHourOff_prob = Float(0.5, iotype='in', desc='avg peds per hour off season distribution sample point')
	tileUnitCost_prob = Float(0.5, iotype='in', desc='cost of a tribo tile distribution sample point')
	elecUtilityRate_prob = Float(0.5, iotype='in', desc='cost of electricity distribution sample point')
	panelEff_prob = Float(0.5, iotype='in', desc='eff in conversion of sun energy distribution sample point')
	circuitLoss_prob = Float(0.5, iotype='in', desc='eff in collecting electrical energy distribution sample point')
	turbineEff_prob = Float(0.5, iotype='in', desc='eff in collecting electrical energy distribution sample point')
	baselineOceanVoyPowerUse_prob = Float(0.5, iotype='in',desc='current power use per year for OV distribution sample point')
	baselineTotalPowerUse_prob = Float(0.5, iotype='in', desc='current power use per year for total aquarium '
	'distribution sample point')

	# set up outputs
	pedsPerHourOn = Float(600.0, iotype='out', desc='avg peds per hour on season distribution sample value')
	pedsPerHourOff = Float(500.0, iotype='out', desc='avg peds per hour off season distribution sample value')
	tileUnitCost = Float(800.0, iotype='out', desc='cost of a tribo tile distribution sample value')
	elecUtilityRate = Float(0.1, iotype='out', desc='cost of electricity distribution sample value')
	panelEff = Float(0.23, iotype='out', desc='eff in conversion of sun energy distribution sample value')
	circuitLoss = Float(0.30, iotype='out', desc='eff in collecting electrical energy distribution sample value')
	turbineEff = Float(0.367, iotype='out', desc='eff in collecting electrical energy distribution sample value')
	baselineOceanVoyPowerUse = Float(17265306.1224, iotype='out', desc='current power use per year for OV '
	'distribution sample value')
	baselineTotalPowerUse = Float(27465306.1224, iotype='out', desc='current power use per year for total '
	'aquarium distribution sample value')

	# set up constants
	# None defined

	# initialization
	def __init__(self):
	'''
	The constructor of the OpenMDAO component is extended to initialize a :class:`list` to contain the
	distributions, then it loads the CSV file containing the uncertainties data, then initializes the
	:class:`Uncertainties.calc_uncertainties.Distribution` objects, and stores them in the list.

	:return: None
	'''
	super(UncertaintiesModel, self).__init__()
	# set up constants
	self.distributions = []
	path = os.path.dirname(os.path.realpath(__file__))
	self.my_outputs = get_outputs(self)
	self.filename = path + '\\uncertainties.csv'
	self.init_distributions(self.filename)

	# primary component method
	def execute(self):
	'''
	The primary method that OpenMDAO requires that you populate for any component with the behavior you want the
	component to have. In this case, the component is just a grouping of uncertainty variable distributions. It
	is possible to integrate this capability into the each model, but this provides a way to consolidate them
	into one location.

	On each execution, this component loops through all saved distributions, then samples them with a probability
	value (Percent Point Function) to retrieve the value that occurs at the given probability. The values are
	stored into the output attributes of the UncertaintiesModel component. See
	:class:`Uncertainties.calc_uncertainties.Distribution` for more information.

	:return: None
	'''
	# ToDo: Determine if it would be better to integrate this capability into individual model components

	# Loop through and sample all of my uncertainties
	for dist in self.distributions:
	# Get latest value from input probabilities
	probInput = getattr(self, dist.output + "_prob")
	setattr(self, dist.output, dist.sample(probInput))

	def init_distributions(self, filename):
	'''
	Inits probability and associated values from a provided CSV file. This enables you to defined the uncertain
	variables by cumulative distribution functions in two columns for each variable. One defines the variable
	probabilities and must match the Uncertainties component configured input name with an appended '_prob'.
	Second is a column with the associated values, with a column name that matches the output name configured on
	the Uncertainties component. Example table of a normal-like distribution below:

	================== =============
	myUncertainty_prob myUncertainty
	================== =============
	0 10
	0.25 15
	0.5 30
	0.75 45
	1 50
	================== =============

	.. note:: The CSV file can include many uncertainties input as pairs of columns, and each does not need to \
	have equal rows.

	:param filename: Location of the CSV file to load
	:return: None, saves distributions into a python :class:`list`
	'''

	# Read in the CSV file
	table = pd.read_csv(filename)

	# Create distributions for each prob,value pair in the CSV file
	for output in self.my_outputs:
	probData, valueData = [], []
	for col in table.columns.values.tolist():
	if output in col:
	if "prob" in col:
	probData = table[col] # Probability series
	else:
	valueData = table[col] # Value series

	# Make sure we found the columns before creating distribution, otherwise raise error
	if isinstance(probData, pd.Series) and isinstance(valueData, pd.Series):
	self.distributions.append(Distribution(probData, valueData, output))
	else:
	raise NameError

	if __name__ == "__main__":
	# Module test routine, executes when this python file is ran independently
	# For example, using Pycharm, right click while editing and select Run
	from test_uncertainties import test_uncertainties_component
	test_uncertainties_component()