Skip to content

Instantly share code, notes, and snippets.

@rothnic
Created August 30, 2016 04:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rothnic/cc4c13f2b6eed64ee1b4512f4a85cc35 to your computer and use it in GitHub Desktop.
Save rothnic/cc4c13f2b6eed64ee1b4512f4a85cc35 to your computer and use it in GitHub Desktop.
Modeling and Simulation Uncertainty Distribution
__author__ = 'Nick'
import scipy.stats as st
from scipy.interpolate import interpolate
import numpy as np
class Distribution:
'''
The Distribution class sets up a construct for loading uncertainty data that was generated as user input to
define the probability distribution, interpolating between the given points to create the CDF, then sampling
across the CDF to generate the PDF. The PDF is then used to generate a scipy :class:`~scipy.stats.rv_discrete`
distribution object. The scipy distribution can then be later used for sampling the distribution.
'''
def __init__(self, probData, valueData, output):
'''
Initializes the Distribution object with the user created probability data and the name of the uncertainty
variable that the distribution represents.
:param probData: A :class:`numpy.ndarray` of probabilities for the uncertainty
:param valueData: A :class:`numpy.ndarray` of values associated with the probabilities for the uncertainty
:param output: The name of the uncertainty as a string
:return: initialized Distribution object
'''
self.output = output
self.probData = probData
self.valueData = valueData
self.make_cdf()
self.make_pdf()
def sample(self, prob):
'''
Samples the initialized distribution to retrieve the value with an occurrence of the provided probability.
This is referred to as the Percent Point Function. See :meth:`scipy.stats.rv_discrete.ppf` for more
information.
:param prob: Value between 0 and 1, representing the probability of occurrence of the returned value
:return: The value associated with the provided probability
'''
# Sample the Percent Point Function (PPF). Given a probability it returns a value
# expected to occur at the rate of the probability.
return self.pdf.ppf(prob)
def make_cdf(self):
'''
Generates a CDF from user provided data by using the :func:`scipy.interpolate.splmake` function
:return: representation of the spline interpolated CDF that was initialized into the Distribution object
'''
self.cdf = interpolate.splmake(self.valueData, self.probData, order=1)
def make_pdf(self):
'''
Generates a PDF from the spline-interpolated CDF. This method crudely calculates the derivative of the CDF by
calculating the change in likelihood across the distribution. Next, it provides the bins used for sampling
the CDF, along with the the difference in probability between the bins to :class:`~scipy.stats.rv_discrete`,
and in return receives the :class:`~scipy.stats.rv_discrete` object, then saves it into the
:class:`~Uncertainties.calc_uncertainties.Distribution` object for later sampling using the
:py:meth:`~Uncertainties.calc_uncertainties.Distribution.sample` method.
:return: None
'''
# Divide up bins across value range to sample probability changes
bins = np.linspace(min(self.valueData), max(self.valueData), 50)
diff = np.zeros(len(bins))
# Step through and calculate change in probability for values across CDF to yield PDF
for i in range(1, len(bins) - 1):
start = interpolate.spleval(self.cdf, bins[i])
end = interpolate.spleval(self.cdf, bins[i + 1])
diff[i] = end - start
# Generate the PDF from interpolated and sampled CDF
self.pdf = st.rv_discrete(name=self.output, values=(bins, diff))
pedsPerHourOn_prob pedsPerHourOn pedsPerHourOff_prob pedsPerHourOff tileUnitCost_prob tileUnitCost elecUtilityRate_prob elecUtilityRate panelEff_prob panelEff circuitLoss_prob circuitLoss turbineEff_prob turbineEff baselineOceanVoyPowerUse_prob baselineOceanVoyPowerUse baselineTotalPowerUse_prob baselineTotalPowerUse
0 50 0 50 0 350 0 0.06 0 0.02 0 0.2 0 0.3 0 10800000 0 26900000
0.02 150 0.05 150 0.03 450 0.0005 0.07 0.0005 0.063 0.0005 0.225 0.0005 0.31 0.009 12000000 0.009 27000000
0.05 250 0.12 250 0.09 550 0.005 0.08 0.005 0.106 0.005 0.255 0.005 0.32 0.05 13200000 0.05 27100000
0.1 350 0.22 350 0.2 650 0.05 0.09 0.05 0.149 0.05 0.275 0.05 0.33 0.1 14400000 0.1 27200000
0.19 450 0.42 450 0.35 750 0.25 0.1 0.3 0.205 0.3 0.295 0.3 0.355 0.17 15600000 0.17 27300000
0.33 550 0.64 550 0.55 850 0.55 0.11 0.5 0.235 0.5 0.305 0.5 0.37 0.35 16800000 0.35 27400000
0.5 650 0.77 650 0.75 950 0.9 0.12 0.9 0.285 0.9 0.335 0.9 0.39 0.65 18000000 0.65 27500000
0.64 750 0.83 750 0.84 1050 0.95 0.13 0.99 0.321 0.99 0.355 0.99 0.41 0.86 19200000 0.86 27600000
0.75 850 0.88 850 0.9 1150 0.99 0.15 0.999 0.364 0.999 0.375 0.999 0.43 0.94 20400000 0.94 27700000
0.83 950 0.93 950 0.94 1250 1 0.16 1 0.45 1 0.4 1 0.45 0.99 21600000 0.99 27800000
0.89 1050 0.95 1050 0.97 1350 0.999 22800000 0.999 27900000
0.93 1150 0.97 1150 0.99 1450 1 24000000 1 28000000
0.96 1250 0.98 1250 0.999 1550
0.98 1350 0.99 1350 1 1650
0.999 1450 0.999 1450
1 1550 1 1550
__author__ = 'Nick'
import os
from openmdao.main.api import Component
from openmdao.lib.datatypes.api import Float
import pandas as pd
from calc_uncertainties import Distribution
from Common.AttributeTools.io import get_outputs
class UncertaintiesModel(Component):
# set up inputs
pedsPerHourOn_prob = Float(0.5, iotype='in', desc='avg peds per hour on season distribution sample point')
pedsPerHourOff_prob = Float(0.5, iotype='in', desc='avg peds per hour off season distribution sample point')
tileUnitCost_prob = Float(0.5, iotype='in', desc='cost of a tribo tile distribution sample point')
elecUtilityRate_prob = Float(0.5, iotype='in', desc='cost of electricity distribution sample point')
panelEff_prob = Float(0.5, iotype='in', desc='eff in conversion of sun energy distribution sample point')
circuitLoss_prob = Float(0.5, iotype='in', desc='eff in collecting electrical energy distribution sample point')
turbineEff_prob = Float(0.5, iotype='in', desc='eff in collecting electrical energy distribution sample point')
baselineOceanVoyPowerUse_prob = Float(0.5, iotype='in',desc='current power use per year for OV distribution sample point')
baselineTotalPowerUse_prob = Float(0.5, iotype='in', desc='current power use per year for total aquarium '
'distribution sample point')
# set up outputs
pedsPerHourOn = Float(600.0, iotype='out', desc='avg peds per hour on season distribution sample value')
pedsPerHourOff = Float(500.0, iotype='out', desc='avg peds per hour off season distribution sample value')
tileUnitCost = Float(800.0, iotype='out', desc='cost of a tribo tile distribution sample value')
elecUtilityRate = Float(0.1, iotype='out', desc='cost of electricity distribution sample value')
panelEff = Float(0.23, iotype='out', desc='eff in conversion of sun energy distribution sample value')
circuitLoss = Float(0.30, iotype='out', desc='eff in collecting electrical energy distribution sample value')
turbineEff = Float(0.367, iotype='out', desc='eff in collecting electrical energy distribution sample value')
baselineOceanVoyPowerUse = Float(17265306.1224, iotype='out', desc='current power use per year for OV '
'distribution sample value')
baselineTotalPowerUse = Float(27465306.1224, iotype='out', desc='current power use per year for total '
'aquarium distribution sample value')
# set up constants
# None defined
# initialization
def __init__(self):
'''
The constructor of the OpenMDAO component is extended to initialize a :class:`list` to contain the
distributions, then it loads the CSV file containing the uncertainties data, then initializes the
:class:`Uncertainties.calc_uncertainties.Distribution` objects, and stores them in the list.
:return: None
'''
super(UncertaintiesModel, self).__init__()
# set up constants
self.distributions = []
path = os.path.dirname(os.path.realpath(__file__))
self.my_outputs = get_outputs(self)
self.filename = path + '\\uncertainties.csv'
self.init_distributions(self.filename)
# primary component method
def execute(self):
'''
The primary method that OpenMDAO requires that you populate for any component with the behavior you want the
component to have. In this case, the component is just a grouping of uncertainty variable distributions. It
is possible to integrate this capability into the each model, but this provides a way to consolidate them
into one location.
On each execution, this component loops through all saved distributions, then samples them with a probability
value (Percent Point Function) to retrieve the value that occurs at the given probability. The values are
stored into the output attributes of the UncertaintiesModel component. See
:class:`Uncertainties.calc_uncertainties.Distribution` for more information.
:return: None
'''
# ToDo: Determine if it would be better to integrate this capability into individual model components
# Loop through and sample all of my uncertainties
for dist in self.distributions:
# Get latest value from input probabilities
probInput = getattr(self, dist.output + "_prob")
setattr(self, dist.output, dist.sample(probInput))
def init_distributions(self, filename):
'''
Inits probability and associated values from a provided CSV file. This enables you to defined the uncertain
variables by cumulative distribution functions in two columns for each variable. One defines the variable
probabilities and must match the Uncertainties component configured input name with an appended '_prob'.
Second is a column with the associated values, with a column name that matches the output name configured on
the Uncertainties component. Example table of a normal-like distribution below:
================== =============
myUncertainty_prob myUncertainty
================== =============
0 10
0.25 15
0.5 30
0.75 45
1 50
================== =============
.. note:: The CSV file can include many uncertainties input as pairs of columns, and each does not need to \
have equal rows.
:param filename: Location of the CSV file to load
:return: None, saves distributions into a python :class:`list`
'''
# Read in the CSV file
table = pd.read_csv(filename)
# Create distributions for each prob,value pair in the CSV file
for output in self.my_outputs:
probData, valueData = [], []
for col in table.columns.values.tolist():
if output in col:
if "prob" in col:
probData = table[col] # Probability series
else:
valueData = table[col] # Value series
# Make sure we found the columns before creating distribution, otherwise raise error
if isinstance(probData, pd.Series) and isinstance(valueData, pd.Series):
self.distributions.append(Distribution(probData, valueData, output))
else:
raise NameError
if __name__ == "__main__":
# Module test routine, executes when this python file is ran independently
# For example, using Pycharm, right click while editing and select Run
from test_uncertainties import test_uncertainties_component
test_uncertainties_component()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment