Last active
January 23, 2022 07:11
-
-
Save sakibguy/25b5f36038e0388e1cf94f2f7e563b23 to your computer and use it in GitHub Desktop.
Credit: Udacity AWS ML... — Gaussian Code Exercise: Read through the code below and fill out the TODOs. You'll find a cell at the end of the Jupyter notebook containing unit tests. After you've run the code cell with the Gaussian class, you can run the final cell to check that your code functions as expected. For this exercise, you will use a file …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import matplotlib.pyplot as plt | |
class Gaussian():
    """Gaussian distribution class for calculating and visualizing a
    Gaussian distribution.

    Attributes:
        mean (float) representing the mean value of the distribution
        stdev (float) representing the standard deviation of the distribution
        data (list of floats) a list of floats extracted from the data file
    """

    def __init__(self, mu=0, sigma=1):
        self.mean = mu
        self.stdev = sigma
        self.data = []

    def calculate_mean(self):
        """Method to calculate the mean of the data set.

        Updates the ``mean`` attribute as a side effect.

        Args:
            None

        Returns:
            float: mean of the data set

        Raises:
            ValueError: if no data has been loaded into ``self.data``
        """
        if not self.data:
            raise ValueError("cannot calculate the mean of an empty data set")

        self.mean = sum(self.data) / float(len(self.data))
        return self.mean

    def calculate_stdev(self, sample=True):
        """Method to calculate the standard deviation of the data set.

        Updates both the ``mean`` and ``stdev`` attributes as a side effect.

        Args:
            sample (bool): whether the data represents a sample or population

        Returns:
            float: standard deviation of the data set

        Raises:
            ValueError: if the data set is empty, or holds a single value
                while ``sample`` is True (the sample variance is undefined)
        """
        # Recompute the mean so the deviation is always taken around the
        # mean of the current data, not a stale or user-supplied value.
        mean = self.calculate_mean()

        # Bessel's correction: divide by n - 1 for a sample, n for a population.
        divisor = len(self.data) - 1 if sample else len(self.data)
        if divisor <= 0:
            raise ValueError(
                "a sample standard deviation needs at least two data points"
            )

        squared_deviations = sum((value - mean) ** 2 for value in self.data)
        self.stdev = math.sqrt(squared_deviations / divisor)
        return self.stdev

    def read_data_file(self, file_name, sample=True):
        """Method to read in data from a txt file. The txt file should have
        one number (float) per line. The numbers are stored in the data
        attribute. After reading in the file, the mean and standard deviation
        are calculated.

        Args:
            file_name (string): name of a file to read from
            sample (bool): whether the data represents a sample or population

        Returns:
            None

        Raises:
            ValueError: if a non-blank line is not a number, or the file does
                not hold enough values to compute the statistics
        """
        # The with-statement closes the file; blank lines (such as a trailing
        # newline at the end of the file) are skipped instead of failing.
        with open(file_name) as file:
            data_list = [float(line) for line in file if line.strip()]

        self.data = data_list
        self.mean = self.calculate_mean()
        self.stdev = self.calculate_stdev(sample)

    def plot_histogram(self):
        """Method to output a histogram of the instance variable data using
        matplotlib pyplot library.

        Args:
            None

        Returns:
            None
        """
        plt.hist(self.data)
        plt.title('Histogram of Data')
        plt.xlabel('data')
        plt.ylabel('count')
        plt.show()

    def pdf(self, x):
        """Probability density function calculator for the gaussian distribution.

        Args:
            x (float): point for calculating the probability density function

        Returns:
            float: probability density function output

        Raises:
            ValueError: if the standard deviation is not positive
        """
        if self.stdev <= 0:
            raise ValueError("standard deviation must be positive")

        z_score = (x - self.mean) / self.stdev
        normalizer = 1.0 / (self.stdev * math.sqrt(2 * math.pi))
        return normalizer * math.exp(-0.5 * z_score ** 2)

    def plot_histogram_pdf(self, n_spaces=50):
        """Method to plot the normalized histogram of the data and a plot of
        the probability density function along the same range.

        Args:
            n_spaces (int): number of data points

        Returns:
            list: x values for the pdf plot
            list: y values for the pdf plot
        """
        min_range = min(self.data)
        max_range = max(self.data)

        # calculates the interval between x values
        interval = 1.0 * (max_range - min_range) / n_spaces

        # x values start at the minimum and stop one interval short of the maximum
        x = [min_range + interval * i for i in range(n_spaces)]
        y = [self.pdf(value) for value in x]

        # make the plots
        fig, axes = plt.subplots(2, sharex=True)
        fig.subplots_adjust(hspace=.5)
        axes[0].hist(self.data, density=True)
        axes[0].set_title('Normed Histogram of Data')
        axes[0].set_ylabel('Density')

        axes[1].plot(x, y)
        axes[1].set_title('Normal Distribution for \n Sample Mean and Sample Standard Deviation')
        # Label the lower plot; the original labelled the upper plot twice.
        axes[1].set_ylabel('Density')
        plt.show()

        return x, y
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Unit tests to check your solution
import unittest
class TestGaussianClass(unittest.TestCase):
    """Unit tests for the Gaussian class."""

    def setUp(self):
        """Build the Gaussian fixture (mean 25, standard deviation 2) for each test."""
        self.gaussian = Gaussian(mu=25, sigma=2)
# Build the suite from the TestCase class itself: loadTestsFromModule expects
# a module object, and only happened to work on an instance because the
# instance exposes its class through the __class__ attribute.
tests_loaded = unittest.TestLoader().loadTestsFromTestCase(TestGaussianClass)
unittest.TextTestRunner().run(tests_loaded)