Skip to content

Instantly share code, notes, and snippets.

@dbiswa4
Created April 16, 2017 23:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dbiswa4/afaef3a6611830eaf04e3e88d9e975a4 to your computer and use it in GitHub Desktop.
Save dbiswa4/afaef3a6611830eaf04e3e88d9e975a4 to your computer and use it in GitHub Desktop.
# Candy2.py
# by ___________
# based on skeleton code by D. Crandall, 11/2016
#
# A candy manufacturer makes 5 different types of candy bags, each of which
# is filled with lime and cherry candies but in different proportions.
#
# We've bought 100 bags chosen at random. For each bag, we've then opened
# it, randomly drawn 100 candies, and recorded the flavor of each one.
#
# We want to estimate (1) what's the actual percentage of cherry candies
# in each of the 5 bag types, and (2) what's the actual bag type of each
# of our 100 bags?
import random
import numpy
import math
from copy import deepcopy
#####
# You shouldn't have to modify this part
# These are the *actual* values of C0 ... C4 we're trying to estimate.
# Shh... They're a secret! :) This is what we're trying to estimate.
# Ground truth for the simulation: the number of bag types and the true
# cherry probability of each type (the values the estimator should recover).
bagtype_count = 5
actual_cs = (0.2, 0.3, 0.7, 0.9, 1.0)
# Pick the true type of every purchased bag uniformly at random.
bag_count = 100
actual_bagtypes = [random.randrange(bagtype_count) for _ in range(bag_count)]
# For every bag, draw candy_count candies: each draw is a Bernoulli trial with
# the bag type's cherry probability, recorded as "C" (cherry) or "L" (lime).
candy_count = 100
observations = [
    ["LC"[draw] for draw in numpy.random.binomial(1, actual_cs[true_type], candy_count)]
    for true_type in actual_bagtypes
]
######
# This is the part you'll want to edit
# Output slots for the estimator: one cherry-probability estimate per bag
# type, and one estimated bag type per sampled bag. Both start as zeros.
estimated_cs = [0 for _ in range(bagtype_count)]
estimated_bagtypes = [0 for _ in range(bag_count)]
# Here's pseudocode for what you should implement:
#
#
# Run EM multiple times:
#
#     Randomly initialize estimated_cs
#     Until estimated probabilities converge:
#
#         # E-step
#         For each sampled bag:
#             Calculate probability of the data given each model, i.e. of the observed candies in this bag assuming each of the 5 bagtypes
#             Put the highest-probability bagtype into estimated_bagtypes[bag]
#
#         # M-step
#         For each bagtype:
#             Estimate probability c_i for this bagtype i using the bags currently assigned to this bagtype in estimated_bagtypes
#             Update estimated_cs[bagtype]
#
#     Calculate probability (or log-probability) of the data given the final values of estimated_cs
#
# Select the model with the highest probability of the data given estimated_cs
# Leftover counters that nothing below reads; kept so the module's set of
# names is unchanged.
j = 0
count = 0
# Estimates from the previous EM iteration. Starting at zeros means the first
# convergence check compares the random start against all-zero values.
previous_estimated_cs = [0] * bagtype_count
# Random starting point: one cherry probability per bag type. The size comes
# from bagtype_count (not a hard-coded 5) so it stays in step with the setup.
estimated_cs = numpy.random.uniform(0.0, 1.0, bagtype_count).tolist()
def convergeIsNotTrue(current=None, previous=None, tolerance=0.005):
    """Return True while the estimates are still moving, i.e. NOT converged.

    The largest absolute element-wise difference between the two estimate
    vectors is compared against ``tolerance``.

    Args:
        current: Current estimates. Defaults to the module-level
            ``estimated_cs`` when omitted.
        previous: Estimates from the previous iteration. Defaults to the
            module-level ``previous_estimated_cs`` when omitted.
        tolerance: Largest change that still counts as converged.

    Returns:
        True if the largest change exceeds ``tolerance``; False otherwise,
        including when there is nothing to compare.
    """
    # Fall back to the script's global state so the zero-argument call in the
    # main loop keeps working.
    if current is None:
        current = estimated_cs
    if previous is None:
        previous = previous_estimated_cs
    print("convergeIsNotTrue ()")
    print("previous_estimated_cs : ", previous)
    print("estimated_cs : ", current)
    # default=0.0 makes empty input count as "converged" instead of raising.
    improvement = max(
        (abs(new - old) for new, old in zip(current, previous)),
        default=0.0,
    )
    print ("improvement: ", improvement)
    return bool(improvement > tolerance)
# estimated_cs.append((numpy.random.uniform(0.0,1.0,5).tolist()))
def _bag_log_likelihood(cherries, limes, cherry_prob):
    """Return log P(bag | cherry_prob) for a bag with the given flavor counts.

    Returns -inf when the bag contains a flavor that ``cherry_prob`` makes
    impossible (cherries with probability 0, or limes with probability 1).
    """
    # Handle the log(0) corners explicitly rather than letting math.log raise.
    if (cherries and cherry_prob <= 0.0) or (limes and cherry_prob >= 1.0):
        return float("-inf")
    log_likelihood = 0.0
    if cherries:
        log_likelihood += cherries * math.log(cherry_prob)
    if limes:
        log_likelihood += limes * math.log(1.0 - cherry_prob)
    return log_likelihood


# Hard-assignment EM: alternate between labelling every bag with its most
# likely bag type (E-step) and re-estimating each type's cherry probability
# from the bags labelled with it (M-step), until the estimates stop moving.
while convergeIsNotTrue():
    # E-step: pick, for each bag, the bag type under which its observed
    # candies are most probable.
    for ind, observation in enumerate(observations):
        cherries = observation.count('C')
        limes = len(observation) - cherries
        # max() returns the first best index, so ties go to the lowest type;
        # indexing by position (not list.index) is safe with equal estimates.
        estimated_bagtypes[ind] = max(
            range(len(estimated_cs)),
            key=lambda bagtype: _bag_log_likelihood(cherries, limes, estimated_cs[bagtype]),
        )
    print("estimated_bagtypes : ", estimated_bagtypes)
    # Snapshot the estimates before updating them so the next convergence
    # check can measure how far this iteration moved them.
    previous_estimated_cs = deepcopy(estimated_cs)
    print("In while:")
    print("estimated_cs : ", estimated_cs)
    print("previous_estimated_cs : ", previous_estimated_cs)
    # M-step: the new estimate for a type is the fraction of cherry candies
    # among all candies drawn from the bags currently assigned to that type.
    for bag in range(bagtype_count):
        print("bag : ", bag)
        assigned = [obs for obs, bagtype in zip(observations, estimated_bagtypes) if bagtype == bag]
        candies_seen = sum(len(obs) for obs in assigned)
        # A type with no assigned candies has no evidence; keep its estimate.
        if candies_seen:
            cherries_seen = sum(obs.count('C') for obs in assigned)
            estimated_cs[bag] = float(cherries_seen) / float(candies_seen)
######
# You shouldn't have to modify this part -- it just spits out the results.
# Sort the estimated probabilities so they coincide with the actual ones
#
# Order the estimates ascending, remembering each one's original bag-type
# index, so they line up with the ascending actual_cs.
sorted_cs = sorted(zip(estimated_cs, range(len(estimated_cs))))
estimated_cs = [value for value, _ in sorted_cs]
# index_remapper[old_index] gives the position of that bag type after sorting.
index_remapper = [0] * bagtype_count
for new_index in range(bagtype_count):
    old_index = sorted_cs[new_index][1]
    index_remapper[old_index] = new_index
# Relabel every bag with its bag type's post-sort index.
estimated_bagtypes = [index_remapper[old_label] for old_label in estimated_bagtypes]
print ("Actual C's: ", actual_cs)
print ("Estimated C's: ", estimated_cs)
print ("Actual bagtypes: ", actual_bagtypes)
print ("Estimated bagtypes: ", estimated_bagtypes)
# Count the bags whose estimated type equals the true type.
print ("Correctly estimated bags: ", sum(truth == guess for truth, guess in zip(actual_bagtypes, estimated_bagtypes)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment