Skip to content

Instantly share code, notes, and snippets.

@EvanGertis
Created January 5, 2022 12:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save EvanGertis/87231c405eb93f46d7ba63867344217b to your computer and use it in GitHub Desktop.
Save EvanGertis/87231c405eb93f46d7ba63867344217b to your computer and use it in GitHub Desktop.
def maxf(s):
    """Return the maximum of ``s`` by delegating to ``s.max()``.

    ``s`` can be any object that exposes a ``max()`` method; it is
    presumably a pandas Series or DataFrame (confirm against callers).
    """
    return s.max()
def minf(s):
    """Return the minimum of ``s`` by delegating to ``s.min()``.

    ``s`` can be any object that exposes a ``min()`` method; it is
    presumably a pandas Series or DataFrame (confirm against callers).
    """
    return s.min()
def uniqueValue(s):
    """Report whether the ``'Class'`` column of ``s`` is NOT a single value.

    Mind the polarity: the result is ``False`` when every record carries
    the same class label and ``True`` otherwise.

    Args:
        s: Table-like object with a ``'Class'`` column (presumably a pandas
            DataFrame -- confirm against callers).

    Returns:
        ``False`` if ``s['Class']`` holds exactly one distinct value;
        ``True`` if it holds several distinct values or none at all.
    """
    # Only the 'Class' column matters, so count distinct labels there
    # instead of computing the distinct count of every column first.
    return s['Class'].nunique() != 1
def maxPartition(maxInformationGain, maxThreshold, s, s1, s2):
    """Print the best split found and return the merge of ``s1``, ``s2``, ``s``.

    Args:
        maxInformationGain: Score of the best split; only printed.
        maxThreshold: Threshold of the best split; only printed.
        s: The full table.
        s1: First partition of ``s``.
        s2: Second partition of ``s``.

    Returns:
        The result of ``pd.merge(pd.merge(s1, s2), s)``.
    """
    print(f'informationGain: {maxInformationGain}, threshold: {maxThreshold}')
    # NOTE(review): pd.merge defaults to an inner join on the columns the two
    # frames share, so only rows present in s1 AND s2 AND s survive. If s1 and
    # s2 are disjoint partitions this is empty -- confirm that a row-wise
    # concatenation was not what was intended.
    merged_partitions = pd.merge(pd.merge(s1, s2), s)
    print("Best Partition")
    print("***************")
    print(merged_partitions)
    print("***************")
    return merged_partitions
def information_gain(s1, s2, s):
    """Return the size-weighted entropy of the split of ``s`` into ``s1``/``s2``.

    Computes ``|s1|/|s| * entropy(s1) + |s2|/|s| * entropy(s2)``, where
    ``|x|`` is the number of records in ``x`` (the length of its ``'Class'``
    column) and ``entropy`` is the helper defined elsewhere in this module.

    Args:
        s1: First partition; must have a ``'Class'`` column.
        s2: Second partition; must have a ``'Class'`` column.
        s: The unsplit table; must have a ``'Class'`` column.

    Returns:
        The weighted sum described above.

    Raises:
        ZeroDivisionError: If ``s`` has no records.
    """
    # Cardinality is the number of records in each set. Counting distinct
    # class labels instead would give weights that need not sum to 1.
    cardinalityS1 = len(s1['Class'])
    print(f'The Cardinality of s1 is: {cardinalityS1}')
    cardinalityS2 = len(s2['Class'])
    print(f'The Cardinality of s2 is: {cardinalityS2}')
    cardinalityS = len(s['Class'])
    print(f'The Cardinality of s is: {cardinalityS}')

    # NOTE(review): this is the expected entropy *after* the split. The
    # textbook information gain is entropy(s) minus this quantity -- confirm
    # which one callers expect before maximising over it.
    informationGain = (
        (cardinalityS1 / cardinalityS) * entropy(s1)
        + (cardinalityS2 / cardinalityS) * entropy(s2)
    )
    print(f'The total informationGain is: {informationGain}')
    return informationGain
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment