Created
January 5, 2022 12:08
-
-
Save EvanGertis/87231c405eb93f46d7ba63867344217b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def maxf(s):
    """Return the maximum of ``s`` as reported by its own ``max()`` method.

    Args:
        s: Any object exposing a ``max()`` method (presumably a pandas
            Series or DataFrame -- confirm against callers).

    Returns:
        Whatever ``s.max()`` returns.
    """
    largest = s.max()
    return largest
def minf(s):
    """Return the minimum of ``s`` as reported by its own ``min()`` method.

    Args:
        s: Any object exposing a ``min()`` method (presumably a pandas
            Series or DataFrame -- confirm against callers).

    Returns:
        Whatever ``s.min()`` returns.
    """
    smallest = s.min()
    return smallest
def uniqueValue(s):
    """Report whether the 'Class' column of ``s`` is anything but uniform.

    Despite the name, the result is ``False`` when ``s`` holds exactly one
    distinct 'Class' value and ``True`` for every other count.

    Args:
        s: Object whose ``nunique()`` result can be indexed by ``'Class'``
            (presumably a pandas DataFrame -- confirm against callers).

    Returns:
        bool: ``False`` if there is exactly one distinct 'Class' value,
        ``True`` otherwise.
    """
    distinct_classes = s.nunique()['Class']
    return bool(distinct_classes != 1)
def maxPartition(maxInformationGain,maxThreshold,s,s1,s2):
    """Print the best split found so far and return the merged partitions.

    Args:
        maxInformationGain: Value printed as the information gain.
        maxThreshold: Value printed as the threshold.
        s: Frame merged in last.
        s1: First partition.
        s2: Second partition.

    Returns:
        The result of ``pd.merge(pd.merge(s1, s2), s)``.
    """
    print(f'informationGain: {maxInformationGain}, threshold: {maxThreshold}')
    # NOTE(review): both merges use pd.merge's default arguments. If s1 and
    # s2 are meant to be disjoint partitions, a concatenation may have been
    # intended instead -- confirm against the caller before changing.
    merged_partitions = pd.merge(pd.merge(s1, s2), s)
    separator = "***************"
    print("Best Partition")
    print(separator)
    print(merged_partitions)
    print(separator)
    return merged_partitions
def information_gain(s1, s2, s):
    """Return the information gain of splitting ``s`` into ``s1`` and ``s2``.

    The gain is the parent entropy minus the size-weighted entropy of the
    two partitions::

        gain = entropy(s) - |s1|/|s| * entropy(s1) - |s2|/|s| * entropy(s2)

    where ``|x|`` is the number of records in ``x``. ``entropy`` is a
    helper defined elsewhere in this file; it is assumed to return the
    class entropy of the frame it is given -- confirm against its
    definition.

    Args:
        s1: First partition of ``s``.
        s2: Second partition of ``s``.
        s: The full set that was split.

    Returns:
        The information gain as a number; ``0.0`` when ``s`` is empty.
    """
    # Cardinality is the number of records, not the number of distinct
    # class labels; using the label count makes the weights meaningless
    # (they need not even sum to 1).
    cardinalityS1 = len(s1)
    print(f'The Cardinality of s1 is: {cardinalityS1}')
    cardinalityS2 = len(s2)
    print(f'The Cardinality of s2 is: {cardinalityS2}')
    cardinalityS = len(s)
    print(f'The Cardinality of s is: {cardinalityS}')

    if cardinalityS == 0:
        # Nothing to split: avoid dividing by zero and report no gain.
        informationGain = 0.0
    else:
        # An empty partition has weight 0, so skip it rather than ask
        # entropy() about an empty frame.
        weightedEntropy = sum(
            (len(part) / cardinalityS) * entropy(part)
            for part in (s1, s2)
            if len(part)
        )
        # Gain is the reduction in entropy achieved by the split.
        informationGain = entropy(s) - weightedEntropy
    print(f'The total informationGain is: {informationGain}')
    return informationGain
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment