Skip to content

Instantly share code, notes, and snippets.

@DagnyTagg2013
Created December 16, 2017 17:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DagnyTagg2013/1fbcaf59cbcc6ea19730200e527c5834 to your computer and use it in GitHub Desktop.
Save DagnyTagg2013/1fbcaf59cbcc6ea19730200e527c5834 to your computer and use it in GitHub Desktop.
BasicStatistics
# Basic 1-D descriptive statistics computed step by step on a fixed sample.
# Each intermediate result is printed so the arithmetic can be checked by hand.
# Written for Python 3 ("/" is true division, "//" is floor division).
data = [10, 2, 38, 23, 21, 5, 38, 23]

# SUM
dataSum = sum(data)
print('sum is: {}'.format(dataSum))

# COUNT
count = len(data)
print('count is: {}'.format(count))

# MEAN
avg = dataSum / count
print('mean is: {}'.format(avg))

# Point-difference of every sample from the mean (same order as data).
diff = [sample - avg for sample in data]
print('point diff from mean is: {}\n'.format(diff))

# Squared differences.
squares = [delta ** 2 for delta in diff]
print('square diffs from mean: {}\n'.format(squares))

# Sum of squared differences.
sum_squares = sum(squares)
print('sum squares: {}\n'.format(sum_squares))

# VARIANCE: population variance, i.e. the sum of squares divided by the
# number of samples (not by count - 1 as the sample variance would be).
variance = sum_squares / count
print('variance: {}\n'.format(variance))

# STDDEV: square root of the variance.
stddev = variance ** 0.5
print('standard dev: {}\n'.format(stddev))

# RANGE
# The results are deliberately NOT stored in names called min/max: rebinding
# those names would hide the builtins for the rest of the script and make any
# later min(...)/max(...) call fail with "'int' object is not callable".
data_min = min(data)
data_max = max(data)
print('range starts at: {} to {}\n'.format(data_min, data_max))

# MEDIAN
# NOTE: pay attention to INDEX vs VALUE, and to 0-based indexing.
# sorted() returns a new list; list.sort() sorts in place and returns None,
# so its result must never be assigned.
orderedData = sorted(data)
print('count is: {}'.format(count))
# Floor division keeps the index an int; count / 2 would be a float.
midIndex = count // 2
if count % 2 == 0:
    # Even number of elements: average the two middle values, which sit at
    # 0-based positions midIndex - 1 and midIndex.
    median = (orderedData[midIndex - 1] + orderedData[midIndex]) / 2
else:
    # Odd number of elements: the single middle value.
    median = orderedData[midIndex]
print('Found Median: {}'.format(median))
# *********************************************************************************
# MODE
# Map each DATA VALUE to its FREQUENCY COUNT. Counting over the sorted copy
# keeps the table (and therefore the modes) in ascending value order.
count_freq = dict()
for sample in orderedData:
    count_freq[sample] = count_freq.get(sample, 0) + 1
print('count freq: {}\n'.format(count_freq))
print('freq.values {}\n'.format(count_freq.values()))

print("\n\n*********")
print('DEBUGGING: all methods of collection data structure: ')
print(dir(count_freq))
print("*********\n\n")

# Detect the MODE by REVERSE lookup: the table above maps value -> frequency,
# the result maps the highest frequency -> list of every value that has it.
# NOTE: pay attention to the DIRECTION of each map.
valueToFreqPairs = count_freq.items()
print('full list frequency to value pairs are: {}'.format(valueToFreqPairs))

# Highest frequency seen; works because the max builtin is no longer shadowed.
maxCount = max(count_freq.values())
# A list is used as the dictionary value because several values can tie for
# the highest frequency (multiple modes).
accum_mode = {
    maxCount: [value for value, freq in valueToFreqPairs if freq == maxCount]
}
print("Mode values found for max frequency are: {}".format(accum_mode))
# ************************************************************************************
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment