Skip to content

Instantly share code, notes, and snippets.

@kevinAlbs
Created July 27, 2016 16:25
Show Gist options
Save kevinAlbs/d3a222ea51f3e7b43c8b92830d8b47e7 to your computer and use it in GitHub Desktop.
import numeric_string_parser
from numeric_string_parser import NumericStringParser
# Declarative description of this module: a human-readable summary plus, for
# each function defined below, its parameters ('param'), an optional
# positional ordering ('param_order'), and the names/types of the values it
# produces ('entry_result' for per-entry output, 'aggregate_result' for
# whole-field output).
# NOTE(review): presumably read by a host framework to validate calls and
# build UI -- confirm against the consumer of SPECS.
SPECS = {
    'description': 'Provides functions for statistical analysis',
    'functions': {
        # Evaluate a text formula once per value of a numeric field.
        "gen_math": {
            'param_order': ['formula', 'xParam'],
            'param': {
                'formula': {
                    'type': 'text',
                    'comment': 'Expression to be evaluated',
                },
                'xParam': {
                    'type': 'field_reference numeric',
                    'comment': 'x values',
                },
            },
            'entry_result': {
                'result': 'numeric',
            },
        },
        # Summary statistics over a single numeric field.
        'basic': {
            'param': {
                'field': {
                    'type': 'field_reference numeric',
                    'comment': 'Field to analyze',
                },
            },
            'aggregate_result': {
                'total': 'numeric',
                'max': 'numeric',
                'min': 'numeric',
                'average': 'numeric',
                'variance': 'numeric',
                'standard_deviation': 'numeric',
            },
        },
        # Element-wise arithmetic between two numeric fields.
        "binary_operation": {
            'param_order': ['field_1', 'operation', 'field_2'],
            'param': {
                'field_1': {
                    'type': 'field_reference numeric',
                    'comment': 'First field',
                },
                'field_2': {
                    'type': 'field_reference numeric',
                    'comment': 'Second field',
                },
                'operation': {
                    'type': 'text',
                    'constraints': {
                        'choices': ['+', '-', '/', '*'],
                    },
                },
            },
            'entry_result': {
                'result': 'numeric',
            },
        },
        # Correlation coefficient between two numeric fields.
        "correlation": {
            'param': {
                'field_1': {
                    'type': 'field_reference numeric',
                    'comment': 'First field',
                },
                'field_2': {
                    'type': 'field_reference numeric',
                    'comment': 'Second field',
                },
            },
            'aggregate_result': {
                'correlation': 'numeric',
            },
        },
        # Straight-line fit y = a + b*x between two numeric fields.
        "regression": {
            'param': {
                'field_1': {
                    'type': 'field_reference numeric',
                    'comment': 'First field (X-axis)',
                },
                'field_2': {
                    'type': 'field_reference numeric',
                    'comment': 'Second field (Y-axis)',
                },
            },
            'aggregate_result': {
                'a_value': 'numeric',
                'b_value': 'numeric',
                'equation': 'text',
            },
        },
    },
}
def gen_math(working_set, param=False):
    """Evaluate a formula once for every x value.

    For each value in ``param['xParam']`` every occurrence of the character
    ``'x'`` in ``param['formula']`` is replaced by ``str(value)`` and the
    resulting text is passed to ``NumericStringParser.eval``.

    Args:
        working_set: Not used by this function.
        param: Mapping with the keys ``'formula'`` (expression text) and
            ``'xParam'`` (sequence of values substituted for ``x``).

    Returns:
        ``{'entry_analysis': {'result': [...]}}`` with one evaluated value
        per entry of ``param['xParam']``, in the same order.
    """
    parser = NumericStringParser()
    expression = param['formula']
    # NOTE(review): the substitution is purely textual, so an 'x' inside a
    # longer name (e.g. "exp") is replaced as well, and a negative value
    # yields text such as "2--3". Confirm the parser grammar tolerates this.
    results = [
        # str() is required: the values are numeric, the formula is text.
        parser.eval(expression.replace('x', str(x_value)))
        for x_value in param['xParam']
    ]
    return {
        'entry_analysis': {
            'result': results
        }
    }
def basic(working_set, param=False):
    """Compute summary statistics for one numeric field.

    Args:
        working_set: Not used by this function.
        param: Mapping whose ``'field'`` entry is a non-empty sequence of
            numbers.

    Returns:
        A dict with ``'aggregate_analysis'`` holding ``total``, ``max``,
        ``min``, ``average``, ``variance`` (sample variance, divisor
        ``n - 1``) and ``standard_deviation``, plus an empty
        ``'entry_analysis'`` dict. With a single value the variance and
        standard deviation are reported as ``0.0``.

    Raises:
        IndexError: If the field contains no values.
    """
    values = param['field']
    count = len(values)
    if count == 0:
        raise IndexError("basic() needs at least one value in 'field'")

    total = sum(values)
    # 1.0 * forces true division even for integer data on Python 2.
    average = 1.0 * total / count

    if count > 1:
        variance = sum((value - average) ** 2 for value in values) / (count - 1)
    else:
        # A single observation has no spread; avoid dividing by n - 1 == 0.
        variance = 0.0
    standard_deviation = variance ** 0.5

    return {
        'aggregate_analysis': {
            'total': total,
            'max': max(values),
            'min': min(values),
            'average': average,
            'variance': variance,
            'standard_deviation': standard_deviation
        },
        'entry_analysis': {}
    }
def binary_operation(working_set, param=False):
    """Apply an arithmetic operator element-wise to two fields.

    Args:
        working_set: Not used by this function.
        param: Mapping with ``'field_1'`` and ``'field_2'`` (sequences of
            numbers) and ``'operation'`` (one of ``'+'``, ``'-'``, ``'/'``,
            ``'*'``). Any other operator text falls back to addition.

    Returns:
        ``{'entry_analysis': {'result': [...]}}`` with one value per entry
        of ``field_1``.

    Raises:
        IndexError: If ``field_2`` has fewer entries than ``field_1``.
        ZeroDivisionError: If ``'/'`` is requested and a ``field_2`` value
            is zero.
    """
    first_values = param['field_1']
    second_values = param['field_2']

    operations = {
        '-': lambda a, b: a - b,
        # 1.0 * forces true division even for integer data on Python 2.
        '/': lambda a, b: 1.0 * a / b,
        '*': lambda a, b: a * b,
    }
    # Addition is the default for '+' and for any unrecognised operator.
    apply_op = operations.get(param['operation'], lambda a, b: a + b)

    results = []
    for index, left in enumerate(first_values):
        results.append(apply_op(left, second_values[index]))

    return {
        'entry_analysis': {
            'result': results
        }
    }
def correlation(working_set, param=False):
    """Compute the Pearson correlation coefficient of two fields.

    Uses the single-pass sums form
    ``(n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx^2) * (n*Syy - Sy^2))``.

    Args:
        working_set: Not used by this function.
        param: Mapping with ``'field_1'`` and ``'field_2'``, sequences of
            numbers paired by position.

    Returns:
        ``{'aggregate_analysis': {'correlation': r}}``.

    Raises:
        IndexError: If ``field_2`` has fewer entries than ``field_1``.
        ZeroDivisionError: If the fields are empty or either field is
            constant (the coefficient is undefined in that case).
    """
    x_values = param['field_1']
    y_values = param['field_2']
    count = len(x_values)

    sum_x = 0
    sum_y = 0
    sum_x2 = 0
    sum_y2 = 0
    sum_xy = 0
    for index, x in enumerate(x_values):
        y = y_values[index]
        sum_x += x
        sum_y += y
        sum_x2 += x ** 2
        sum_y2 += y ** 2
        sum_xy += x * y

    numerator = count * sum_xy - sum_x * sum_y
    # Both spread terms are needed; dropping the Y term gives a value that
    # is not bounded to [-1, 1].
    denominator = (
        (count * sum_x2 - sum_x ** 2) * (count * sum_y2 - sum_y ** 2)
    ) ** 0.5
    result = numerator / denominator

    return {
        'aggregate_analysis': {
            'correlation': result
        }
    }
def regression(working_set, param=False):
    """Fit the least-squares line ``y = a + b*x`` through two fields.

    Args:
        working_set: Not used by this function.
        param: Mapping with ``'field_1'`` (x values) and ``'field_2'``
            (y values), sequences of numbers paired by position.

    Returns:
        ``{'aggregate_analysis': {...}}`` holding ``a_value`` (intercept),
        ``b_value`` (slope) and ``equation`` (the fitted line as text).

    Raises:
        IndexError: If ``field_2`` has fewer entries than ``field_1``.
        ZeroDivisionError: If the fields are empty or all x values are
            equal (no unique line exists).
    """
    x_values = param['field_1']
    y_values = param['field_2']
    count = len(x_values)

    sum_x = 0
    sum_y = 0
    sum_x2 = 0
    sum_xy = 0
    for index, x in enumerate(x_values):
        y = y_values[index]
        sum_x += x
        sum_y += y
        sum_x2 += x ** 2
        sum_xy += x * y

    # Shared denominator of the normal-equation solution.
    spread_x = count * sum_x2 - sum_x ** 2
    # Leading 1.0 * forces float arithmetic for integer data on Python 2.
    a_value = (1.0 * sum_y * sum_x2 - sum_x * sum_xy) / spread_x
    b_value = (1.0 * count * sum_xy - sum_x * sum_y) / spread_x

    return {
        'aggregate_analysis': {
            'a_value': a_value,
            'b_value': b_value,
            'equation': "y = " + str(a_value) + " + " + str(b_value) + "x"
        }
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment