Runtime complexity estimator
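"""Runtime complexity estimator.

Given a list of input sizes and the corresponding running times, correlate()
fits several candidate models to the measurements: constant, quadratic
polynomial, power law, power law times a logarithm, exponential, logarithmic,
factorial, and factorial times a logarithm.  It returns the model with the
smallest squared error, together with a LaTeX expression for the estimated
complexity class (for example N^2, N log(N), 2^N or N!) and a callable that
evaluates the fitted function.
"""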
import numpy as np
import scipy.optimize
import scipy.special
import traceback as tb
def latexnum(n, num_decimals=6):
    s = "%.*g" % (num_decimals, n)
    s = s.replace('e+', '\\cdot 10^{')
    s = s.replace('e-', '\\cdot 10^{-')
    if '{' in s: s += '}'
    s = s.replace('.', '{,}')
    return s
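# e.g. latexnum(3.14159) yields 3{,}14159 (decimal comma for LaTeX)
# and latexnum(5e8) yields 5\cdot 10^{08}.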
def latexpow(var, n, num_decimals=6):
    if(n < 0.1): n = 0
    s = latexnum(n, num_decimals)
    if s=="1": return var
    if s=="0": return ""
    if len(s)==1: return var + "^" + s
    return var + "^{" + s + "}"
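# e.g. latexpow("N", 2) yields N^2, latexpow("N", 1) yields N, and
# latexpow("N", 0.05) yields "" (exponents below 0.1 are treated as constant).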
def correlate_one(x,y):
    mean = np.mean(y)
    error = np.sum(np.power(y - mean, 2))
    debug = "mean=%g error=%g" % (mean, error)
    return (error, [mean],
            "%s" % (latexnum(mean)),
            "1",
            lambda x: mean,
            debug)
def correlate_power(x,y):
    # Try y = b * x^a
    #
    # If we get
    #   a=1, then the case is likely O(N)
    #   a=2, then the case is likely O(N²)
    #   a=3, then the case is likely O(N³)
    #   a=4, then the case is likely O(N⁴)
    # The power-law fit is obtained by performing
    # a linear least-squares fit on log(x) vs log(y).
    # The result is log(y) = log(x)*a + b
    # → y = exp(log(x)*a + b) = exp(b) * exp(log(x)*a)
    #     = exp(b) * x^a
    lx = np.log(x)
    ly = np.log(y)
    params,residuals,rank,singular,rcond = np.polyfit(lx, ly, 1, full=True)
    exponent = params[0]
    factor = np.exp(params[1])
    error = np.sum(np.power(y - (factor * x**exponent), 2))
    debug = "exponent=%g factor=%g residuals=%g error=%g" % (exponent,factor,residuals[0], error)
    p = latexpow("N", params[0], 1)
    return (error, params,
            "%s\\,x^{%s}" % (latexnum(factor), latexnum(exponent)),
            p if len(p) else "1",
            lambda x: factor * x**exponent,
            debug)
def correlate_poly(x,y):
    # Try y = ax^2 + bx + c
    #
    func = lambda x,a,b,c: a*x*x + b*x + c
    #params,pcov = scipy.optimize.curve_fit(func, xdata=x, ydata=y)
    #diag=np.sqrt(np.diag(pcov))
    params,residuals,rank,singular,rcond = np.polyfit(x, y, 2, full=True)
    error = np.sum(np.power(y - func(x,*params), 2))
    debug = "a=%g b=%g c=%g residuals=%g error=%g" % (*params,residuals[0], error)
    #debug = "a=%g b=%g c=%g residuals=%s error=%g" % (*params,str(diag), error)
    p = latexpow("N", 2 if abs(params[0]) >= 1e-2
                      else 1 if abs(params[1]) >= 1e-2
                      else 0,
                 1)
    return (error, params,
            "%s\\,x^2 + %s\\,x + %s" % (latexnum(params[0]), latexnum(params[1]), latexnum(params[2])),
            p if len(p) else "1",
            lambda x: func(x,*params),
            debug)
def correlate_powerlog(x,y):
    try:
        exponent = np.floor(correlate_power(x,y)[1][0])
        if exponent == 0: return (float("inf"), [], "", "", "This doesn't work")
        # Try y = a * log(x) * x^K
        #
        func = lambda x,a: a * np.log(x) * (x**exponent)
        params,pcov = scipy.optimize.curve_fit(func, xdata=x, ydata=y)
        diag=np.sqrt(np.diag(pcov))
        error = np.sum(np.power(y - func(x,*params), 2))
        debug = "a=%g residuals=%s error=%g" % (*params,str(diag), error)
        return (error, params,
                "%s\\,\\log(x)\\,%s" % (latexnum(params[0]), latexpow("x", exponent)),
                "%s \\log(N)" % (latexpow("N", exponent, 1)),
                lambda x: func(x,*params),
                debug
               )
    except Exception as v:
        return (float("inf"), [], "", "", str(v)) # + tb.format_exc())
def correlate_exponent(x,y):
    # Try y = a * b^x + c
    try:
        func = lambda x,a,b,c: a * b**x + c
        # ^ Note: Prints out a warning in some cases
        params,pcov = scipy.optimize.curve_fit(func, xdata=x, ydata=y, method='dogbox')
        diag=np.sqrt(np.diag(pcov))
        error = np.sum(np.power(y - func(x,*params), 2))
        debug = "a=%g b=%g c=%g residuals=%s error=%g" % (*params,str(diag), error)
        return (error, params,
                "%s \\cdot %s^x + %s" % (latexnum(params[0]),latexnum(params[1]),latexnum(params[2])),
                "%s^N" % (latexnum(params[1], 1)),
                lambda x: func(x,*params),
                debug
               )
    except Exception as v:
        return (float("inf"), [], "", "", str(v)) # + tb.format_exc())
def correlate_powerlog_plus(x,y):
    try:
        exponent = np.floor(correlate_power(x,y)[1][0])
        if exponent == 0: return (float("inf"), [], "", "", "This doesn't work")
        # Try y = (b * log(x) + c) * x^K + a
        #
        func = lambda x,a,b,c: a + (b * np.log(x) + c) * (x**exponent)
        params,pcov = scipy.optimize.curve_fit(func, xdata=x, ydata=y)
        diag=np.sqrt(np.diag(pcov))
        error = np.sum(np.power(y - func(x,*params), 2))
        debug = "a=%g b=%g c=%g residuals=%s error=%g" % (*params, str(diag), error)
        return (error, params,
                "\\left(%s\\,\\log(x) + %s\\right)\\,%s + %s" % (latexnum(params[1]), latexnum(params[2]), latexpow("x", exponent), latexnum(params[0])),
                "%s \\log(N)" % latexpow("N", exponent, 1),
                lambda x: func(x,*params),
                debug
               )
    except Exception as v:
        return (float("inf"), [], "", "", str(v)) # + tb.format_exc())
def correlate_log_plus(x,y):
    # Try y = a * log(x) + b
    #
    try:
        lx = np.log(x)
        params,residuals,rank,singular,rcond = np.polyfit(lx, y, 1, full=True)
        error = np.sum(np.power(y - (params[0]*lx+params[1]), 2))
        debug = "a=%g b=%g residuals=%g error=%g" % (*params, residuals[0], error)
        if(params[0] < params[1]/1000): return (float("inf"), [], "", "", "This doesn't work")
        return (error, params,
                "%s\\,\\log(x) + %s" % (latexnum(params[0]), latexnum(params[1])),
                "\\log(N)",
                lambda x: params[0]*np.log(x) + params[1],
                debug
               )
    except Exception as v:
        return (float("inf"), [], "", "", str(v)) # + tb.format_exc())
def correlate_factorial(x,y):
    # Try y = (x + a)!
    try:
        def func(x,a):
            r = scipy.special.factorial(x + a)
            #print(x,a,r)
            return r
        params,pcov = scipy.optimize.curve_fit(func, xdata=x, ydata=y, p0=[0])
        diag=np.sqrt(np.diag(pcov))
        error = np.sum(np.power(y - func(x,*params), 2))
        debug = "a=%g residuals=%s error=%g" % (*params, str(diag), error)
        return (error, params,
                "(x + %s)!" % (latexnum(params[0])),
                "N!",
                lambda x: func(x,*params),
                debug
               )
    except Exception as v:
        return (float("inf"), [], "", "", str(v)) # + tb.format_exc())
def correlate_factoriallog(x,y):
    # Try y = b * (x + a)! * log(x)
    try:
        def func(x,a,b):
            r = scipy.special.factorial(x + a) * np.log(x) * b
            return r
        params,pcov = scipy.optimize.curve_fit(func, xdata=x, ydata=y)
        diag=np.sqrt(np.diag(pcov))
        error = np.sum(np.power(y - func(x,*params), 2))
        debug = "a=%g b=%g residuals=%s error=%g" % (*params, str(diag), error)
        return (error, params,
                "%s \\cdot (x + %s)!\\,\\log(x)" % (latexnum(params[1]), latexnum(params[0])),
                "N! \\log(N)",
                lambda x: func(x,*params),
                debug
               )
    except Exception as v:
        return (float("inf"), [], "", "", str(v)) # + tb.format_exc())
def correlate(lengths, times):
    x = np.array(lengths)
    y = np.array(times)
    results = {"const":correlate_one(x,y),
               "poly":correlate_poly(x,y),
               "power":correlate_power(x,y),
               "powerlog":correlate_powerlog(x,y),
               "powerlog+":correlate_powerlog_plus(x,y),
               "exp":correlate_exponent(x,y),
               "log+":correlate_log_plus(x,y),
               "factor":correlate_factorial(x,y),
               "factlog":correlate_factoriallog(x,y)}
    chosen = {"score":float("inf"), "debug":results}
    #print("--")
    for k in results:
        #print(k,results[k])
        if results[k][0] < chosen['score']:
            chosen = {"name":k,                           # Name of the estimator
                      "score":results[k][0],              # Sum of squared errors of the estimation
                      "complexity":results[k][3].strip(), # LaTeX expression for the running time complexity
                      "formula":results[k][2],            # LaTeX formula for the estimating function y = f(x)
                      "params":results[k][1],             # Parameters from the estimator
                      "function":results[k][4],           # Callable function y = function(x)
                      "debug":results}                    # All the results
    return chosen
if __name__ == '__main__':
    assert(correlate([1,10,100,1000,10000],
                     [5,500,50000,5000000,500000000])['complexity'] == 'N^2')
    assert(correlate([1,10,100,1000,10000],
                     [5, 6, 7, 8, 9])['complexity'] == '\\log(N)')
    assert(correlate([1,10,100,1000,10000],
                     [5, 60, 700, 8000, 92000])['complexity'] == 'N \\log(N)')
    assert(correlate([1,2,3,4,5],
                     [2*1,2*3,2*6,2*10,2*15])['complexity'] == 'N^2')
    assert(correlate([1,2,3,4,5],
                     [64,128,256,512,1024])['complexity'] == '2^N')
    assert(correlate([2,3,4,5,6],
                     [1,2,6,24,121])['complexity'] == 'N!')
    assert(correlate([2,3,4,5,6],
                     [1*np.log(2),2*np.log(3),6*np.log(4),24*np.log(5),120*np.log(6)])['complexity'] == 'N! \\log(N)')
    assert(correlate([1,7,15,40,13],
                     [30,211,452,1201,388])['complexity'] == 'N')
    assert(correlate([1,7,15,40,13,12,1200],
                     [1234,1235,1235,1235,1236,1234,1235])['complexity'] == '1')
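
    # Optional demo: a minimal usage sketch (not exercised by the asserts above).
    # It times a deliberately quadratic toy function for a few input sizes and
    # feeds the measurements to correlate().  Wall-clock timings are machine-
    # dependent and noisy, so the detected complexity is printed rather than
    # asserted; with these sizes it typically comes out as N^2.
    import time
    def quadratic_work(n):
        total = 0
        for i in range(n):
            for j in range(n):
                total += i ^ j
        return total
    sizes = [100, 200, 400, 800, 1600]
    timings = []
    for n in sizes:
        start = time.perf_counter()
        quadratic_work(n)
        timings.append(time.perf_counter() - start)
    result = correlate(sizes, timings)
    print("Estimated complexity: O(%s)" % result['complexity'])
    print("Fitted formula: %s  (score=%g)" % (result['formula'], result['score']))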