# YannBouyeron/koala.py

## koala.py
# Linear, exponential and power regressions on a class inheriting from pandas DataFrame

#import matplotlib
#matplotlib.use('Agg')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import attrdict

class Koala(pd.DataFrame):
	"""DataFrame with linear, exponential and power regression helpers.

	Each regression method takes two columns (by position or by label),
	fits the model by least squares with ``np.polyfit``, draws the data and
	the fitted curve on the current pyplot figure, and returns an
	``attrdict.AttrDict`` with these keys:

	- ``equation``: fitted equation with full-precision coefficients
	- ``R``: correlation coefficient from ``np.corrcoef`` (computed on the
	  log-transformed data for ``exp`` and ``pwr``)
	- ``a``, ``b``: fitted coefficients
	- ``f``, ``r``: short display strings, values rounded to 2 decimals
	- ``graph``: the ``matplotlib.pyplot`` module (``reg`` replaces it with
	  the subplot axes of the corresponding panel)
	"""

	def __init__(self, data=None, *args, **kwargs):
		"""Build the frame; every argument is forwarded to ``pd.DataFrame``."""
		super().__init__(data, *args, **kwargs)

	# ------------------------------------------------------------------
	# Column selection
	# ------------------------------------------------------------------

	def _column_label(self, key):
		"""Return the column label designated by *key*.

		An integer (Python or numpy, ``bool`` excluded) is a zero-based
		position; a string is a column label.

		Raises:
			KeyError: if *key* does not designate an existing column.
		"""
		if isinstance(key, (int, np.integer)) and not isinstance(key, bool):
			if 0 <= key < len(self.columns):
				return self.columns[key]
		elif isinstance(key, str) and key in self.columns:
			return key
		raise KeyError(key)

	def select(self, x, y):
		"""Resolve two columns and return them as float arrays.

		Args:
			x: position (non-negative int) or label (str) of the x column.
			y: position (non-negative int) or label (str) of the y column.
				``x`` and ``y`` are resolved independently, so a position
				and a label may be mixed.

		Returns:
			``(x_name, y_name, x_values, y_values)`` where the values are
			``float`` numpy arrays, or ``None`` when either selector does
			not designate an existing column.
		"""
		try:
			x_name = self._column_label(x)
			y_name = self._column_label(y)
		except KeyError:
			return None

		return x_name, y_name, np.array(self[x_name], dtype=float), np.array(self[y_name], dtype=float)

	def _selected(self, x, y):
		"""Same as ``select`` but raise instead of returning ``None``.

		``TypeError`` is used because that is what callers already got
		(as an opaque tuple-unpacking failure) before this check existed.
		"""
		picked = self.select(x, y)
		if picked is None:
			raise TypeError(
				'x and y must each be a valid column position (int) or '
				'label (str); got x={0!r}, y={1!r}'.format(x, y))
		return picked

	# ------------------------------------------------------------------
	# Shared fitting / plotting helpers
	# ------------------------------------------------------------------

	@staticmethod
	def _fit_line(u, v):
		"""Return ``(slope, intercept, R)`` of the least-squares line v = slope*u + intercept."""
		slope, intercept = np.polyfit(u, v, 1)
		return slope, intercept, np.corrcoef(u, v)[0, 1]

	@staticmethod
	def _draw_panel(x, y, x_name, y_name, title, fit=None, marker_size=2, font_size=5, legend_size=None):
		"""Draw the scatter plot, and optionally a fitted curve, on the current axes.

		``fit`` is ``None`` or a ``(values, line_style, legend_label)`` tuple.
		``legend_size`` defaults to ``font_size``.
		"""
		plt.plot(x, y, '^k', label='Original data', markersize=marker_size)
		if fit is not None:
			values, line_style, legend_label = fit
			plt.plot(x, values, line_style, label=legend_label)
		plt.title(title, fontsize=font_size)
		plt.xlabel(x_name, fontsize=font_size)
		plt.ylabel(y_name, fontsize=font_size)
		plt.legend(fontsize=font_size if legend_size is None else legend_size)

	@staticmethod
	def _show():
		"""Display the current figure, ignoring display errors."""
		try:
			plt.show()
		except Exception:
			# Deliberate best effort: failing to display the figure must
			# not prevent the fit result from being returned.
			pass

	@staticmethod
	def _result(equation, R, a, b, f):
		"""Pack a fit into the ``AttrDict`` returned by the regression methods."""
		return attrdict.AttrDict({"equation": equation, "R": R, "a": a, "b": b, "f": f, "r": 'R = ' + str(round(R, 2)), "graph": plt})

	# ------------------------------------------------------------------
	# Regressions
	# ------------------------------------------------------------------

	def lin(self, x, y, show=True):
		"""Linear regression ``y = a*x + b``.

		Args:
			x, y: column selectors, see ``select``.
			show: when true, display the figure.

		Returns:
			The result ``AttrDict`` described in the class docstring.

		Raises:
			TypeError: if ``x`` or ``y`` does not designate a column.
		"""
		x_name, y_name, x, y = self._selected(x, y)

		a, b, R = self._fit_line(x, y)
		f = 'y = ' + str(round(a, 2)) + 'x +' + str(round(b, 2))

		plt.close()
		self._draw_panel(
			x, y, x_name, y_name, '{0} = f({1})'.format(y_name, x_name),
			fit=(a * x + b, '--b', 'Fitted line - linear regression'),
			marker_size=4, font_size=7, legend_size=6)

		if show:
			self._show()

		return self._result('y = {0} x + {1}'.format(a, b), R, a, b, f)

	def exp(self, x, y, show=True):
		"""Exponential regression ``y = a*exp(b*x)``, fitted as a line on ``log(y)``.

		Args:
			x, y: column selectors, see ``select``.
			show: when true, display the figure.

		Returns:
			The result ``AttrDict``, or ``None`` when the fit is impossible
			(``y`` contains values <= 0, whose logarithm is undefined) or
			fails.

		Raises:
			TypeError: if ``x`` or ``y`` does not designate a column.
		"""
		x_name, y_name, x, y = self._selected(x, y)

		if np.any(y <= 0):
			return None

		try:
			b, log_a, R = self._fit_line(x, np.log(y))
			a = np.exp(log_a)
			f = 'y = ' + str(round(a, 2)) + '*exp(' + str(round(b, 2)) + 'x)'

			plt.close()
			self._draw_panel(
				x, y, x_name, y_name, 'Regression Exponentielle y = a.exp(bx)',
				fit=(a * np.exp(b * x), '--r', 'Fitted line - Exponential regression'))

			if show:
				self._show()

			return self._result('y = {0} * exp(x*{1})'.format(a, b), R, a, b, f)

		except Exception:
			# Documented contract: a failed fit yields None, not an error.
			return None

	def pwr(self, x, y, show=True):
		"""Power regression ``y = a*x**b``, fitted as a line on ``log(x)``, ``log(y)``.

		Args:
			x, y: column selectors, see ``select``.
			show: when true, display the figure.

		Returns:
			The result ``AttrDict``, or ``None`` when the fit is impossible
			(``x`` or ``y`` contains values <= 0) or fails.

		Raises:
			TypeError: if ``x`` or ``y`` does not designate a column.
		"""
		x_name, y_name, x, y = self._selected(x, y)

		if np.any(x <= 0) or np.any(y <= 0):
			return None

		try:
			b, log_a, R = self._fit_line(np.log(x), np.log(y))
			a = np.exp(log_a)
			f = 'y = ' + str(round(a, 2)) + '*x^' + str(round(b, 2))

			plt.close()
			self._draw_panel(
				x, y, x_name, y_name, 'Regression puissance y = ax^b',
				fit=(a * (x ** b), '--g', 'Fitted line - Regression puissance'))

			if show:
				self._show()

			return self._result('y = {0} * x**{1}'.format(a, b), R, a, b, f)

		except Exception:
			# Documented contract: a failed fit yields None, not an error.
			return None

	def reg(self, X, Y, shower=True):
		"""Run the three regressions and draw them on a 2x2 figure.

		Panels: raw scatter plot, linear fit, exponential fit, power fit.

		Args:
			X, Y: column selectors, see ``select``.
			shower: when true, display the figure.

		Returns:
			If either column contains a zero, the result of ``lin`` alone.
			Otherwise an ``AttrDict`` with keys ``plot`` (axes of the
			scatter panel), ``lin``, ``exp``, ``pwr`` (the three results,
			their ``graph`` set to the axes of their panel) and ``plt``.
			``exp`` / ``pwr`` are ``None`` when that model cannot be fitted;
			their panel then shows the data only.

		Raises:
			TypeError: if ``X`` or ``Y`` does not designate a column.
		"""
		x_name, y_name, x, y = self._selected(X, Y)

		if 0 in x or 0 in y:
			return self.lin(X, Y, show=shower)

		lin = self.lin(X, Y, show=False)
		exp = self.exp(X, Y, show=False)
		pwr = self.pwr(X, Y, show=False)

		plt.close()
		plt.figure(1)

		g1 = plt.subplot(2, 2, 1)
		self._draw_panel(x, y, x_name, y_name, 'Nuage de point')

		g2 = plt.subplot(2, 2, 2)
		self._draw_panel(
			x, y, x_name, y_name, 'Regression lineaire y = ax + b',
			fit=(lin.a * x + lin.b, '--b', 'Fitted line - linear regression'))

		g3 = plt.subplot(2, 2, 3)
		self._draw_panel(
			x, y, x_name, y_name, 'Regression Exponentielle y = a.exp(bx)',
			fit=None if exp is None else (exp.a * np.exp(exp.b * x), '--r', 'Fitted line - Exponential regression'))

		g4 = plt.subplot(2, 2, 4)
		self._draw_panel(
			x, y, x_name, y_name, 'Regression puissance y = ax^b',
			fit=None if pwr is None else (pwr.a * (x ** pwr.b), '--g', 'Fitted line - Regression puissance'))

		if shower:
			plt.tight_layout()
			self._show()

		# Point each result at the axes of its own panel.
		for model, axes in ((lin, g2), (exp, g3), (pwr, g4)):
			if model is not None:
				model.graph = axes

		return attrdict.AttrDict({"plot": g1, "lin": lin, "exp": exp, "pwr": pwr, "plt": plt})
	# Linear, exponential and power regressions on a class inheriting from pandas DataFrame

	#import matplotlib
	#matplotlib.use('Agg')

	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import attrdict

	class Koala(pd.DataFrame):
		"""DataFrame with linear, exponential and power regression helpers.

		Each regression method takes two columns (by position or by label),
		fits the model by least squares with ``np.polyfit``, draws the data and
		the fitted curve on the current pyplot figure, and returns an
		``attrdict.AttrDict`` with these keys:

		- ``equation``: fitted equation with full-precision coefficients
		- ``R``: correlation coefficient from ``np.corrcoef`` (computed on the
		  log-transformed data for ``exp`` and ``pwr``)
		- ``a``, ``b``: fitted coefficients
		- ``f``, ``r``: short display strings, values rounded to 2 decimals
		- ``graph``: the ``matplotlib.pyplot`` module (``reg`` replaces it with
		  the subplot axes of the corresponding panel)
		"""

		def __init__(self, data=None, *args, **kwargs):
			"""Build the frame; every argument is forwarded to ``pd.DataFrame``."""
			super().__init__(data, *args, **kwargs)

		# ------------------------------------------------------------------
		# Column selection
		# ------------------------------------------------------------------

		def _column_label(self, key):
			"""Return the column label designated by *key*.

			An integer (Python or numpy, ``bool`` excluded) is a zero-based
			position; a string is a column label.

			Raises:
				KeyError: if *key* does not designate an existing column.
			"""
			if isinstance(key, (int, np.integer)) and not isinstance(key, bool):
				if 0 <= key < len(self.columns):
					return self.columns[key]
			elif isinstance(key, str) and key in self.columns:
				return key
			raise KeyError(key)

		def select(self, x, y):
			"""Resolve two columns and return them as float arrays.

			Args:
				x: position (non-negative int) or label (str) of the x column.
				y: position (non-negative int) or label (str) of the y column.
					``x`` and ``y`` are resolved independently, so a position
					and a label may be mixed.

			Returns:
				``(x_name, y_name, x_values, y_values)`` where the values are
				``float`` numpy arrays, or ``None`` when either selector does
				not designate an existing column.
			"""
			try:
				x_name = self._column_label(x)
				y_name = self._column_label(y)
			except KeyError:
				return None

			return x_name, y_name, np.array(self[x_name], dtype=float), np.array(self[y_name], dtype=float)

		def _selected(self, x, y):
			"""Same as ``select`` but raise instead of returning ``None``.

			``TypeError`` is used because that is what a failed tuple
			unpacking of ``None`` would raise in the callers otherwise.
			"""
			picked = self.select(x, y)
			if picked is None:
				raise TypeError(
					'x and y must each be a valid column position (int) or '
					'label (str); got x={0!r}, y={1!r}'.format(x, y))
			return picked

		# ------------------------------------------------------------------
		# Shared fitting / plotting helpers
		# ------------------------------------------------------------------

		@staticmethod
		def _fit_line(u, v):
			"""Return ``(slope, intercept, R)`` of the least-squares line v = slope*u + intercept."""
			slope, intercept = np.polyfit(u, v, 1)
			return slope, intercept, np.corrcoef(u, v)[0, 1]

		@staticmethod
		def _draw_panel(x, y, x_name, y_name, title, fit=None, marker_size=2, font_size=5, legend_size=None):
			"""Draw the scatter plot, and optionally a fitted curve, on the current axes.

			``fit`` is ``None`` or a ``(values, line_style, legend_label)`` tuple.
			``legend_size`` defaults to ``font_size``.
			"""
			plt.plot(x, y, '^k', label='Original data', markersize=marker_size)
			if fit is not None:
				values, line_style, legend_label = fit
				plt.plot(x, values, line_style, label=legend_label)
			plt.title(title, fontsize=font_size)
			plt.xlabel(x_name, fontsize=font_size)
			plt.ylabel(y_name, fontsize=font_size)
			plt.legend(fontsize=font_size if legend_size is None else legend_size)

		@staticmethod
		def _show():
			"""Display the current figure, ignoring display errors."""
			try:
				plt.show()
			except Exception:
				# Deliberate best effort: failing to display the figure must
				# not prevent the fit result from being returned.
				pass

		@staticmethod
		def _result(equation, R, a, b, f):
			"""Pack a fit into the ``AttrDict`` returned by the regression methods."""
			return attrdict.AttrDict({"equation": equation, "R": R, "a": a, "b": b, "f": f, "r": 'R = ' + str(round(R, 2)), "graph": plt})

		# ------------------------------------------------------------------
		# Regressions
		# ------------------------------------------------------------------

		def lin(self, x, y, show=True):
			"""Linear regression ``y = a*x + b``.

			Args:
				x, y: column selectors, see ``select``.
				show: when true, display the figure.

			Returns:
				The result ``AttrDict`` described in the class docstring.

			Raises:
				TypeError: if ``x`` or ``y`` does not designate a column.
			"""
			x_name, y_name, x, y = self._selected(x, y)

			a, b, R = self._fit_line(x, y)
			f = 'y = ' + str(round(a, 2)) + 'x +' + str(round(b, 2))

			plt.close()
			self._draw_panel(
				x, y, x_name, y_name, '{0} = f({1})'.format(y_name, x_name),
				fit=(a * x + b, '--b', 'Fitted line - linear regression'),
				marker_size=4, font_size=7, legend_size=6)

			if show:
				self._show()

			return self._result('y = {0} x + {1}'.format(a, b), R, a, b, f)

		def exp(self, x, y, show=True):
			"""Exponential regression ``y = a*exp(b*x)``, fitted as a line on ``log(y)``.

			Args:
				x, y: column selectors, see ``select``.
				show: when true, display the figure.

			Returns:
				The result ``AttrDict``, or ``None`` when the fit is impossible
				(``y`` contains values <= 0, whose logarithm is undefined) or
				fails.

			Raises:
				TypeError: if ``x`` or ``y`` does not designate a column.
			"""
			x_name, y_name, x, y = self._selected(x, y)

			if np.any(y <= 0):
				return None

			try:
				b, log_a, R = self._fit_line(x, np.log(y))
				a = np.exp(log_a)
				f = 'y = ' + str(round(a, 2)) + '*exp(' + str(round(b, 2)) + 'x)'

				plt.close()
				self._draw_panel(
					x, y, x_name, y_name, 'Regression Exponentielle y = a.exp(bx)',
					fit=(a * np.exp(b * x), '--r', 'Fitted line - Exponential regression'))

				if show:
					self._show()

				return self._result('y = {0} * exp(x*{1})'.format(a, b), R, a, b, f)

			except Exception:
				# Documented contract: a failed fit yields None, not an error.
				return None

		def pwr(self, x, y, show=True):
			"""Power regression ``y = a*x**b``, fitted as a line on ``log(x)``, ``log(y)``.

			Args:
				x, y: column selectors, see ``select``.
				show: when true, display the figure.

			Returns:
				The result ``AttrDict``, or ``None`` when the fit is impossible
				(``x`` or ``y`` contains values <= 0) or fails.

			Raises:
				TypeError: if ``x`` or ``y`` does not designate a column.
			"""
			x_name, y_name, x, y = self._selected(x, y)

			if np.any(x <= 0) or np.any(y <= 0):
				return None

			try:
				b, log_a, R = self._fit_line(np.log(x), np.log(y))
				a = np.exp(log_a)
				f = 'y = ' + str(round(a, 2)) + '*x^' + str(round(b, 2))

				plt.close()
				self._draw_panel(
					x, y, x_name, y_name, 'Regression puissance y = ax^b',
					fit=(a * (x ** b), '--g', 'Fitted line - Regression puissance'))

				if show:
					self._show()

				return self._result('y = {0} * x**{1}'.format(a, b), R, a, b, f)

			except Exception:
				# Documented contract: a failed fit yields None, not an error.
				return None

		def reg(self, X, Y, shower=True):
			"""Run the three regressions and draw them on a 2x2 figure.

			Panels: raw scatter plot, linear fit, exponential fit, power fit.

			Args:
				X, Y: column selectors, see ``select``.
				shower: when true, display the figure.

			Returns:
				If either column contains a zero, the result of ``lin`` alone.
				Otherwise an ``AttrDict`` with keys ``plot`` (axes of the
				scatter panel), ``lin``, ``exp``, ``pwr`` (the three results,
				their ``graph`` set to the axes of their panel) and ``plt``.
				``exp`` / ``pwr`` are ``None`` when that model cannot be fitted;
				their panel then shows the data only.

			Raises:
				TypeError: if ``X`` or ``Y`` does not designate a column.
			"""
			x_name, y_name, x, y = self._selected(X, Y)

			if 0 in x or 0 in y:
				return self.lin(X, Y, show=shower)

			lin = self.lin(X, Y, show=False)
			exp = self.exp(X, Y, show=False)
			pwr = self.pwr(X, Y, show=False)

			plt.close()
			plt.figure(1)

			g1 = plt.subplot(2, 2, 1)
			self._draw_panel(x, y, x_name, y_name, 'Nuage de point')

			g2 = plt.subplot(2, 2, 2)
			self._draw_panel(
				x, y, x_name, y_name, 'Regression lineaire y = ax + b',
				fit=(lin.a * x + lin.b, '--b', 'Fitted line - linear regression'))

			g3 = plt.subplot(2, 2, 3)
			self._draw_panel(
				x, y, x_name, y_name, 'Regression Exponentielle y = a.exp(bx)',
				fit=None if exp is None else (exp.a * np.exp(exp.b * x), '--r', 'Fitted line - Exponential regression'))

			g4 = plt.subplot(2, 2, 4)
			self._draw_panel(
				x, y, x_name, y_name, 'Regression puissance y = ax^b',
				fit=None if pwr is None else (pwr.a * (x ** pwr.b), '--g', 'Fitted line - Regression puissance'))

			if shower:
				plt.tight_layout()
				self._show()

			# Point each result at the axes of its own panel.
			for model, axes in ((lin, g2), (exp, g3), (pwr, g4)):
				if model is not None:
					model.graph = axes

			return attrdict.AttrDict({"plot": g1, "lin": lin, "exp": exp, "pwr": pwr, "plt": plt})