C Ranga Vamsi vamc-stash

## DTRegressionAlgorithm.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import operator


# Machine epsilon of the float type: the gap between 1.0 and the next larger
# representable float, as reported by np.finfo.
# NOTE(review): no use of `eps` is visible in this excerpt -- presumably it is
# added to denominators / log arguments to avoid zero; confirm against callers.
eps = np.finfo(float).eps

def rmse_score(y_true, y_pred):

## DTRegressionPredictions.py
def _predict_target(self, feature_lookup, x, tree):

	for node in tree.keys():
		val = x[node]
		if type(val) == str:
			tree = tree[node][val]
		else:
			cutoff = str(list(tree[node].keys())[0]).split('<=')[1]

			if(val <= float(cutoff)):	#Left Child

## DTRegression_build.py
def _build_tree(self, df, tree = None):

  """
    Args:
      df: current number of rows available for splitting(decision making)

  """

  #Get feature with minimum score
  feature, cutoff = self._find_best_split(df)

## DTRegression_splitFeature.py
def _find_feature_split(self, feature, df):

  best_score = float('inf')
  cutoff = float

  for val in df[feature]:
    left_child = df[feature][df[feature] <= val]
    right_child = df[feature][df[feature] > val]

    if(len(left_child) > 0 and len(right_child) > 0):

## DTRegression_split.py
def _find_best_split(self, df):

  """
    Finds the column to split on first.

  """

  best_feature = str
  cutoff = None
  best_score = float('inf')

## DecisionTreeRegression_init.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import operator


# Machine epsilon of the float type: the gap between 1.0 and the next larger
# representable float, as reported by np.finfo.
# NOTE(review): no use of `eps` is visible in this excerpt -- presumably it is
# added to denominators / log arguments to avoid zero; confirm against callers.
eps = np.finfo(float).eps

def train_test_split(x, y, test_size = 0.25, random_state = None):

## DecisionTreeClassification.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import operator


# Machine epsilon of the float type: the gap between 1.0 and the next larger
# representable float, as reported by np.finfo.
# NOTE(review): no use of `eps` is visible in this excerpt -- presumably it
# keeps entropy terms finite (e.g. avoids log(0) or division by zero); confirm
# against callers.
eps = np.finfo(float).eps

def accuracy_score(y_true, y_pred):

## DecisionTreePredictions.py
def _predict_target(self, feature_lookup, x, tree):

	for node in tree.keys():
		val = x[node]
		if type(val) == str:
			tree = tree[node][val]
		else:
			cutoff = str(list(tree[node].keys())[0]).split('<=')[1]

			if(val <= float(cutoff)):	#Left Child

## BuildDecisionTreeClassifier.py
def _build_tree(self, df, tree = None):

  """
    Args:
      df: current number of rows available for splitting(decision making)

  """

  #Get feature with maximum information gain
  feature, cutoff = self._find_best_split(df)

## InformationGain.py
def _find_best_split(self, df):

	"""
		Finds the column to split on first using 'Information Gain' Metric.

		Information Gain(IG) = Entropy(parent) - Sum of Entropy(Children)
					IG(T, a) = H(T) - H(T|a)

		Entropy(parent) H(T) = (Sum[i=1 to J](- Pi * log(Pi)))
		Sum of Entropy(children) H(T|a) = Sum(P(a) * Sum[i=1 to J](- P(i|a) * log(P(i|a))))
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import math
	import operator


	eps = np.finfo(float).eps

	def rmse_score(y_true, y_pred):
	def _predict_target(self, feature_lookup, x, tree):

	for node in tree.keys():
	val = x[node]
	if type(val) == str:
	tree = tree[node][val]
	else:
	cutoff = str(list(tree[node].keys())[0]).split('<=')[1]

	if(val <= float(cutoff)): #Left Child
	def _build_tree(self, df, tree = None):

	"""
	Args:
	df: current number of rows available for splitting(decision making)

	"""

	#Get feature with minimum score
	feature, cutoff = self._find_best_split(df)
	def _find_feature_split(self, feature, df):

	best_score = float('inf')
	cutoff = float

	for val in df[feature]:
	left_child = df[feature][df[feature] <= val]
	right_child = df[feature][df[feature] > val]

	if(len(left_child) > 0 and len(right_child) > 0):
	def _find_best_split(self, df):

	"""
	Finds the column to split on first.

	"""

	best_feature = str
	cutoff = None
	best_score = float('inf')
	def _find_best_split(self, df):

	"""
	Finds the column to split on first using 'Information Gain' Metric.

	Information Gain(IG) = Entropy(parent) - Sum of Entropy(Children)
	IG(T, a) = H(T) - H(T\|a)

	Entropy(parent) H(T) = (Sum[i=1 to J](- Pi * log(Pi)))
	Sum of Entropy(children) H(T\|a) = Sum(P(a) * Sum[i=1 to J](- P(i\|a) * log(P(i\|a)))