lotabout/decision-tree.py

## decision-tree.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

class DecisionTree(object):
    def __init__(self, data):
        """Learn a decision tree from data and label

        :data: List[List[val]], a list contains M sample, each sample is represented by a List
               The last column of the sample is the label
        :returns: The root of a decision tree

        """

        super(DecisionTree, self).__init__()
        self.root = self._split(data)

    def _split_samples(self, samples, feature):
        """Split samples into subsets, according to the feature

        :samples: List[List[val]]
        :feature: Int
        :returns: {val: List[data]} a dict contains the data of subsets

        """
        ret = {}
        for sample in samples:
            val = sample[feature]
            ret.setdefault(val, [])
            ret[val].append(sample)
        return ret
    def _split(self, data, level=0):
        """recursively split the data for node

        :data: List[data]
        :returns: label if should stop, else a node of the tree

        """

        if self._stop_now(data):
            return data[0][-1]

        # split the data
        feature = self._get_feature(data, level)
        subsets = self._split_samples(data, feature)

        return {key: self._split(subset, level+1) for key, subset in subsets.items()}

    def _stop_now(self, data):
        """check if we need to stop now

        :data: List[data]
        :returns: Boolean

        """
        labels = [d[-1] for d in data]
        return len(set(labels)) <= 1

    def _get_feature(self, data, level):
        """Decide which feature to be used to split data

        :data: List[data]
        :level: Int the level of the tree
        :returns: Int the dimension of the data to be used for split data

        """
        return level

data = [['白',   '富',   '美',   '去'],
        ['白',   '富',   '不美', '去'],
        ['白',   '不富', '美',   '犹豫'],
        ['白',   '不富', '不美', '犹豫'],
        ['不白', '富',   '美',   '去'],
        ['不白', '富',   '不美', '去'],
        ['不白', '不富', '美',   '犹豫'],
        ['不白', '不富', '不美', '不去']]

tree = DecisionTree(data)
print(tree.root)
	#!/usr/bin/env python3
	# -- coding: utf-8 --

	class DecisionTree(object):
	def __init__(self, data):
	"""Learn a decision tree from data and label

	:data: List[List[val]], a list contains M sample, each sample is represented by a List
	The last column of the sample is the label
	:returns: The root of a decision tree

	"""

	super(DecisionTree, self).__init__()
	self.root = self._split(data)

	def _split_samples(self, samples, feature):
	"""Split samples into subsets, according to the feature

	:samples: List[List[val]]
	:feature: Int
	:returns: {val: List[data]} a dict contains the data of subsets

	"""
	ret = {}
	for sample in samples:
	val = sample[feature]
	ret.setdefault(val, [])
	ret[val].append(sample)
	return ret
	def _split(self, data, level=0):
	"""recursively split the data for node

	:data: List[data]
	:returns: label if should stop, else a node of the tree

	"""

	if self._stop_now(data):
	return data[0][-1]

	# split the data
	feature = self._get_feature(data, level)
	subsets = self._split_samples(data, feature)

	return {key: self._split(subset, level+1) for key, subset in subsets.items()}

	def _stop_now(self, data):
	"""check if we need to stop now

	:data: List[data]
	:returns: Boolean

	"""
	labels = [d[-1] for d in data]
	return len(set(labels)) <= 1

	def _get_feature(self, data, level):
	"""Decide which feature to be used to split data

	:data: List[data]
	:level: Int the level of the tree
	:returns: Int the dimension of the data to be used for split data

	"""
	return level

	data = [['白', '富', '美', '去'],
	['白', '富', '不美', '去'],
	['白', '不富', '美', '犹豫'],
	['白', '不富', '不美', '犹豫'],
	['不白', '富', '美', '去'],
	['不白', '富', '不美', '去'],
	['不白', '不富', '美', '犹豫'],
	['不白', '不富', '不美', '不去']]

	tree = DecisionTree(data)
	print(tree.root)