# moyuanhuang/ml_cheat_sheet.py

## ml_cheat_sheet.py
# Easy-to-use Python package for basic machine learning
# scikit-learn: https://scikit-learn.org/stable/
# All the following examples are based on it.

# 1. Principal Component Analysis
# https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html

import numpy as np
from sklearn.decomposition import IncrementalPCA

# X is your data: an n * d array, where n is the number of samples (rows)
# and d is the number of features (columns) of each sample.
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])

# n_components is the number of dimensions to keep. It is normally smaller
# than d; it is set to 2 here only because the sample data X has d = 2.
n_components = 2

# IncrementalPCA processes the rows in mini-batches of `batch_size`.
pca = IncrementalPCA(n_components=n_components, batch_size=3)
pca.fit(X)

# Project the data onto the fitted components and show the result.
X_reduced = pca.transform(X)
print(X_reduced)

# ==============================================================================

# 2. Random Forests
# https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
from sklearn.ensemble import RandomForestRegressor
# this is just for generating data
from sklearn.datasets import make_regression

# sample data: 4 features, of which only 2 are informative; random_state is
# fixed so the generated data is the same on every run
X, y = make_regression(n_features=4, n_informative=2, random_state=0, shuffle=False)
regr = RandomForestRegressor(max_depth=2, random_state=0, n_estimators=100)

# run regression
regr.fit(X, y)

# make prediction on a sample point (0,0,0,0)
print(regr.predict([[0, 0, 0, 0]]))