Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
最小二乗法による多項式近似(参考:「ITエンジニアのための機械学習理論入門」)
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas import Series, DataFrame
from numpy.random import normal
#------------#
# Parameters #
#------------#
N = 100  # number of samples generated for each of the training and test sets
M = [0, 1, 3, 9]  # polynomial degrees fitted and drawn, one subplot per degree
# Dataset {x_n,y_n} (n=1...N)
def create_dataset(num):
    """Generate `num` noisy samples of sin(2*pi*x) on an even grid over [0, 1].

    Args:
        num: Number of samples to generate (non-negative integer).

    Returns:
        DataFrame with columns 'x' and 'y' and `num` rows, where
        x_i = i / (num - 1) and y_i = sin(2*pi*x_i) plus Gaussian noise
        with standard deviation 0.3.
    """
    # linspace yields the same evenly spaced grid as i / (num - 1) but also
    # handles num == 1 (a single point at x = 0) without dividing by zero.
    x = np.linspace(0.0, 1.0, num)
    # One vectorized draw instead of one draw (and one DataFrame.append,
    # removed in pandas 2.0) per row.
    y = np.sin(2*np.pi*x) + normal(scale=0.3, size=num)
    # `columns` pins the column order regardless of dict ordering.
    return DataFrame({'x': x, 'y': y}, columns=['x', 'y'])
# Root mean square error
def rms_error(dataset, f):
    """Return the root-mean-square error of `f` over `dataset`.

    Args:
        dataset: DataFrame with columns 'x' and 'y'.
        f: Callable taking a single x value and returning the prediction.

    Returns:
        sqrt(2 * E / len(dataset)), where E is half the sum of squared
        residuals (y - f(x)) taken over every row.
    """
    # E = 1/2 * sum of squared residuals; f is evaluated one row at a time.
    half_sq_sum = sum(
        (0.5 * (row.y - f(row.x)) ** 2 for _, row in dataset.iterrows()),
        0.0)
    mean_sq = 2 * half_sq_sum / len(dataset)
    return np.sqrt(mean_sq)
# Least-squares method
def resolve(dataset, m):
    """Fit a degree-`m` polynomial to `dataset` by least squares.

    Args:
        dataset: DataFrame with numeric columns 'x' and 'y'.
        m: Degree of the polynomial (non-negative integer).

    Returns:
        Tuple (f, ws): `ws` is a 1-D array of the m + 1 coefficients ordered
        from the constant term upward, and `f` evaluates
        sum(ws[i] * x**i) for a scalar or a numpy array `x`.
    """
    x = np.asarray(dataset.x, dtype=float)
    t = np.asarray(dataset.y, dtype=float)
    # Design matrix whose columns are x**0, x**1, ..., x**m.
    phi = np.vander(x, m + 1, increasing=True)
    # Solve the least-squares problem directly rather than forming and
    # inverting phi^T phi: the normal-equation matrix is badly conditioned
    # at higher degrees and singular when there are fewer distinct x values
    # than coefficients. lstsq returns the minimum-norm solution in that case.
    ws = np.linalg.lstsq(phi, t, rcond=None)[0]

    def f(x):
        y = 0
        for i, w in enumerate(ws):
            y += w * (x ** i)
        return y

    return (f, ws)
if __name__ == '__main__':
    # Two independently generated noisy datasets of N points each: one to
    # fit on, one to measure the error on unseen data.
    train_set = create_dataset(N)
    test_set = create_dataset(N)

    # x grid and noise-free curve are the same for every subplot, so they
    # are computed once outside the loop.
    linex = np.linspace(0, 1, 101)
    true_liney = np.sin(2*np.pi*linex)

    coef_rows = []
    fig = plt.figure()
    for c, m in enumerate(M):
        f, ws = resolve(train_set, m)
        coef_rows.append(Series(ws, name="M=%d" % m))

        # The grid is fixed at 2x2, so at most four degrees can be shown.
        subplot = fig.add_subplot(2, 2, c+1)
        subplot.set_xlim(-0.05, 1.05)
        subplot.set_ylim(-1.5, 1.5)
        subplot.set_title("M=%d" % m)

        # plot Training set
        subplot.scatter(train_set.x, train_set.y, marker='o', color='blue')

        # plot True graph
        subplot.plot(linex, true_liney, color='green', linestyle='--')

        # plot Approximate graph
        label = "E(RMS)=%.2f" % rms_error(train_set, f)
        subplot.plot(linex, f(linex), color='red', label=label)
        subplot.legend(loc=1)

    # Build the table in one step: DataFrame.append was removed in
    # pandas 2.0. Shorter coefficient vectors are padded with NaN.
    df_ws = DataFrame(coef_rows)
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("Table of the coefficients")
    print(df_ws.transpose())
    fig.show()

    # Training vs. test RMS error for every degree 0..9.
    error_rows = []
    for m in range(0, 10):  # Dimension
        f, ws = resolve(train_set, m)
        error_rows.append([rms_error(train_set, f), rms_error(test_set, f)])
    df = DataFrame(error_rows, columns=['Training set', 'Test set'])
    df.plot(title='RMS Error', style=['-', '--'], grid=True, ylim=(0, 0.9))
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment