Skip to content

Instantly share code, notes, and snippets.

@phillipi
Created July 15, 2023 23:13
Show Gist options
  • Save phillipi/038cd0322520f30ff155954a2c4f6971 to your computer and use it in GitHub Desktop.
Save phillipi/038cd0322520f30ff155954a2c4f6971 to your computer and use it in GitHub Desktop.
ML can extrapolate
import numpy as np
import matplotlib.pyplot as plt
def mk_data(true_theta, noise_sd, N_data_points, data_mean, data_sd):
    """Sample a noisy dataset from the model y = f(x, true_theta) + noise.

    Args:
        true_theta: parameter vector passed through to the module-level model `f`.
        noise_sd: standard deviation of the additive Gaussian observation noise.
        N_data_points: number of (x, y) pairs to draw.
        data_mean: mean of the Gaussian the x values are drawn from.
        data_sd: standard deviation of that Gaussian.

    Returns:
        (N_data_points, 2) array whose columns are x and y.
    """
    # x ~ N(data_mean, data_sd^2), kept as a column vector.
    data_x = np.expand_dims(np.random.randn(N_data_points) * data_sd + data_mean, axis=1)
    data_y = np.zeros(data_x.shape)
    for k in range(data_y.shape[0]):
        # One randn(1) draw per point, preserving the original's RNG stream.
        # The original wrapped the scalar f(...) in np.array/expand_dims to
        # build a (1,1) array only to assign it into a scalar slot — that
        # relies on implicit size-1-array conversion (deprecated in newer
        # NumPy); assign the scalar value directly instead.
        data_y[k, 0] = f(data_x[k, 0], true_theta) + np.random.randn(1)[0] * noise_sd
    return np.concatenate([data_x, data_y], axis=1)
def mk_loss_map(data, f, loss_fn=None):
    """Evaluate the average data-fit loss over a 101x101 grid of theta values.

    Grid search is used (rather than a gradient-based fit) so the full loss
    landscape J can be visualized, e.g. with plt.imshow(J).

    Args:
        data: (N, 2) array of (x, y) pairs.
        f: model function f(x, theta) -> prediction, with theta a length-2 vector.
        loss_fn: pointwise loss loss_fn(prediction, target). Defaults to the
            module-level `loss`, keeping the original call signature working.

    Returns:
        (data_fit_loss, xx, yy): the loss grid and the meshgrid coordinates
        of theta (xx is theta[0], yy is theta[1]).
    """
    if loss_fn is None:
        loss_fn = loss  # backward-compatible fallback to the module-level loss
    theta_min, theta_max = -2, 2
    xx, yy = np.meshgrid(np.linspace(theta_min, theta_max, 101),
                         np.linspace(theta_min, theta_max, 101))
    data_fit_loss = np.zeros(xx.shape)
    for i in range(xx.shape[0]):
        for j in range(yy.shape[1]):
            # Candidate parameter vector at this grid point.
            theta = np.array([xx[i, j], yy[i, j]])
            for k in range(data.shape[0]):
                data_fit_loss[i, j] += loss_fn(f(data[k, 0], theta), data[k, 1])
    data_fit_loss /= data.shape[0]  # average over the data points
    return data_fit_loss, xx, yy
def get_best_theta(J, xx, yy):
    """Return the grid point (theta_0, theta_1) with the lowest loss in J."""
    # Flat argmin over J, converted back to 2-D grid indices.
    row, col = np.unravel_index(np.argmin(J), J.shape)
    return np.array([xx[row, col], yy[row, col]])
def mk_plot(J, data, true_theta, best_theta):
    """Plot the fitted curve (solid blue), the true curve (dashed green),
    and the data points (black), on an equal-aspect [-4, 4]^2 window.

    Note: J is accepted for signature compatibility but not used here.
    """
    half_width = 4
    grid = np.linspace(-half_width, half_width, 101)
    plt.plot(grid, f(grid, best_theta), c='b', linewidth=2, zorder=1)
    plt.plot(grid, f(grid, true_theta), '--', c='g', linewidth=3, zorder=2)
    plt.scatter(data[:, 0], data[:, 1], c='k', zorder=3)
    axes = plt.gca()
    axes.set_xlim(-half_width, half_width)
    axes.set_ylim(-half_width, half_width)
    axes.set_aspect('equal')
    axes.set_xlabel('$x$')
    axes.set_ylabel('$y$', rotation=0)
def mk_fit(true_theta, noise_sd, N_data_points, data_mean, data_sd):
    """Generate data from true_theta, grid-search the loss landscape,
    and return (J, best_theta, data)."""
    data = mk_data(true_theta, noise_sd, N_data_points, data_mean, data_sd)
    loss_grid, grid_x, grid_y = mk_loss_map(data, f)
    return loss_grid, get_best_theta(loss_grid, grid_x, grid_y), data
def f(x, theta):
    """Model curve: y = sin(theta[1] * x^2) + theta[0] * x.

    Works elementwise when x is an array.
    """
    wiggle = np.sin(theta[1] * x ** 2)
    linear = theta[0] * x
    return wiggle + linear
def loss(x, y):
    """Quarter-power absolute error: |x - y| ** 0.25.

    The sub-linear exponent flattens large residuals, which makes the loss
    landscape J nicer to visualize (see the author's note below the gist).
    """
    residual = np.abs(x - y)
    return residual ** 0.25
# --- Demo: fit the model to data clustered far from the origin, then plot ---
seed = 4
np.random.seed(seed)  # fixed seed so the figure is reproducible

true_theta = np.array([0.2, -0.5])  # ground-truth parameters of f
noise_sd = 0.2                       # observation noise level
data_mean = -3                       # data concentrated around x = -3 ...
data_sd = 0.5                        # ... within a narrow band
N_data_points = 25

J, best_theta, data = mk_fit(true_theta, noise_sd, N_data_points, data_mean, data_sd)
mk_plot(J, data, true_theta, best_theta)
@phillipi
Copy link
Author

Noticed some questions on this: "why loss **0.25?" "why grid search?"
-- For visualization of J: you can call plt.imshow(J) to get a nice-looking plot of the loss landscape.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment