@amroamroamro
Last active January 12, 2024 22:12
[Python] Fitting plane/surface to a set of data points

Python version of the MATLAB code in this Stack Overflow post: https://stackoverflow.com/a/18648210/97160

The example shows how to determine the best-fit plane/surface (1st or higher order polynomial) over a set of three-dimensional points.

Implemented in Python + NumPy + SciPy + matplotlib.
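
For reference, the first-order (planar) fit is an ordinary linear least-squares problem: each data point contributes one row [x_i, y_i, 1] to a design matrix A, and the coefficient vector C minimizes the squared residual against the z-values:

% Plane fit z = C_0 x + C_1 y + C_2, solved in the least-squares sense
\begin{bmatrix} x_1 & y_1 & 1 \\ \vdots & \vdots & \vdots \\ x_n & y_n & 1 \end{bmatrix}
\begin{bmatrix} C_0 \\ C_1 \\ C_2 \end{bmatrix}
\approx
\begin{bmatrix} z_1 \\ \vdots \\ z_n \end{bmatrix},
\qquad
\hat{C} = \arg\min_{C} \lVert A C - z \rVert_2^2

Higher orders work the same way, with extra monomial columns (x y, x^2, y^2, ...) appended to A; the model stays linear in the coefficients.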

[figure: quadratic_surface]


EDIT (2023-06-16)

I added a new example, fit.py, that shows polynomial fitting of any n-th order, as well as the same thing using scikit-learn functions in fit-sklearn.py.

[figure: peaks]

The original example:

#!/usr/bin/env python
import numpy as np
import scipy.linalg
from mpl_toolkits.mplot3d import Axes3D  # registers the 3d projection on older matplotlib
import matplotlib.pyplot as plt

# some 3-dim points
mean = np.array([0.0, 0.0, 0.0])
cov = np.array([[1.0, -0.5, 0.8], [-0.5, 1.1, 0.0], [0.8, 0.0, 1.0]])
data = np.random.multivariate_normal(mean, cov, 50)

# regular grid covering the domain of the data
X, Y = np.meshgrid(np.arange(-3.0, 3.0, 0.5), np.arange(-3.0, 3.0, 0.5))
XX = X.flatten()
YY = Y.flatten()

order = 1  # 1: linear, 2: quadratic
if order == 1:
    # best-fit linear plane: z = C[0]*x + C[1]*y + C[2]
    A = np.c_[data[:, 0], data[:, 1], np.ones(data.shape[0])]
    C, _, _, _ = scipy.linalg.lstsq(A, data[:, 2])  # coefficients

    # evaluate it on the grid
    Z = C[0]*X + C[1]*Y + C[2]

    # or expressed using a matrix/vector product:
    #Z = np.dot(np.c_[XX, YY, np.ones(XX.shape)], C).reshape(X.shape)
elif order == 2:
    # best-fit quadratic surface: z = C[0] + C[1]*x + C[2]*y + C[3]*x*y + C[4]*x^2 + C[5]*y^2
    A = np.c_[np.ones(data.shape[0]), data[:, :2], np.prod(data[:, :2], axis=1), data[:, :2]**2]
    C, _, _, _ = scipy.linalg.lstsq(A, data[:, 2])

    # evaluate it on the grid
    Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX*YY, XX**2, YY**2], C).reshape(X.shape)

# plot points and fitted surface
fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') was removed in matplotlib 3.6
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2)
ax.scatter(data[:, 0], data[:, 1], data[:, 2], c='r', s=50)
plt.xlabel('X')
plt.ylabel('Y')
ax.set_zlabel('Z')
ax.axis('equal')  # 'equal' aspect on 3d axes needs matplotlib >= 3.6
ax.axis('tight')
plt.show()
fit-sklearn.py:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt

def generateData(n=30):
    # similar to the peaks() function in MATLAB
    g = np.linspace(-3.0, 3.0, n)
    X, Y = np.meshgrid(g, g)
    X, Y = X.reshape(-1, 1), Y.reshape(-1, 1)
    Z = 3 * (1 - X)**2 * np.exp(- X**2 - (Y+1)**2) \
        - 10 * (X/5 - X**3 - Y**5) * np.exp(- X**2 - Y**2) \
        - 1/3 * np.exp(- (X+1)**2 - Y**2)
    return X, Y, Z

def names2model(names):
    # C[i] * X^n * Y^m
    return ' + '.join([
        f"C[{i}]*{n.replace(' ', '*')}"
        for i, n in enumerate(names)])

# generate sample 3-dim points
X, Y, Z = generateData()

# 1=linear, 2=quadratic, 3=cubic, ..., nth degree
order = 11

# best-fit polynomial surface
model = make_pipeline(
    PolynomialFeatures(degree=order),
    LinearRegression(fit_intercept=False))
model.fit(np.c_[X, Y], Z)
m = names2model(model[0].get_feature_names_out(['X', 'Y']))
C = model[1].coef_.T  # coefficients
r2 = model.score(np.c_[X, Y], Z)  # R-squared

# print summary
print(f'data = {Z.size}x3')
print(f'model = {m}')
print(f'coefficients =\n{C}')
print(f'R2 = {r2}')

# uniform grid covering the domain of the data
XX, YY = np.meshgrid(np.linspace(X.min(), X.max(), 20), np.linspace(Y.min(), Y.max(), 20))

# evaluate model on grid
ZZ = model.predict(np.c_[XX.flatten(), YY.flatten()]).reshape(XX.shape)

# plot points and fitted surface
ax = plt.figure().add_subplot(projection='3d')
ax.scatter(X, Y, Z, c='r', s=2)
ax.plot_surface(XX, YY, ZZ, rstride=1, cstride=1, alpha=0.2, linewidth=0.5, edgecolor='b')
ax.axis('tight')
ax.view_init(azim=-60.0, elev=30.0)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()
fit.py:

import numpy as np
from scipy.linalg import lstsq
import matplotlib.pyplot as plt

def generateData(n=30):
    # similar to the peaks() function in MATLAB
    g = np.linspace(-3.0, 3.0, n)
    X, Y = np.meshgrid(g, g)
    X, Y = X.reshape(-1, 1), Y.reshape(-1, 1)
    Z = 3 * (1 - X)**2 * np.exp(- X**2 - (Y+1)**2) \
        - 10 * (X/5 - X**3 - Y**5) * np.exp(- X**2 - Y**2) \
        - 1/3 * np.exp(- (X+1)**2 - Y**2)
    return X, Y, Z

def exp2model(e):
    # C[i] * X^n * Y^m
    return ' + '.join([
        f'C[{i}]' +
        ('*' if x > 0 or y > 0 else '') +
        (f'X^{x}' if x > 1 else 'X' if x == 1 else '') +
        ('*' if x > 0 and y > 0 else '') +
        (f'Y^{y}' if y > 1 else 'Y' if y == 1 else '')
        for i, (x, y) in enumerate(e)
    ])

# generate sample 3-dim points
X, Y, Z = generateData()

# 1=linear, 2=quadratic, 3=cubic, ..., nth degree
order = 11

# calculate exponents of design matrix
#e = [(x,y) for x in range(0,order+1) for y in range(0,order-x+1)]
e = [(x, y) for n in range(0, order+1) for y in range(0, n+1) for x in range(0, n+1) if x + y == n]
eX = np.asarray([[x] for x, _ in e]).T
eY = np.asarray([[y] for _, y in e]).T

# best-fit polynomial surface
A = (X ** eX) * (Y ** eY)
C, resid, _, _ = lstsq(A, Z)  # coefficients

# calculate R-squared from residual error
r2 = 1 - resid[0] / (Z.size * Z.var())

# print summary
print(f'data = {Z.size}x3')
print(f'model = {exp2model(e)}')
print(f'coefficients =\n{C}')
print(f'R2 = {r2}')

# uniform grid covering the domain of the data
XX, YY = np.meshgrid(np.linspace(X.min(), X.max(), 20), np.linspace(Y.min(), Y.max(), 20))

# evaluate model on grid
A = (XX.reshape(-1, 1) ** eX) * (YY.reshape(-1, 1) ** eY)
ZZ = np.dot(A, C).reshape(XX.shape)

# plot points and fitted surface
ax = plt.figure().add_subplot(projection='3d')
ax.scatter(X, Y, Z, c='r', s=2)
ax.plot_surface(XX, YY, ZZ, rstride=1, cstride=1, alpha=0.2, linewidth=0.5, edgecolor='b')
ax.axis('tight')
ax.view_init(azim=-60.0, elev=30.0)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()
@amroamroamro (Author):

@gabrilmf92

I believe I already answered something similar; see the comments above:

# your 3-dim data
x = [13.7, 14.3, 14.8, 15.3]
y = [4436.95646, 4311.55363, 4299.82329, 4164.97164]
z = [4774.21409, 4688.34413, 4687.26375, 4587.78392]
data = np.c_[x,y,z]

# regular grid covering the domain of the data
mn = np.min(data, axis=0)
mx = np.max(data, axis=0)
X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))
XX = X.flatten()
YY = Y.flatten()

order = 2  # quadratic fit

PS: your data consists of merely 4 points that look almost perfectly co-planar, so a linear fit would suffice.

@dnoon20 commented Aug 23, 2020:
How can I modify my surface so that its bounds come from a polygonal domain (i.e. instead of a standard rectangular x/y domain, the area within a 2D polygon defines the limits of the surface)?
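
One way to do this (a minimal sketch, not from the thread; the polygon vertices are hypothetical) is to mask grid points outside a matplotlib.path.Path and let plot_surface skip the NaNs:

# Sketch: clip a fitted surface to a polygonal domain by masking grid points
# outside the polygon; plot_surface does not draw cells that contain NaN.
import numpy as np
from matplotlib.path import Path

X, Y = np.meshgrid(np.arange(-3.0, 3.0, 0.5), np.arange(-3.0, 3.0, 0.5))
Z = 0.5*X + 0.2*Y + 1.0  # stand-in for the fitted surface
poly = Path([(-2, -2), (2, -1), (1, 2), (-1, 1)])  # hypothetical polygon
inside = poly.contains_points(np.c_[X.ravel(), Y.ravel()]).reshape(X.shape)
Z = np.where(inside, Z, np.nan)  # NaN outside the polygon -> not plotted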

@alexlib commented Aug 27, 2020:

Here is a cubic version:
elif order == 3:
    # M = [ones(size(x)), x, y, x.^2, x.*y, y.^2, x.^3, x.^2.*y, x.*y.^2, y.^3]
    A = np.c_[np.ones(data.shape[0]), data[:,:2], data[:,0]**2, np.prod(data[:,:2], axis=1), \
              data[:,1]**2, data[:,0]**3, np.prod(np.c_[data[:,0]**2,data[:,1]],axis=1), \
              np.prod(np.c_[data[:,0],data[:,1]**2],axis=1), data[:,2]**3]
    C,_,_,_ = scipy.linalg.lstsq(A, data[:,2])
    
    Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX**2, XX*YY, YY**2, XX**3, XX**2*YY, XX*YY**2, YY**3], C).reshape(X.shape)

@mirgilani commented Sep 7, 2020:

Dear @alexlib,
Thanks for the code for the cubic order. I have printed the coefficients; unfortunately I cannot tell which coefficient belongs to which term.
If I am not mistaken, print(C) gives coefficients as: C[6]x^3 + C[9]y^3 + C[7]x^2y + C[8]xy^2 + C[3]x^2 + C[5]y^2 + C[4]xy + C[1]x + C[2]y + C[0], but the result is really odd.
I would appreciate any help.

@alexlib commented Sep 16, 2020:

(quoting @mirgilani's question above) Please see the fork:
https://gist.github.com/alexlib/05abf4e42a0341047b1da12e69c2f3a3

@PhuDoTuong:

Has anyone tried using the Ceres Solver?

@lukaszprawdziwy commented Sep 16, 2021:

Hello,
Thank you for the handy code.
Is there a way to fit a surface with an exponential function? I am thinking of something like:

c[0] + c[1]*x + c[2]*y + c[3]*np.exp(c[4]*x) + c[5]*np.exp(c[6]*y)

How would I feed it to scipy.linalg.lstsq?

Thanks
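
That particular model is nonlinear in c[4] and c[6], so it cannot be written as a linear least-squares problem for scipy.linalg.lstsq. A minimal sketch using scipy.optimize.curve_fit instead (the data here is synthetic, and the initial guess p0 is an assumption; nonlinear solvers generally need one):

# Sketch: fit the exponential model above with nonlinear least squares.
import numpy as np
from scipy.optimize import curve_fit

def f(xy, c0, c1, c2, c3, c4, c5, c6):
    x, y = xy
    return c0 + c1*x + c2*y + c3*np.exp(c4*x) + c5*np.exp(c6*y)

rng = np.random.default_rng(0)
x = rng.uniform(-1, 1, 200)
y = rng.uniform(-1, 1, 200)
z = f((x, y), 1.0, 2.0, -1.0, 0.5, 0.3, 0.2, -0.4) + rng.normal(0, 0.01, 200)
C, _ = curve_fit(f, (x, y), z, p0=np.ones(7))  # C holds c[0]..c[6]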

@Mozzaurel:

Thank you @alexlib for the cubic version.
There is one typo in it: the last column of A should be data[:,1]**3, not data[:,2]**3.
With that it works fine:

elif order == 3:
    # M = [ones(size(x)), x, y, x.^2, x.*y, y.^2, x.^3, x.^2.*y, x.*y.^2, y.^3]
    A = np.c_[np.ones(data.shape[0]), data[:,:2], data[:,0]**2, np.prod(data[:,:2], axis=1), \
              data[:,1]**2, data[:,0]**3, np.prod(np.c_[data[:,0]**2,data[:,1]],axis=1), \
              np.prod(np.c_[data[:,0],data[:,1]**2],axis=1), data[:,1]**3]
    C,_,_,_ = scipy.linalg.lstsq(A, data[:,2])
    
    Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX**2, XX*YY, YY**2, XX**3, XX**2*YY, XX*YY**2, YY**3], C).reshape(X.shape)


@jeffreyi03:

@alexlib

I just used your code for the cubic order. Just to be sure: when I print out C, I get 10 values. What is the order of the coefficients, and what does the equation look like? I don't want to get this wrong.

Thank you and good job!

@jeffreyi03:

@mirgilani Did you figure this out? Can you tell me the correct order of coefficients?

@alexlib commented Jun 22, 2022:

The order is (MATLAB notation):

[ones(size(x)), x, y, x.^2, x.*y, y.^2, x.^3, x.^2.*y, x.*y.^2, y.^3]

@jeffreyi03 commented Jun 22, 2022 via email (message quoted in the reply below)

@alexlib commented Jun 22, 2022:

> Just to make sure I understand, C[0] + C[1]x + C[2]y + C[3]x^2 + C[4]xy ... ?

Yes
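
Spelled out, a minimal sketch of evaluating the cubic fit with that coefficient order (C as returned by the order == 3 branch above):

# Sketch: evaluate the cubic surface at scalar or array (x, y); C is ordered
# as confirmed above: [1, x, y, x^2, x*y, y^2, x^3, x^2*y, x*y^2, y^3].
def eval_cubic(C, x, y):
    return (C[0] + C[1]*x + C[2]*y
            + C[3]*x**2 + C[4]*x*y + C[5]*y**2
            + C[6]*x**3 + C[7]*x**2*y + C[8]*x*y**2 + C[9]*y**3)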

@jensleitloff commented May 10, 2023:

You can easily use any polynomial order without implementing it explicitly, using sklearn.preprocessing.PolynomialFeatures and sklearn.linear_model.LinearRegression:
https://stackoverflow.com/a/71214371
The results are identical, and you can also make the fit robust with RANSAC by simply replacing LinearRegression.
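
For instance, a minimal sketch of that RANSAC swap, reusing the gist's random points (RANSACRegressor's default settings are assumed):

# Sketch: the same PolynomialFeatures pipeline, with LinearRegression
# wrapped in RANSACRegressor to make the fit robust to outliers.
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.pipeline import make_pipeline

mean = np.array([0.0, 0.0, 0.0])
cov = np.array([[1.0, -0.5, 0.8], [-0.5, 1.1, 0.0], [0.8, 0.0, 1.0]])
data = np.random.multivariate_normal(mean, cov, 50)

model = make_pipeline(
    PolynomialFeatures(degree=2),
    RANSACRegressor(LinearRegression()))
model.fit(data[:, :2], data[:, 2])
print(model.score(data[:, :2], data[:, 2]))  # R-squared over all points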

@ditia298:

Thank you for the excellent Python solution for fitting a surface. May I ask: what if we have weights on the data points? For instance, (x1, y1) comes with weight w1, (x2, y2) with w2, etc. I need to take the weights into account while fitting the surface. Could you please enlighten me on how I can achieve that?

@jensleitloff commented Jun 16, 2023:

This is possible using PolynomialFeatures (as intended) for preprocessing, and e.g. LinearRegression, Ridge, HuberRegressor, or any other regressor that supports sample weights for the subsequent regression. Thus, it is a two-step process, as the stackoverflow link I posted earlier already shows: https://stackoverflow.com/a/71214371
First, you generate all polynomial features of your input points (x1, x2, ...) using PolynomialFeatures. The input points can be multidimensional, e.g. 2-dimensional for 3-dimensional fitting (see the following example). Afterwards, you fit (e.g. with LinearRegression) your polynomial features to (y1, y2, ...) with the corresponding weights (w1, w2, ...).
The easiest way to perform this two-step process is make_pipeline. A nice example is given in Robust linear estimator fitting.
Using the example data from a previous post of @amroamroamro, the complete example would look like this:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

# your 3-dim data
x = [13.7, 14.3, 14.8, 15.3]
y = [4436.95646, 4311.55363, 4299.82329, 4164.97164]
z = [4774.21409, 4688.34413, 4687.26375, 4587.78392]
data = np.c_[x,y,z]
# weights
w = [1, 2, 1, 0.5]
# e.g. quadratic polynomial
order = 2  

# regular grid covering the domain of the data
mn = np.min(data, axis=0)
mx = np.max(data, axis=0)
X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))
XX = X.flatten()
YY = Y.flatten()

# fitting with make_pipeline
model = make_pipeline(PolynomialFeatures(degree=order), LinearRegression())
model.fit(data[:, :2], data[:, -1], linearregression__sample_weight=w)
ZZ = model.predict(np.c_[XX, YY])

# fitting without make_pipeline
poly = PolynomialFeatures(degree=order)
data_poly = poly.fit_transform(data[:, :2])
model_poly = LinearRegression()
model_poly.fit(data_poly, data[:, -1], sample_weight=w)
ZZ_poly = model_poly.predict(poly.transform(np.c_[XX, YY]))

assert(np.all(ZZ==ZZ_poly))

You can use the models to estimate any z-value for given (error-free) x,y-values.
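
For example, querying a single hypothetical (x, y) point:

# estimate z at one (x, y) point (values hypothetical):
z_new = model.predict(np.c_[[14.0], [4300.0]])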

@jensleitloff:

Here is the original example with (random) weights, using the above procedure:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt

USE_WEIGHTS = True

# some 3-dim points
mean = np.array([0.0,0.0,0.0])
cov = np.array([[1.0,-0.5,0.8], [-0.5,1.1,0.0], [0.8,0.0,1.0]])
data = np.random.multivariate_normal(mean, cov, 50)
if USE_WEIGHTS:
    # weights can't be negative
    w = np.abs(np.random.normal(loc=1, scale=1, size=50))
else:
    w = np.ones(shape=50)

# regular grid covering the domain of the data
X,Y = np.meshgrid(np.arange(-3.0, 3.0, 0.5), np.arange(-3.0, 3.0, 0.5))
XX = X.flatten()
YY = Y.flatten()

order = 2    # 1: linear, 2: quadratic
model = make_pipeline(PolynomialFeatures(degree=order), LinearRegression())
model.fit(data[:, :2], data[:, -1], linearregression__sample_weight=w)
Z = model.predict(np.c_[XX, YY]).reshape(X.shape)

# plot points and fitted surface
ax = plt.figure().add_subplot(projection='3d')
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2)
ax.scatter(data[:,0], data[:,1], data[:,2], c='r', s=50)
plt.xlabel('X')
plt.ylabel('Y')
ax.set_zlabel('Z')
ax.axis('equal')
ax.axis('tight')
plt.show()

@amroamroamro (Author):

@jensleitloff see the new update fit.py (and the same thing in fit-sklearn.py using sklearn functions)
