@amroamroamro
Last active January 12, 2024 22:12
[Python] Fitting plane/surface to a set of data points

Python version of the MATLAB code in this Stack Overflow post: https://stackoverflow.com/a/18648210/97160

The example shows how to determine the best-fit plane/surface (1st or higher order polynomial) over a set of three-dimensional points.

Implemented in Python + NumPy + SciPy + matplotlib.

[figure: quadratic_surface]


EDIT (2023-06-16)

I added a new example fit.py that shows polynomial fitting of any n-th order, as well as the same thing implemented with scikit-learn functions in fit-sklearn.py.

[figure: peaks]

#!/usr/bin/env python
import numpy as np
import scipy.linalg
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

# some 3-dim points
mean = np.array([0.0, 0.0, 0.0])
cov = np.array([[1.0, -0.5, 0.8], [-0.5, 1.1, 0.0], [0.8, 0.0, 1.0]])
data = np.random.multivariate_normal(mean, cov, 50)

# regular grid covering the domain of the data
X, Y = np.meshgrid(np.arange(-3.0, 3.0, 0.5), np.arange(-3.0, 3.0, 0.5))
XX = X.flatten()
YY = Y.flatten()

order = 1    # 1: linear, 2: quadratic
if order == 1:
    # best-fit linear plane: z = C[0]*x + C[1]*y + C[2]
    A = np.c_[data[:,0], data[:,1], np.ones(data.shape[0])]
    C,_,_,_ = scipy.linalg.lstsq(A, data[:,2])    # coefficients
    # evaluate it on grid
    Z = C[0]*X + C[1]*Y + C[2]
    # or expressed using matrix/vector product
    #Z = np.dot(np.c_[XX, YY, np.ones(XX.shape)], C).reshape(X.shape)
elif order == 2:
    # best-fit quadratic surface: z = C[0] + C[1]*x + C[2]*y + C[3]*x*y + C[4]*x^2 + C[5]*y^2
    A = np.c_[np.ones(data.shape[0]), data[:,:2], np.prod(data[:,:2], axis=1), data[:,:2]**2]
    C,_,_,_ = scipy.linalg.lstsq(A, data[:,2])
    # evaluate it on a grid
    Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX*YY, XX**2, YY**2], C).reshape(X.shape)

# plot points and fitted surface
fig = plt.figure()
ax = fig.add_subplot(projection='3d')    # fig.gca(projection='3d') was removed in newer matplotlib
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2)
ax.scatter(data[:,0], data[:,1], data[:,2], c='r', s=50)
plt.xlabel('X')
plt.ylabel('Y')
ax.set_zlabel('Z')
ax.axis('equal')
ax.axis('tight')
plt.show()
fit-sklearn.py:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt

def generateData(n=30):
    # similar to peaks() function in MATLAB
    g = np.linspace(-3.0, 3.0, n)
    X, Y = np.meshgrid(g, g)
    X, Y = X.reshape(-1,1), Y.reshape(-1,1)
    Z = 3 * (1 - X)**2 * np.exp(- X**2 - (Y+1)**2) \
        - 10 * (X/5 - X**3 - Y**5) * np.exp(- X**2 - Y**2) \
        - 1/3 * np.exp(- (X+1)**2 - Y**2)
    return X, Y, Z

def names2model(names):
    # build model string: C[i] * X^n * Y^m
    return ' + '.join([
        f"C[{i}]*{n.replace(' ','*')}"
        for i,n in enumerate(names)])

# generate some random 3-dim points
X, Y, Z = generateData()

# 1=linear, 2=quadratic, 3=cubic, ..., nth degree
order = 11

# best-fit polynomial surface
model = make_pipeline(
    PolynomialFeatures(degree=order),
    LinearRegression(fit_intercept=False))
model.fit(np.c_[X, Y], Z)
m = names2model(model[0].get_feature_names_out(['X', 'Y']))
C = model[1].coef_.T    # coefficients
r2 = model.score(np.c_[X, Y], Z)    # R-squared

# print summary
print(f'data = {Z.size}x3')
print(f'model = {m}')
print(f'coefficients =\n{C}')
print(f'R2 = {r2}')

# uniform grid covering the domain of the data
XX, YY = np.meshgrid(np.linspace(X.min(), X.max(), 20), np.linspace(Y.min(), Y.max(), 20))

# evaluate model on grid
ZZ = model.predict(np.c_[XX.flatten(), YY.flatten()]).reshape(XX.shape)

# plot points and fitted surface
ax = plt.figure().add_subplot(projection='3d')
ax.scatter(X, Y, Z, c='r', s=2)
ax.plot_surface(XX, YY, ZZ, rstride=1, cstride=1, alpha=0.2, linewidth=0.5, edgecolor='b')
ax.axis('tight')
ax.view_init(azim=-60.0, elev=30.0)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()
fit.py:

import numpy as np
from scipy.linalg import lstsq
import matplotlib.pyplot as plt

def generateData(n=30):
    # similar to peaks() function in MATLAB
    g = np.linspace(-3.0, 3.0, n)
    X, Y = np.meshgrid(g, g)
    X, Y = X.reshape(-1,1), Y.reshape(-1,1)
    Z = 3 * (1 - X)**2 * np.exp(- X**2 - (Y+1)**2) \
        - 10 * (X/5 - X**3 - Y**5) * np.exp(- X**2 - Y**2) \
        - 1/3 * np.exp(- (X+1)**2 - Y**2)
    return X, Y, Z

def exp2model(e):
    # build model string: C[i] * X^n * Y^m
    return ' + '.join([
        f'C[{i}]' +
        ('*' if x>0 or y>0 else '') +
        (f'X^{x}' if x>1 else 'X' if x==1 else '') +
        ('*' if x>0 and y>0 else '') +
        (f'Y^{y}' if y>1 else 'Y' if y==1 else '')
        for i,(x,y) in enumerate(e)
    ])

# generate some random 3-dim points
X, Y, Z = generateData()

# 1=linear, 2=quadratic, 3=cubic, ..., nth degree
order = 11

# calculate exponents of design matrix
#e = [(x,y) for x in range(0,order+1) for y in range(0,order-x+1)]
e = [(x,y) for n in range(0,order+1) for y in range(0,n+1) for x in range(0,n+1) if x+y==n]
eX = np.asarray([[x] for x,_ in e]).T
eY = np.asarray([[y] for _,y in e]).T

# best-fit polynomial surface
A = (X ** eX) * (Y ** eY)
C,resid,_,_ = lstsq(A, Z)    # coefficients

# calculate R-squared from residual error
r2 = 1 - resid[0] / (Z.size * Z.var())

# print summary
print(f'data = {Z.size}x3')
print(f'model = {exp2model(e)}')
print(f'coefficients =\n{C}')
print(f'R2 = {r2}')

# uniform grid covering the domain of the data
XX, YY = np.meshgrid(np.linspace(X.min(), X.max(), 20), np.linspace(Y.min(), Y.max(), 20))

# evaluate model on grid
A = (XX.reshape(-1,1) ** eX) * (YY.reshape(-1,1) ** eY)
ZZ = np.dot(A, C).reshape(XX.shape)

# plot points and fitted surface
ax = plt.figure().add_subplot(projection='3d')
ax.scatter(X, Y, Z, c='r', s=2)
ax.plot_surface(XX, YY, ZZ, rstride=1, cstride=1, alpha=0.2, linewidth=0.5, edgecolor='b')
ax.axis('tight')
ax.view_init(azim=-60.0, elev=30.0)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.show()
@amroamroamro
Author

@Lukxl

maybe you need to adjust the stride/count params in the surface plot function to fit your data range:

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2, linewidth=0.5, edgecolor='b')

Refer to the plot_surface docs for details.

@Lukxl

Lukxl commented Jan 16, 2020

Thank you for this idea. I'll check and try this.

Edit: I still don't see the surface. I only get one line in my 3D plot.

Is it possible that the fitted surface results in a 2D line?

@amroamroamro
Author

@Lukxl

I'm assuming you adjusted the grid in the code to cover your actual data range?

# regular grid covering the domain of the data
mn = np.min(data, axis=0)
mx = np.max(data, axis=0)
X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))

You should also make sure that the data is well-behaved (i.e. look at the scatter plot) and that the fit didn't result in a degenerate model (manually inspect the coefficient values in C).
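
A minimal sketch of such checks, assuming data and C from the snippet above:

# quick sanity checks on the data and the fit
print(np.ptp(data, axis=0))  # per-axis spread; a near-zero range means degenerate input
print(C)                     # huge or NaN values suggest an ill-conditioned fit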

@Lukxl

Lukxl commented Jan 17, 2020

I did. I figured out how I can do it.
I needed a different fit function than the one you used.
Thanks for the great support.

@MSchmidt99

MSchmidt99 commented Mar 8, 2020

@madgrizzle

I took the time to linearize this for up to the nth degree over the afternoon. You will want to make sure that all your data has a 64-bit datatype (e.g. np.int64(x) or np.float64(z)), because at higher degrees the calculation will start to break down once overflow warnings appear.
This is what I threw together:

# functions
import numpy as np
import scipy.linalg

# in: X, Y [meshgrids]; coord [numpy array of [x,y,z] rows, ex: [[x1,y1,z1], [x2,y2,z2], etc.]]; n [degree]
# out: [Z]
def curve(X, Y, coord, n):
    # cast to 64-bit to avoid overflow at higher degrees (see note above)
    coord = np.asarray(coord, dtype=np.float64)
    XX = X.flatten()
    YY = Y.flatten()

    # best-fit curve
    A = XYchooseN(coord[:,0], coord[:,1], n)
    C,_,_,_ = scipy.linalg.lstsq(A, coord[:,2])
    print("Calculating C in curve() done")
    # evaluate it on a grid
    Z = np.dot(XYchooseN(XX, YY, n), C).reshape(X.shape)
    return Z

# in: [array], [array], [int]
# out: sum from k=0 to k=n of n choose k for x^n-k * y^k (coefficients ignored)
def XYchooseN(x,y,n):
    XYchooseN = []
    n = n+1
    for j in range(len(x)):
        I = x[j]
        J = y[j]
        matrix = []
        Is = []
        Js = []
        for i in range(0,n):
            Is.append(I**i)
            Js.append(J**i)
            matrix.append(np.concatenate((np.ones(n-i),np.zeros(i))))
        Is = np.array(Is)
        Js = np.array(Js)[np.newaxis]
        IsJs0s = matrix * Is * Js.T
        IsJs = []
        for i in range(0,n):
            IsJs = np.concatenate((IsJs,IsJs0s[i,:n-i]))
        XYchooseN.append(IsJs)
    return np.array(XYchooseN)

X,Y = np.meshgrid(x_data,y_data) # surface meshgrid

Z = curve(X, Y, coord, 3)

# todo: cmap by avg from (x1,y1) to (x2,y2) of |Z height - scatter height|

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.5, color='r')

@13Ducks

13Ducks commented Mar 17, 2020

@MSchmidt99

Thanks for the code. I am confused about what the "coord" variable represents.

Thanks.

@MSchmidt99

MSchmidt99 commented Mar 18, 2020

@13Ducks
Sorry about that, in my comment I made it look like a tuple. coord is just a numpy array of shape (n, 3), where n is the number of coordinates and 3 is the graph dimensions, with index 0 as X, 1 as Y, and 2 as Z. I have edited the function description in my earlier comment to make it less misleading.
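
For example, a minimal sketch building coord from three equal-length sequences (the names x, y and z are assumed):

import numpy as np
# stack per-axis values into an (n, 3) array; cast to 64-bit as noted above
coord = np.column_stack([x, y, z]).astype(np.float64)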

@karlmolina

@MSchmidt99

What are x_data and y_data?

@GrljAles

GrljAles commented Apr 9, 2020

Would it be possible to use this to detrend my data, and how would I approach this?
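
With the linear fit from the original snippet above (data and C as defined there), a minimal detrending sketch is to subtract the fitted plane from the z-values:

# evaluate the fitted plane at the data points and subtract it
z_trend = C[0]*data[:,0] + C[1]*data[:,1] + C[2]
z_detrended = data[:,2] - z_trend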

@EmanuelNazareth

EmanuelNazareth commented May 2, 2020

Excellent code, very handy. For those interested in R2 and adjusted R2, here's the code:

from sklearn.metrics import r2_score

# predicted z from the linear fit coefficients
z_pred = C[0]*np.array(x) + C[1]*np.array(y) + C[2]

# print R2
r2_score(z, z_pred)

def Adj_r2(Rsquared, n, p):
    # n = number of data points, p = number of independent variables
    return 1 - (1-Rsquared)*(n-1)/(n-p-1)

# print R2_adjusted (here with n=102 data points and p=3 predictors)
Adj_r2(r2_score(z, z_pred), 102, 3)

@gabrilmf92

gabrilmf92 commented Jul 9, 2020

Hi everyone,

Dear @amroamroamro,
In relation to your original code, I have x-, y- and z-axis data stored in 3 lists named x, y and z. The structure of my table data is as follows:

x \ y |     2      |     4
13.7  | 4436.95646 | 4774.21409
14.3  | 4311.55363 | 4688.34413
14.8  | 4299.82329 | 4687.26375
15.3  | 4164.97164 | 4587.78392

where the first column holds the x values, the column headers 2 and 4 are the y values, and the remaining cells are the z values.

I would like to get an equation like this:
a·x^2·y^2 + b·x^2·y + c·x^2 + d·x·y^2 + e·x·y + f·x + g·y^2 + h·y + i
How can I modify the code to get this equation?
A little bit more explanation would be very helpful.
Thanks.

@amroamroamro
Author

@gabrilmf92

I believe I already answered something similar, see above comments:

# your 3-dim data
x = [13.7, 14.3, 14.8, 15.3]
y = [4436.95646, 4311.55363, 4299.82329, 4164.97164]
z = [4774.21409, 4688.34413, 4687.26375, 4587.78392]
data = np.c_[x,y,z]

# regular grid covering the domain of the data
mn = np.min(data, axis=0)
mx = np.max(data, axis=0)
X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))
XX = X.flatten()
YY = Y.flatten()

order = 2  # quadratic fit

PS: your data consists of merely 4 points looking almost perfectly co-planar, so a linear fit would suffice.
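
For the bi-quadratic equation asked about above, a minimal sketch that builds the design matrix term by term (assuming data = np.c_[x,y,z] as in this snippet):

import numpy as np
import scipy.linalg

# z = a*x^2*y^2 + b*x^2*y + c*x^2 + d*x*y^2 + e*x*y + f*x + g*y^2 + h*y + i
x_, y_, z_ = data[:,0], data[:,1], data[:,2]
A = np.c_[x_**2*y_**2, x_**2*y_, x_**2, x_*y_**2, x_*y_, x_,
          y_**2, y_, np.ones_like(x_)]
C,_,_,_ = scipy.linalg.lstsq(A, z_)  # C = [a, b, c, d, e, f, g, h, i]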

@dnoon20

dnoon20 commented Aug 23, 2020

How can I modify my surface so that the bounds come from a polygonal domain (i.e., instead of a standard x/y rectangle, the area within the 2D polygon limits the surface)?
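
One possible sketch, assuming poly is an (m, 2) array of polygon vertices and X, Y, Z are the grid arrays from the code above: mask the grid cells outside the polygon so plot_surface leaves them blank.

import numpy as np
from matplotlib.path import Path

# True for grid points inside the polygon, reshaped back to the grid
inside = Path(poly).contains_points(np.c_[X.ravel(), Y.ravel()]).reshape(X.shape)
Z = np.where(inside, Z, np.nan)  # NaN cells are not drawn by plot_surface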

@alexlib

alexlib commented Aug 27, 2020

Here is a cubic version

elif order == 3:
    # M = [ones(size(x)), x, y, x.^2, x.*y, y.^2, x.^3, x.^2.*y, x.*y.^2, y.^3]
    A = np.c_[np.ones(data.shape[0]), data[:,:2], data[:,0]**2, np.prod(data[:,:2], axis=1), \
              data[:,1]**2, data[:,0]**3, np.prod(np.c_[data[:,0]**2,data[:,1]],axis=1), \
              np.prod(np.c_[data[:,0],data[:,1]**2],axis=1), data[:,2]**3]
    C,_,_,_ = scipy.linalg.lstsq(A, data[:,2])
    
    Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX**2, XX*YY, YY**2, XX**3, XX**2*YY, XX*YY**2, YY**3], C).reshape(X.shape)

@mirgilani

mirgilani commented Sep 7, 2020

Dear @alexlib
Thanks for the cubic-order code. I printed the coefficients; unfortunately I cannot tell which coefficient belongs to which term.
If I am not mistaken, print(C) gives coefficients for: C[6]·x^3 + C[9]·y^3 + C[7]·x^2·y + C[8]·x·y^2 + C[3]·x^2 + C[5]·y^2 + C[4]·x·y + C[1]·x + C[2]·y + C[0], but the result is really odd.
I would appreciate any help.

@alexlib

alexlib commented Sep 16, 2020

> (quoting @mirgilani's question above)

please see the fork
https://gist.github.com/alexlib/05abf4e42a0341047b1da12e69c2f3a3

@PhuDoTuong

Has anyone tried using Ceres Solver?

@lukaszprawdziwy

lukaszprawdziwy commented Sep 16, 2021

Hello,
Thank you for the handy code.
Is there a way to fit a surface with an exponential function?
I'm thinking of something like:
c[0] + c[1]*x + c[2]*y + c[3]*np.exp(c[4]*x) + c[5]*np.exp(c[6]*y)

How would I feed it to scipy.linalg.lstsq?

thanks
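
A model like this is nonlinear in c[4] and c[6], so it cannot be written as a fixed design matrix for scipy.linalg.lstsq. A minimal sketch with scipy.optimize.curve_fit instead (x, y, z assumed to be 1-d arrays):

import numpy as np
from scipy.optimize import curve_fit

def f(xy, c0, c1, c2, c3, c4, c5, c6):
    x, y = xy
    return c0 + c1*x + c2*y + c3*np.exp(c4*x) + c5*np.exp(c6*y)

C, _ = curve_fit(f, (x, y), z, p0=np.ones(7))  # C holds c[0]..c[6]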

@Mozzaurel

Thank you @alexlib for the cubic version.
There is one typo in it: the last column of A should be data[:,1]**3, not data[:,2]**3.
With that it works fine!

elif order == 3:
    # M = [ones(size(x)), x, y, x.^2, x.*y, y.^2, x.^3, x.^2.*y, x.*y.^2, y.^3]
    A = np.c_[np.ones(data.shape[0]), data[:,:2], data[:,0]**2, np.prod(data[:,:2], axis=1), \
              data[:,1]**2, data[:,0]**3, np.prod(np.c_[data[:,0]**2,data[:,1]],axis=1), \
              np.prod(np.c_[data[:,0],data[:,1]**2],axis=1), data[:,1]**3]
    C,_,_,_ = scipy.linalg.lstsq(A, data[:,2])
    
    Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX**2, XX*YY, YY**2, XX**3, XX**2*YY, XX*YY**2, YY**3], C).reshape(X.shape)


@jeffreyi03

@alexlib

I just used your code to get the cubic order. Just to be sure: when I print out C I get 10 values. What is the order of the coefficients, and what does the equation look like? I don't want to get this wrong.

Thank you and good job!

@jeffreyi03

@mirgilani Did you figure this out? Can you tell me the correct order of coefficients?

@alexlib

alexlib commented Jun 22, 2022

[ones(size(x)), x, y, x.^2, x.*y, y.^2, x.^3, x.^2.*y, x.*y.^2, y.^3]


@alexlib

alexlib commented Jun 22, 2022

> Just to make sure I understand: C[0] + C[1]·x + C[2]·y + C[3]·x^2 + C[4]·x·y ... ?

Yes

@jensleitloff

jensleitloff commented May 10, 2023

You can easily use any polynomial order without implementing it explicitly, using sklearn.preprocessing.PolynomialFeatures and sklearn.linear_model.LinearRegression:
https://stackoverflow.com/a/71214371
The results are identical, and you can also combine it with robust RANSAC by simply replacing LinearRegression.
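
For example, a minimal sketch of the RANSAC variant (data shaped as in the gist above is assumed):

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.pipeline import make_pipeline

# same pipeline as before, with the plain regressor wrapped in RANSAC
model = make_pipeline(
    PolynomialFeatures(degree=2),
    RANSACRegressor(LinearRegression()))
model.fit(data[:, :2], data[:, -1])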

@ditia298

Thank you for the excellent Python solution for fitting a surface. I just wanted to know, if I may: what if we have weights on the data points? For instance, x1, y1 comes with weight w1, x2, y2 with w2, etc. I need to take the weights into account while fitting the surface. Could you please enlighten me on how I can achieve that?

@jensleitloff

jensleitloff commented Jun 16, 2023

This is possible using PolynomialFeatures (as intended) as preprocessing, together with e.g. LinearRegression, Ridge, HuberRegressor or any other regressor that supports sample weights for the subsequent regression. Thus, it is a two-step process, as already shown in my previously posted Stack Overflow link: https://stackoverflow.com/a/71214371
First, you generate all possible polynomial features of your input points (x1, x2 ...) using PolynomialFeatures. The input points (x1, x2 ...) can be multidimensional, e.g. 2-dimensional for 3-dimensional fitting (see the following example). Afterwards you fit (e.g. with LinearRegression) your polynomial features to (y1, y2 ...) with the corresponding weights (w1, w2 ...).
The best way to perform this two-step process is to use make_pipeline. A nice example is given in Robust linear estimator fitting.
Using the example data from amroamroamro's earlier post, the complete example would look like this:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

# your 3-dim data
x = [13.7, 14.3, 14.8, 15.3]
y = [4436.95646, 4311.55363, 4299.82329, 4164.97164]
z = [4774.21409, 4688.34413, 4687.26375, 4587.78392]
data = np.c_[x,y,z]
# weights
w = [1, 2, 1, 0.5]
# e.g. quadratic polynom
order = 2  

# regular grid covering the domain of the data
mn = np.min(data, axis=0)
mx = np.max(data, axis=0)
X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))
XX = X.flatten()
YY = Y.flatten()

# fitting with make_pipeline
model = make_pipeline(PolynomialFeatures(degree=order), LinearRegression())
model.fit(data[:, :2], data[:, -1], linearregression__sample_weight=w)
ZZ = model.predict(np.c_[XX, YY])

# fitting without make_pipeline
poly = PolynomialFeatures(degree=order)
data_poly = poly.fit_transform(data[:, :2])
model_poly = LinearRegression()
model_poly.fit(data_poly, data[:, -1], sample_weight=w)
ZZ_poly = model_poly.predict(poly.transform(np.c_[XX, YY]))

assert(np.all(ZZ==ZZ_poly))

You can use the models to estimate any z-value for given (error-free) x,y-values.

@jensleitloff

Here is the original example with (random) weights and the above procedure:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt

USE_WEIGHTS = True

# some 3-dim points
mean = np.array([0.0,0.0,0.0])
cov = np.array([[1.0,-0.5,0.8], [-0.5,1.1,0.0], [0.8,0.0,1.0]])
data = np.random.multivariate_normal(mean, cov, 50)
if USE_WEIGHTS:
    # weights can't be negative
    w = np.abs(np.random.normal(loc=1, scale=1, size=50))
else:
    w = np.ones(shape=50)

# regular grid covering the domain of the data
X,Y = np.meshgrid(np.arange(-3.0, 3.0, 0.5), np.arange(-3.0, 3.0, 0.5))
XX = X.flatten()
YY = Y.flatten()

order = 2    # 1: linear, 2: quadratic
model = make_pipeline(PolynomialFeatures(degree=order), LinearRegression())
model.fit(data[:, :2], data[:, -1], linearregression__sample_weight=w)
Z = model.predict(np.c_[XX, YY]).reshape(X.shape)

# plot points and fitted surface
ax = plt.figure().add_subplot(projection='3d')
ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2)
ax.scatter(data[:,0], data[:,1], data[:,2], c='r', s=50)
plt.xlabel('X')
plt.ylabel('Y')
ax.set_zlabel('Z')
ax.axis('equal')
ax.axis('tight')
plt.show()

@amroamroamro
Author

@jensleitloff see the new update fit.py (and the same thing in fit-sklearn.py using sklearn functions)
