Last active
July 10, 2023 17:06
-
-
Save dpfoose/38ca2f5aee2aea175ecc6e599ca6e973 to your computer and use it in GitHub Desktop.
Plot a 95% confidence ellipse for a scatter plot in Plotly
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def confidence_ellipse(x, y, n_std=1.96, size=100): | |
""" | |
Get the covariance confidence ellipse of *x* and *y*. | |
Parameters | |
---------- | |
x, y : array-like, shape (n, ) | |
Input data. | |
n_std : float | |
The number of standard deviations to determine the ellipse's radiuses. | |
size : int | |
Number of points defining the ellipse | |
Returns | |
------- | |
String containing an SVG path for the ellipse | |
References (H/T) | |
---------------- | |
https://matplotlib.org/3.1.1/gallery/statistics/confidence_ellipse.html | |
https://community.plotly.com/t/arc-shape-with-path/7205/5 | |
""" | |
if x.size != y.size: | |
raise ValueError("x and y must be the same size") | |
cov = np.cov(x, y) | |
pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1]) | |
# Using a special case to obtain the eigenvalues of this | |
# two-dimensionl dataset. | |
ell_radius_x = np.sqrt(1 + pearson) | |
ell_radius_y = np.sqrt(1 - pearson) | |
theta = np.linspace(0, 2 * np.pi, size) | |
ellipse_coords = np.column_stack([ell_radius_x * np.cos(theta), ell_radius_y * np.sin(theta)]) | |
# Calculating the stdandard deviation of x from | |
# the squareroot of the variance and multiplying | |
# with the given number of standard deviations. | |
x_scale = np.sqrt(cov[0, 0]) * n_std | |
x_mean = np.mean(x) | |
# calculating the stdandard deviation of y ... | |
y_scale = np.sqrt(cov[1, 1]) * n_std | |
y_mean = np.mean(y) | |
translation_matrix = np.tile([x_mean, y_mean], (ellipse_coords.shape[0], 1)) | |
rotation_matrix = np.array([[np.cos(np.pi / 4), np.sin(np.pi / 4)], | |
[-np.sin(np.pi / 4), np.cos(np.pi / 4)]]) | |
scale_matrix = np.array([[x_scale, 0], | |
[0, y_scale]]) | |
ellipse_coords = ellipse_coords.dot(rotation_matrix).dot(scale_matrix) + translation_matrix | |
path = f'M {ellipse_coords[0, 0]}, {ellipse_coords[0, 1]}' | |
for k in range(1, len(ellipse_coords)): | |
path += f'L{ellipse_coords[k, 0]}, {ellipse_coords[k, 1]}' | |
path += ' Z' | |
return path | |
""" EXAMPLE """ | |
from plotly import graph_objects as go | |
from plotly.colors import DEFAULT_PLOTLY_COLORS | |
from sklearn.datasets import load_iris | |
from sklearn.decomposition import PCA | |
iris = load_iris() | |
pca = PCA(n_components=2) | |
scores = pca.fit_transform(iris.data) | |
fig = go.Figure() | |
for target_value, target_name in enumerate(iris.target_names): | |
color = DEFAULT_PLOTLY_COLORS[target_value] | |
fig.add_trace( | |
go.Scatter( | |
x=scores[iris.target==target_value, 0], | |
y=scores[iris.target==target_value, 1], | |
name=target_name, | |
mode='markers', | |
marker={'color': color} | |
) | |
) | |
fig.add_shape(type='path', | |
path=confidence_ellipse(scores[iris.target==target_value, 0], scores[iris.target==target_value, 1]), | |
line={'dash': 'dot'}, | |
line_color=color) | |
fig.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment