Created
May 7, 2020 16:50
-
-
Save rcassani/699e78a67c476aab15fc7a9e84a94f8a to your computer and use it in GitHub Desktop.
Histogram overlap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function [nElementsNorm, xCenters, heightsNorm, edges] = hist_norm(data, nBins) | |
% | |
% HIST_NORM(DATA, NBINS) computes the Histogram for the vector DATA | |
% using NBINS, and normalize it so the area occupied by the bars with heigth | |
% NELEMENTSNORM with center at XCENTERS sums up to 1. | |
% The resulting histogram (i.e., the barrs) can be plotted with | |
% bar(xCenters, nElementsNorm) | |
% alternatively, the empirical PDF can be plotted with | |
% area(heightsNorm,edges) | |
% compute the Histogram for DATA | |
[nElements, xCenters] = hist(data, nBins); | |
% half distance between bars | |
deltaBar = mean(abs(diff(xCenters))) / 2; | |
% bar edges | |
edgeLow = xCenters - deltaBar; | |
edgeHigh = xCenters + (deltaBar * .99); | |
edges = [edgeLow;edgeHigh]; | |
edges = edges(:); | |
% duplicate each element in the array nElements | |
heights = [nElements;nElements]; | |
heights = heights(:); | |
% normalize the histogram area to sum 1 | |
histArea = trapz(edges,heights); | |
nElementsNorm = nElements/histArea; | |
heightsNorm = heights/histArea; | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function [overlap] = hist_overlap(dataX, dataY, nBinsX, nBinsY, vplot, color1, color2, color3) | |
% [overlap] = hist_overlap(dataX, dataY, nBinsX, nBinsY, vplot, color1, color2, color3) | |
% | |
% computes the overlap between two sample distributions from dataX and dataY | |
% nBinsX defines the number of bins to use in X histogram | |
% nBinsY defines the number of bins to use in Y histogram | |
% VPLOT can be 'true' or 'false' to show or not the histograms and their overlap | |
% The following parameters are optional | |
% Both histograms are Normalized so its area is equal to 1 | |
% COLOR1 defines the filling color of the first distribution, | |
% COLOR2 defines the filling color of the second distribution and | |
% COLOR3 indicates the color of the overlaping area. | |
% As the histograms for dataX and dataY are normalized, OVERLAP indicates | |
% the overlaping fraction taking values from 0 to 1 | |
% | |
% Example of utilization: | |
% x = randn(1,1000); | |
% y = 1+randn(1,1000); | |
% hist_overlap(x, y) | |
% alpha(0.6); | |
%% Argument validation | |
if nargin < 2 | |
error('At least 2 arguments required'); | |
end | |
if nargout > 0 | |
% The function returns a value to a variable | |
if ~exist('vplot','var') || isempty(vplot) | |
vplot = false; | |
end | |
else | |
vplot = true; | |
end | |
% Number of bins | |
if ~exist('nBinsX','var') || isempty(nBinsX) | |
nBinsX = 40; | |
end | |
if ~exist('nBinsY','var') || isempty(nBinsY) | |
nBinsY = 40; | |
end | |
% colors | |
if ~exist('color1','var') || isempty(color1) | |
color1 = 'black'; | |
end | |
if ~exist('color2','var') || isempty(color2) | |
color2 = 'blue'; | |
end | |
if ~exist('color3','var') || isempty(color3) | |
color3 = 'yellow'; | |
end | |
%Compute normalized histograms | |
[~, ~, heightsX, edgesX] = hist_norm(dataX, nBinsX); | |
[~, ~, heightsY, edgesY] = hist_norm(dataY, nBinsY); | |
minEdge = min([edgesX;edgesY]); | |
maxEdge = max([edgesX;edgesY]); | |
%Stair series | |
stairxX = [minEdge; edgesX; edgesX(end); edgesX(end)]; | |
stairxY = [minEdge; edgesY; edgesY(end); edgesY(end)]; | |
stairyX = [0; heightsX; heightsX(end); 0]; | |
stairyY = [0; heightsY; heightsY(end); 0]; | |
%Concatenate all the unique edges and its heights | |
%Unique edges | |
uEdgesX = [edgesX(1:2:end);edgesX(end)]; | |
uEdgesY = [edgesY(1:2:end);edgesY(end)]; | |
%Concatenate and sort Edges | |
edgesAll = sort([uEdgesX;uEdgesY]); | |
%Number of Edges and Heights expeted in the result | |
nEdgesAll = numel(edgesAll); | |
heightsOv = zeros([(nEdgesAll-1)*2,1]); | |
edgesOv = heightsOv; | |
k =1; | |
for iEdge = 1:nEdgesAll-1 | |
centerOv = mean(edgesAll(iEdge:iEdge+1)); | |
%Find the value of height for both staris and select the minimum | |
indexX = find(stairxX<=centerOv,1,'last'); | |
valueX = stairyX(indexX); | |
indexY = find(stairxY<=centerOv,1,'last'); | |
valueY = stairyY(indexY); | |
heightsOv(k) = min(valueX,valueY); | |
heightsOv(k+1) = heightsOv(k); | |
edgesOv(k) = edgesAll(iEdge); | |
edgesOv(k+1) = edgesAll(iEdge+1); | |
k = k+2; | |
end | |
%plot empiricial pdf (for dataX and dataY) its contour and | |
%the overlaping area | |
if ~vplot | |
% False vplot (Verbose Plot) | |
else | |
area(edgesX, heightsX, 'FaceColor',color1) | |
hold on | |
xlim([minEdge,maxEdge]); | |
area(edgesY, heightsY,'FaceColor',color2) | |
area(edgesOv, heightsOv, 'FaceColor',color3,'EdgeColor','none') | |
stairs(stairxX, stairyX, ':r'); | |
stairs(stairxY, stairyY, '--r'); | |
stairs(edgesOv, heightsOv, '-k'); | |
hold off; | |
end | |
%Overlap as fraction of 1 | |
overlap = trapz(edgesOv,heightsOv); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment