Skip to content

Instantly share code, notes, and snippets.

@rcassani
Created May 7, 2020 16:50
Show Gist options
  • Save rcassani/699e78a67c476aab15fc7a9e84a94f8a to your computer and use it in GitHub Desktop.
Save rcassani/699e78a67c476aab15fc7a9e84a94f8a to your computer and use it in GitHub Desktop.
Histogram overlap
function [nElementsNorm, xCenters, heightsNorm, edges] = hist_norm(data, nBins)
%
% HIST_NORM(DATA, NBINS) computes the Histogram for the vector DATA
% using NBINS, and normalize it so the area occupied by the bars with heigth
% NELEMENTSNORM with center at XCENTERS sums up to 1.
% The resulting histogram (i.e., the barrs) can be plotted with
% bar(xCenters, nElementsNorm)
% alternatively, the empirical PDF can be plotted with
% area(heightsNorm,edges)
% compute the Histogram for DATA
[nElements, xCenters] = hist(data, nBins);
% half distance between bars
deltaBar = mean(abs(diff(xCenters))) / 2;
% bar edges
edgeLow = xCenters - deltaBar;
edgeHigh = xCenters + (deltaBar * .99);
edges = [edgeLow;edgeHigh];
edges = edges(:);
% duplicate each element in the array nElements
heights = [nElements;nElements];
heights = heights(:);
% normalize the histogram area to sum 1
histArea = trapz(edges,heights);
nElementsNorm = nElements/histArea;
heightsNorm = heights/histArea;
function [overlap] = hist_overlap(dataX, dataY, nBinsX, nBinsY, vplot, color1, color2, color3)
% [overlap] = hist_overlap(dataX, dataY, nBinsX, nBinsY, vplot, color1, color2, color3)
%
% computes the overlap between two sample distributions from dataX and dataY
% nBinsX defines the number of bins to use in X histogram
% nBinsY defines the number of bins to use in Y histogram
% VPLOT can be 'true' or 'false' to show or not the histograms and their overlap
% The following parameters are optional
% Both histograms are Normalized so its area is equal to 1
% COLOR1 defines the filling color of the first distribution,
% COLOR2 defines the filling color of the second distribution and
% COLOR3 indicates the color of the overlaping area.
% As the histograms for dataX and dataY are normalized, OVERLAP indicates
% the overlaping fraction taking values from 0 to 1
%
% Example of utilization:
% x = randn(1,1000);
% y = 1+randn(1,1000);
% hist_overlap(x, y)
% alpha(0.6);
%% Argument validation
if nargin < 2
error('At least 2 arguments required');
end
if nargout > 0
% The function returns a value to a variable
if ~exist('vplot','var') || isempty(vplot)
vplot = false;
end
else
vplot = true;
end
% Number of bins
if ~exist('nBinsX','var') || isempty(nBinsX)
nBinsX = 40;
end
if ~exist('nBinsY','var') || isempty(nBinsY)
nBinsY = 40;
end
% colors
if ~exist('color1','var') || isempty(color1)
color1 = 'black';
end
if ~exist('color2','var') || isempty(color2)
color2 = 'blue';
end
if ~exist('color3','var') || isempty(color3)
color3 = 'yellow';
end
%Compute normalized histograms
[~, ~, heightsX, edgesX] = hist_norm(dataX, nBinsX);
[~, ~, heightsY, edgesY] = hist_norm(dataY, nBinsY);
minEdge = min([edgesX;edgesY]);
maxEdge = max([edgesX;edgesY]);
%Stair series
stairxX = [minEdge; edgesX; edgesX(end); edgesX(end)];
stairxY = [minEdge; edgesY; edgesY(end); edgesY(end)];
stairyX = [0; heightsX; heightsX(end); 0];
stairyY = [0; heightsY; heightsY(end); 0];
%Concatenate all the unique edges and its heights
%Unique edges
uEdgesX = [edgesX(1:2:end);edgesX(end)];
uEdgesY = [edgesY(1:2:end);edgesY(end)];
%Concatenate and sort Edges
edgesAll = sort([uEdgesX;uEdgesY]);
%Number of Edges and Heights expeted in the result
nEdgesAll = numel(edgesAll);
heightsOv = zeros([(nEdgesAll-1)*2,1]);
edgesOv = heightsOv;
k =1;
for iEdge = 1:nEdgesAll-1
centerOv = mean(edgesAll(iEdge:iEdge+1));
%Find the value of height for both staris and select the minimum
indexX = find(stairxX<=centerOv,1,'last');
valueX = stairyX(indexX);
indexY = find(stairxY<=centerOv,1,'last');
valueY = stairyY(indexY);
heightsOv(k) = min(valueX,valueY);
heightsOv(k+1) = heightsOv(k);
edgesOv(k) = edgesAll(iEdge);
edgesOv(k+1) = edgesAll(iEdge+1);
k = k+2;
end
%plot empiricial pdf (for dataX and dataY) its contour and
%the overlaping area
if ~vplot
% False vplot (Verbose Plot)
else
area(edgesX, heightsX, 'FaceColor',color1)
hold on
xlim([minEdge,maxEdge]);
area(edgesY, heightsY,'FaceColor',color2)
area(edgesOv, heightsOv, 'FaceColor',color3,'EdgeColor','none')
stairs(stairxX, stairyX, ':r');
stairs(stairxY, stairyY, '--r');
stairs(edgesOv, heightsOv, '-k');
hold off;
end
%Overlap as fraction of 1
overlap = trapz(edgesOv,heightsOv);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment