Skip to content

Instantly share code, notes, and snippets.

@keckelt
Last active December 27, 2017 13:04
Show Gist options
  • Save keckelt/f268ad0d99217c8a77fc7edd24ede461 to your computer and use it in GitHub Desktop.
Save keckelt/f268ad0d99217c8a77fc7edd24ede461 to your computer and use it in GitHub Desktop.
Split normal distributions
close all
% A stored test set can be loaded here instead of sampling:
%load('female.mat')
%load('male.mat')

% Draw integer ages from two normal distributions: normrnd(mean, sigma, rows, cols)
female = floor(normrnd(66, 8, 1, 100));
male = floor(normrnd(58, 5, 1, 80));

% Common age range of both groups, widened by one year on each side.
% "first" trims the charts to the relevant section, "last" fixes the common vector length.
first = min([female(:); male(:)]) - 1;
last = max([female(:); male(:)]) + 1;

% Count how often each age occurs. Element k of the result is the count for age k;
% the explicit output size [last 1] zero-pads both vectors to the same length.
binnedFemale = accumarray(female(:), 1, [last 1]);
binnedMale = accumarray(male(:), 1, [last 1]);

% Drop the entries below the minimal age so that element k belongs to age x(k)
x = first:last; % x axis
binnedFemale = binnedFemale(x);
binnedMale = binnedMale(x);
% Cumulative histograms in ascending age order:
% element k is the number of persons with age <= x(k)
aggHistFemale = cumsum(binnedFemale);
aggHistMale = cumsum(binnedMale);
% Cumulative histograms accumulated from the oldest age downwards.
% NOTE: these vectors stay in REVERSED order, i.e. element k is the number of
% persons with age >= x(end-k+1); flip them before pairing them with x.
aggHistFemale2 = cumsum(binnedFemale(end:-1:1));
aggHistMale2 = cumsum(binnedMale(end:-1:1));
% Jaccard index between the set of all females and the region "age <= x(k)":
%   intersection = females inside the region
%   union        = all females + males inside the region
femaleJaccards1 = aggHistFemale ./ (aggHistMale + max(aggHistFemale));
% Same for the region "age >= x(k)". The reversed cumulative sums are used, so the
% result is flipped back to the first:last ordering of x.
femaleJaccards2 = flipud(aggHistFemale2 ./ (aggHistMale2 + max(aggHistFemale2)));

% Best split position for each of the two region types
[femaleJaccard1, femaleSplitIndex1] = max(femaleJaccards1);
[femaleJaccard2, femaleSplitIndex2] = max(femaleJaccards2);

% Keep the better region type; a tie favours the region starting at "first"
if femaleJaccard1 >= femaleJaccard2
    % region reaches from the lowest age up to the split
    femaleJaccard = femaleJaccard1;
    femaleStart = first;
    femaleSplit = x(femaleSplitIndex1);
else
    % region reaches from the highest age down to the split
    femaleJaccard = femaleJaccard2;
    femaleStart = last;
    femaleSplit = x(femaleSplitIndex2);
end

X = sprintf('Female: Jaccard Score of %f with region from %d to %d.', ...
    femaleJaccard, femaleStart, femaleSplit);
fprintf('%s\n', X);
% Jaccard index between the set of all males and the region "age <= x(k)":
%   intersection = males inside the region
%   union        = all males + females inside the region
maleJaccards1 = aggHistMale ./ (aggHistFemale + max(aggHistMale));
% Same for the region "age >= x(k)". The reversed cumulative sums are used, so the
% result is flipped back to the first:last ordering of x.
maleJaccards2 = flipud(aggHistMale2 ./ (aggHistFemale2 + max(aggHistMale2)));

% Best split position for each of the two region types
[maleJaccard1, maleSplitIndex1] = max(maleJaccards1);
[maleJaccard2, maleSplitIndex2] = max(maleJaccards2);

% Keep the better region type; a tie favours the region starting at "first"
if maleJaccard1 >= maleJaccard2
    % region reaches from the lowest age up to the split
    maleJaccard = maleJaccard1;
    maleStart = first;
    maleSplit = x(maleSplitIndex1);
else
    % region reaches from the highest age down to the split
    maleJaccard = maleJaccard2;
    maleStart = last;
    maleSplit = x(maleSplitIndex2);
end

X = sprintf('Male: Jaccard Score of %f with region from %d to %d.', ...
    maleJaccard, maleStart, maleSplit);
fprintf('%s\n', X);
% Figure: age histogram per group with the selected region shaded.
% Each subplot also shows the split age of the OTHER group as a dotted line.
figure('Position', [0 500 900 500])

% --- upper chart: female ---
subplot(2, 1, 1)
bar(x, binnedFemale, 'g')
title('female')
% (disabled) marker for the female split itself:
%line([femaleSplit femaleSplit], [0 max(binnedFemale)], 'Color','green','LineStyle',':');
line(maleSplit * [1 1], [0 max(binnedFemale)], 'LineStyle', ':', 'Color', 'blue');
% translucent rectangle spanning femaleStart..femaleSplit over the full bar height
patch('Vertices', [femaleStart, 0; ...
                   femaleSplit, 0; ...
                   femaleSplit, max(binnedFemale); ...
                   femaleStart, max(binnedFemale)], ...
      'Faces', 1:4, ...
      'FaceAlpha', 0.15, ...
      'FaceColor', 'g');

% --- lower chart: male ---
subplot(2, 1, 2)
bar(x, binnedMale, 'b')
title('male')
% (disabled) marker for the male split itself:
%line([maleSplit maleSplit], [0 max(binnedMale)], 'Color','blue','LineStyle',':');
line(femaleSplit * [1 1], [0 max(binnedMale)], 'LineStyle', ':', 'Color', 'green');
% translucent rectangle spanning maleStart..maleSplit over the full bar height
patch('Vertices', [maleStart, 0; ...
                   maleSplit, 0; ...
                   maleSplit, max(binnedMale); ...
                   maleStart, max(binnedMale)], ...
      'Faces', 1:4, ...
      'FaceAlpha', 0.15, ...
      'FaceColor', 'b');
% Figure: cumulative histograms of both groups with the selected split ages
% (green dotted = female split, blue dotted = male split).
figure('pos',[0 0 900 450])

% Upper chart: number of persons with age <= x
subplot(2,1,1)
plot(x, aggHistFemale, 'g', x, aggHistMale, 'b')
title('aggregated histograms (age increasing)')
line([femaleSplit femaleSplit], [0 max(max(aggHistFemale), max(aggHistMale))], 'Color','green','LineStyle',':');
line([maleSplit maleSplit], [0 max(max(aggHistFemale), max(aggHistMale))], 'Color','blue','LineStyle',':');

% Lower chart: number of persons with age >= x, drawn on a reversed x axis.
% aggHistFemale2 / aggHistMale2 are cumulative sums over the flipped bins, i.e. they
% are stored in reversed age order. They must be flipped back before being paired
% with x; otherwise the curves are mirrored relative to the axis labels and to the
% split markers, which are given in real age coordinates.
subplot(2,1,2)
plot(x, flipud(aggHistFemale2), 'g', x, flipud(aggHistMale2), 'b')
set(gca, 'xdir', 'reverse')
title('aggregated histograms (age decreasing)')
% The totals of the reversed sums equal those of the forward sums, so the same
% marker height is used as in the upper chart.
line([femaleSplit femaleSplit], [0 max(max(aggHistFemale), max(aggHistMale))], 'Color','green','LineStyle',':');
line([maleSplit maleSplit], [0 max(max(aggHistFemale), max(aggHistMale))], 'Color','blue','LineStyle',':');
% Figure: Jaccard score of every candidate split for both region types.
% In each chart a dotted marker is drawn only for a group whose selected region is of
% that chart's type, so every group is marked in exactly one of the two charts.
figure('pos',[910 0 1000 1000])
diffMin = 0; % lower end of the marker lines

% Upper chart: region "Age <= x"
subplot(2,1,1)
diffMax = max([max(femaleJaccards1), max(maleJaccards1)]); % marker height = highest score shown
plot(x, femaleJaccards1, 'g', x, maleJaccards1, 'b')
title('jaccard scores "Age <= x"')
if (femaleJaccard1 >= femaleJaccard2)
    line([x(femaleSplitIndex1) x(femaleSplitIndex1)], [diffMin diffMax], 'Color','green','LineStyle',':');
end
if (maleJaccard1 >= maleJaccard2)
    line([x(maleSplitIndex1) x(maleSplitIndex1)], [diffMin diffMax], 'Color','blue','LineStyle',':');
end

% Lower chart: region "Age >= x", drawn on a reversed x axis
subplot(2,1,2)
diffMax = max([max(femaleJaccards2), max(maleJaccards2)]);
plot(x, femaleJaccards2, 'g', x, maleJaccards2, 'b')
set ( gca, 'xdir', 'reverse' )
title('jaccard scores "Age >= x"')
% Strict comparisons: on a tie the "Age <= x" region is the one that was selected,
% so the marker belongs to the upper chart only.
if (femaleJaccard2 > femaleJaccard1)
    line([x(femaleSplitIndex2) x(femaleSplitIndex2)], [diffMin diffMax], 'Color','green','LineStyle',':');
end
if (maleJaccard2 > maleJaccard1)
    line([x(maleSplitIndex2) x(maleSplitIndex2)], [diffMin diffMax], 'Color','blue','LineStyle',':');
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment