Skip to content

Instantly share code, notes, and snippets.

@joacar
Created June 20, 2018 08:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joacar/25fe02da14d42ef376b5efe2f41f6cfc to your computer and use it in GitHub Desktop.
Save joacar/25fe02da14d42ef376b5efe2f41f6cfc to your computer and use it in GitHub Desktop.
Accommodate missing values when rendering a portion of a data set with e.g. Google Pie Chart
/// <summary>
/// Keeps the largest entries of a data set (ordered by distribution) and can generate filler rows
/// for the share that was left out, so that charts expecting the input to sum to 100 %
/// (e.g. Google Pie Chart) render the subset correctly.
/// </summary>
public class PartialSetSlicer
{
    /// <summary>
    /// Threshold used when none can be derived from the data (five percent).
    /// </summary>
    private const double DefaultThreshold = 0.05d;

    /// <summary>
    /// Construct a <see cref="PartialSetSlicer" /> with <see cref="Threshold" /> set to the distribution of the
    /// last (smallest) element that made it into <see cref="Countries" />.
    /// </summary>
    /// <remarks>
    /// When no element is kept (empty <paramref name="set" /> or non-positive <paramref name="size" />) the
    /// default threshold of five percent is retained instead of failing.
    /// </remarks>
    /// <param name="set">Data set.</param>
    /// <param name="size">Size for subset.</param>
    /// <exception cref="ArgumentNullException"><paramref name="set" /> is <c>null</c>.</exception>
    public PartialSetSlicer(
        IEnumerable<AudienceCountryModel> set,
        int size) : this(set, size, DefaultThreshold)
    {
        // Last() would throw on an empty subset; only override the default when there is an element to read.
        if (Countries.Count > 0)
        {
            Threshold = (decimal) Countries[Countries.Count - 1].Distribution;
        }
    }

    /// <summary>
    /// Construct a <see cref="PartialSetSlicer" /> with an explicit <see cref="Threshold" />.
    /// </summary>
    /// <param name="set">Data set.</param>
    /// <param name="size">Size for subset.</param>
    /// <param name="threshold">Threshold as a fraction between 0 and 1 (inclusive).</param>
    /// <exception cref="ArgumentNullException"><paramref name="set" /> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="threshold" /> is outside the range 0 to 1, or is not a number.
    /// </exception>
    public PartialSetSlicer(
        IEnumerable<AudienceCountryModel> set,
        int size,
        double threshold)
    {
        if (set == null)
        {
            throw new ArgumentNullException(nameof(set));
        }

        // Written as a negated "inside the range" test so that double.NaN is rejected as well.
        if (!(threshold >= 0d && threshold <= 1d))
        {
            throw new ArgumentException("Threshold must be between 0 and 1", nameof(threshold));
        }

        Countries = set
            .OrderByDescending(country => country.Distribution)
            .ThenBy(country => country.TwoLetterIsoCountryCode)
            .Take(size)
            .ToList();
        Distribution = Countries.Sum(country => (decimal) country.Distribution);
        Threshold = (decimal) threshold;
    }

    /// <summary>
    /// Gets the entries that made it into the subset, largest distribution first
    /// (ties ordered by two letter ISO country code).
    /// </summary>
    public IReadOnlyList<AudienceCountryModel> Countries { get; }

    /// <summary>
    /// Gets the threshold for countries to leave out.
    /// </summary>
    /// <value>
    /// Default is five percent (0.05d)
    /// </value>
    public decimal Threshold { get; }

    /// <summary>
    /// Gets the total distribution for those countries that didn't make it into <see cref="Countries" />.
    /// </summary>
    public decimal NotAccountedForDistribution => 1m - Distribution;

    /// <summary>
    /// Gets the summed distribution of the entries in <see cref="Countries" />.
    /// </summary>
    public decimal Distribution { get; }

    /// <summary>
    /// Gets a value indicating whether there is a positive share left that <see cref="Countries" /> does not cover.
    /// </summary>
    /// <remarks>
    /// A negative remainder (input that sums to more than 1) is not treated as unaccounted distribution,
    /// since there is nothing to fill.
    /// </remarks>
    public bool HasUnaccountedDistribution => NotAccountedForDistribution > 0m;

    /// <summary>
    /// Create dummy rows to batch the not accounted for distribution under the "Other" label
    /// </summary>
    /// <remarks>
    /// Google Charts assume the input is relative, sums to 100%. To show only a subset, there is some work to be
    /// done to create a number of dummy rows that are below the threshold value and are unique. This will batch
    /// them as "Other". The returned rows always sum to <see cref="NotAccountedForDistribution" />.
    /// </remarks>
    /// <returns>
    /// No rows when nothing is unaccounted for; a single row with a <c>null</c> label when the remainder is
    /// below the threshold (or the threshold is zero); otherwise rows labelled "0", "1", ... each slightly
    /// below the threshold, followed by one row holding what is left.
    /// </returns>
    public IEnumerable<AudienceCountryModel> DummyRows()
    {
        var unaccounted = NotAccountedForDistribution;
        if (unaccounted <= 0m)
        {
            yield break;
        }

        // Calculate new value slightly less than threshold since they will be displayed if equal
        var value = Threshold * 0.99m;

        // A zero threshold cannot be split into rows (it would divide by zero), and a remainder that is
        // already below the threshold fits into one row.
        if (value <= 0m || unaccounted < Threshold)
        {
            yield return new AudienceCountryModel(null, (double) unaccounted);
            yield break;
        }

        // Number of full rows that fit without exceeding the remainder. Flooring against the actual row
        // value guarantees the left over is in [0, value) and therefore itself below the threshold.
        var fullRows = (int) Math.Floor(unaccounted / value);

        // Create dummy rows with unique label
        for (var index = 0; index < fullRows; index++)
        {
            yield return new AudienceCountryModel(index.ToString(), (double) value);
        }

        // Left overs
        var remainder = unaccounted - fullRows * value;
        if (remainder > 0m)
        {
            yield return new AudienceCountryModel(fullRows.ToString(), (double) remainder);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment