Skip to content

Instantly share code, notes, and snippets.

@Flash3001
Last active March 30, 2017 00:28
Show Gist options
  • Save Flash3001/d50a6b43bba7bc61e3d85734e40dbed9 to your computer and use it in GitHub Desktop.
Save Flash3001/d50a6b43bba7bc61e3d85734e40dbed9 to your computer and use it in GitHub Desktop.
Get the distinct items of a list of strings, ignoring diacritics and case
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace UnicodeDistinct
{
public class Program
{
#region Comparers
/// <summary>
/// Equality comparer that treats two strings as equal when, after trimming, they have the
/// same length and compare equal under <c>Culture</c> while ignoring case and non-spacing
/// (diacritic) characters. The hash code is the trimmed length.
/// </summary>
/// <remarks>
/// The length check in <c>Equals</c> is what keeps it consistent with the length-based hash:
/// strings whose trimmed lengths differ are never reported equal by this comparer.
/// </remarks>
class LengthHash : IEqualityComparer<string>
{
    const CompareOptions Options = CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase;

    /// <summary>Returns true when both are null, or when both trimmed values have the same length and compare equal under <c>Options</c>.</summary>
    public bool Equals(string x, string y)
    {
        if (x == null || y == null) return x == y;
        var xt = x.Trim();
        var yt = y.Trim();
        // Compare (rather than IndexOf) makes this a symmetric whole-string test instead of
        // "yt occurs somewhere inside xt", which could match a mere substring once ignored
        // combining marks made the lengths line up.
        return xt.Length == yt.Length && Culture.CompareInfo.Compare(xt, yt, Options) == 0;
    }

    /// <summary>Returns the trimmed length of <paramref name="obj"/>, or 1 when it is null.</summary>
    public int GetHashCode(string obj) => obj?.Trim().Length ?? 1;
}
/// <summary>
/// Equality comparer that treats two strings as equal when, after trimming, they have the
/// same length and compare equal under <c>Culture</c> while ignoring case and non-spacing
/// (diacritic) characters. Every string gets the same hash code, so all of the
/// discrimination is left to <c>Equals</c>.
/// </summary>
class IgnoreHash : IEqualityComparer<string>
{
    const CompareOptions Options = CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase;

    /// <summary>Returns true when both are null, or when both trimmed values have the same length and compare equal under <c>Options</c>.</summary>
    public bool Equals(string x, string y)
    {
        if (x == null || y == null) return x == y;
        var xt = x.Trim();
        var yt = y.Trim();
        // Compare (rather than IndexOf) makes this a symmetric whole-string test instead of
        // "yt occurs somewhere inside xt", which could match a mere substring once ignored
        // combining marks made the lengths line up.
        return xt.Length == yt.Length && Culture.CompareInfo.Compare(xt, yt, Options) == 0;
    }

    /// <summary>Always returns 1, regardless of <paramref name="obj"/>.</summary>
    public int GetHashCode(string obj) => 1;
}
/// <summary>
/// Equality comparer that treats two strings as equal when, after trimming, they have the
/// same length and compare equal under <c>Culture</c> while ignoring case and non-spacing
/// (diacritic) characters. The hash code is derived from the trimmed string using the same
/// culture and the same comparison options.
/// </summary>
/// <remarks>
/// The hash must be computed with the same options as <c>Equals</c>. Hashing the
/// Unicode-normalized, upper-cased text keeps diacritics, so values such as "hi" and "hí"
/// (equal according to <c>Equals</c>) would get different hash codes and would not be merged
/// by hash-based operations such as <c>Distinct</c>.
/// NOTE(review): <c>CompareInfo.GetHashCode(string, CompareOptions)</c> is not available on
/// older target frameworks — confirm the project's target supports it.
/// </remarks>
class NormalizedHash : IEqualityComparer<string>
{
    const CompareOptions Options = CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase;

    /// <summary>Returns true when both are null, or when both trimmed values have the same length and compare equal under <c>Options</c>.</summary>
    public bool Equals(string x, string y)
    {
        if (x == null || y == null) return x == y;
        var xt = x.Trim();
        var yt = y.Trim();
        // Compare (rather than IndexOf) makes this a symmetric whole-string test instead of
        // "yt occurs somewhere inside xt".
        return xt.Length == yt.Length && Culture.CompareInfo.Compare(xt, yt, Options) == 0;
    }

    /// <summary>
    /// Returns a hash of the trimmed string computed by <c>Culture.CompareInfo</c> with
    /// <c>Options</c>, or 1 when <paramref name="obj"/> is null.
    /// </summary>
    public int GetHashCode(string obj)
    {
        if (obj == null) return 1;
        return Culture.CompareInfo.GetHashCode(obj.Trim(), Options);
    }
}
#endregion
#region Benchmarks
/// <summary>Baseline: de-duplicates <c>Itens</c> using <c>Distinct()</c> with no custom comparer.</summary>
[Benchmark]
public List<String> RunDefault()
{
    IEnumerable<string> unique = Itens.Distinct();
    return unique.ToList();
}
/// <summary>De-duplicates <c>Itens</c> using a freshly created <c>LengthHash</c> comparer.</summary>
[Benchmark]
public List<String> RunHashAsLength()
{
    var comparer = new LengthHash();
    IEnumerable<string> unique = Itens.Distinct(comparer);
    return unique.ToList();
}
/// <summary>De-duplicates <c>Itens</c> using a freshly created <c>IgnoreHash</c> comparer.</summary>
[Benchmark]
public List<String> RunIgnoreHash()
{
    var comparer = new IgnoreHash();
    IEnumerable<string> unique = Itens.Distinct(comparer);
    return unique.ToList();
}
/// <summary>De-duplicates <c>Itens</c> using a freshly created <c>NormalizedHash</c> comparer.</summary>
[Benchmark]
public List<String> RunNormalizedHash()
{
    var comparer = new NormalizedHash();
    IEnumerable<string> unique = Itens.Distinct(comparer);
    return unique.ToList();
}
/// <summary>
/// Trims every entry of <c>Itens</c> first (null entries stay null), then de-duplicates the
/// trimmed values with <c>StringComparer.InvariantCultureIgnoreCase</c>. The returned list
/// therefore contains trimmed strings.
/// </summary>
[Benchmark]
public List<String> RunTrimAndCompareWithStringComparer()
{
    var trimmed = Itens.Select(item => item?.Trim());
    var unique = trimmed.Distinct(StringComparer.InvariantCultureIgnoreCase);
    return unique.ToList();
}
#endregion
/// <summary>
/// Culture whose <c>CompareInfo</c> the custom comparers use. It is taken from
/// <c>CultureInfo.CurrentCulture</c> when this static field is initialized.
/// </summary>
private static CultureInfo Culture = CultureInfo.CurrentCulture;
/// <summary>
/// Sample input shared by all benchmarks: values that differ only by case, surrounding
/// whitespace and/or diacritics, plus an empty string and one exact duplicate.
/// </summary>
private static readonly string[] Itens =
{
    "hi", " hi ", "HI", "hí", " Hî", "hi hi", " hí hí ",
    "olá", "OLÁ", " olá ", "", "ola", "hola", " holà ",
    "aaaa", "áâàa", " aâàa ", "áaàa", "áâaa ", "aaaa ", "áâaa", "áâaa",
};
/// <summary>
/// Runs the BenchmarkDotNet benchmarks declared on <c>Program</c>, then prints the number of
/// items and the items each strategy returns, and finally waits for a line of console input.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    BenchmarkRunner.Run<Program>();
    var p = new Program();
    // Each strategy is evaluated once; the same list feeds both the count and the listing.
    PrintResult("Default", p.RunDefault());
    PrintResult("HashAsLength", p.RunHashAsLength());
    PrintResult("IgnoreHash", p.RunIgnoreHash());
    PrintResult("NormalizedHash", p.RunNormalizedHash());
    PrintResult("RunTrimAndCompareWithStringComparer", p.RunTrimAndCompareWithStringComparer());
    Console.ReadLine();
}

/// <summary>Writes one console line: the item count, the label, a colon, then the items separated by comma and space.</summary>
static void PrintResult(string label, List<string> result)
{
    Console.WriteLine($"{result.Count} {label}: {string.Join(", ", result)}");
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment