Last active
March 30, 2017 19:54
-
-
Save Flash3001/9273494a8d94d7d9ae99cbcd8dac780a to your computer and use it in GitHub Desktop.
Deduplicate a list of strings, ignoring diacritics and case - v2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Globalization; | |
using System.Linq; | |
using BenchmarkDotNet.Attributes; | |
using BenchmarkDotNet.Running; | |
namespace UnicodeDistinct | |
{ | |
/// <summary>
/// Benchmarks several ways of de-duplicating a fixed list of strings and then
/// prints what each strategy produces so the results can be compared by eye.
/// </summary>
public class Program
{
    #region Comparers

    /// <summary>
    /// Comparer that buckets strings by their length and then compares them with
    /// <see cref="Options"/> under <see cref="Culture"/>.
    /// </summary>
    /// <remarks>
    /// Because the hash code is the string length, only strings of the same length
    /// can ever be treated as duplicates. <see cref="Equals"/> enforces the same
    /// rule so that equal strings always share a hash code.
    /// </remarks>
    private sealed class LengthHash : IEqualityComparer<string>
    {
        /// <summary>Returns true when both strings are null, or have the same length and compare equal under <see cref="Options"/>.</summary>
        public bool Equals(string x, string y) => SameLengthAndEquivalent(x, y);

        /// <summary>Returns the string length, or 0 for null.</summary>
        public int GetHashCode(string obj) => obj?.Length ?? 0;
    }

    /// <summary>
    /// Comparer that gives every string the same hash code, so every candidate pair
    /// is decided by <see cref="Equals"/> alone.
    /// </summary>
    private sealed class IgnoreHash : IEqualityComparer<string>
    {
        /// <summary>Returns true when both strings are null, or have the same length and compare equal under <see cref="Options"/>.</summary>
        public bool Equals(string x, string y) => SameLengthAndEquivalent(x, y);

        /// <summary>Always returns 1; hashing is deliberately not used to discriminate.</summary>
        public int GetHashCode(string obj) => 1;
    }

    /// <summary>
    /// Shared equality rule for both comparers.
    /// </summary>
    /// <param name="x">First string; may be null.</param>
    /// <param name="y">Second string; may be null.</param>
    /// <returns>
    /// True when both are null, or when both are non-null, have the same length and
    /// <c>CompareInfo.Compare</c> reports them equal under <see cref="Options"/>.
    /// </returns>
    private static bool SameLengthAndEquivalent(string x, string y)
    {
        if (x == null || y == null)
        {
            // Only two nulls are equal to each other.
            return x == y;
        }

        // Compare(...) == 0 is a real equality test. The previous IndexOf(...) >= 0
        // was a containment test: it is asymmetric and succeeds whenever y consists
        // only of characters that Options ignores.
        return x.Length == y.Length
            && Culture.CompareInfo.Compare(x, y, Options) == 0;
    }

    #endregion

    #region Benchmarks

    /// <summary>Distinct with the default string equality comparer.</summary>
    [Benchmark]
    public List<String> RunDefault() => Normalized().Distinct().ToList();

    /// <summary>Distinct with <see cref="LengthHash"/>.</summary>
    [Benchmark]
    public List<String> RunHashAsLength() => Normalized().Distinct(new LengthHash()).ToList();

    /// <summary>Distinct with <see cref="IgnoreHash"/>.</summary>
    [Benchmark]
    public List<String> RunIgnoreHash() => Normalized().Distinct(new IgnoreHash()).ToList();

    /// <summary>Distinct with <see cref="StringComparer.InvariantCultureIgnoreCase"/>.</summary>
    [Benchmark]
    public List<String> RunStringComparer() => Normalized().Distinct(StringComparer.InvariantCultureIgnoreCase).ToList();

    /// <summary>
    /// Lazily trims and Unicode-normalizes every input item; null items stay null.
    /// </summary>
    private static IEnumerable<string> Normalized() => Itens.Select(c => c?.Trim().Normalize());

    #endregion

    // Culture and comparison options shared by the custom comparers.
    private static readonly CultureInfo Culture = CultureInfo.InvariantCulture;
    private static readonly CompareOptions Options = CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase | CompareOptions.IgnoreSymbols;

    // Sample data: the same words in different Unicode forms, casing, accents and padding.
    private static readonly string[] Itens = new string[] { "\u212B", "\u00C5", "\u0041\u030A", "hi", " hi ", "HI", "hí", " Hî", "hi hi", " hí hí ", "olá", "OLÁ", " olá ", "", "ola", "hola", " holà ", "aaaa", "áâàa", " aâàa ", "áaàa", "áâaa ", "aaaa ", "áâaa", "áâaa", };

    /// <summary>
    /// Runs the benchmark suite, then prints the item count and items produced by
    /// each strategy and waits for a line of input before exiting.
    /// </summary>
    private static void Main(string[] args)
    {
        BenchmarkRunner.Run<Program>();

        var p = new Program();
        Report("Default", p.RunDefault());
        Report("HashAsLength", p.RunHashAsLength());
        Report("IgnoreHash", p.RunIgnoreHash());
        Report("RunStringComparer", p.RunStringComparer());

        Console.ReadLine();
    }

    /// <summary>
    /// Writes one result line in the form "&lt;count&gt; &lt;label&gt;: &lt;items&gt;".
    /// Takes the already-computed list so each pipeline runs only once per line.
    /// </summary>
    private static void Report(string label, List<string> result)
    {
        Console.WriteLine($"{result.Count} {label}: {string.Join(", ", result)}");
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment