Skip to content

Instantly share code, notes, and snippets.

@Flash3001
Last active March 30, 2017 19:54
Show Gist options
  • Save Flash3001/9273494a8d94d7d9ae99cbcd8dac780a to your computer and use it in GitHub Desktop.
Save Flash3001/9273494a8d94d7d9ae99cbcd8dac780a to your computer and use it in GitHub Desktop.
Deduplicate a list of strings, ignoring diacritics and case - v2
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
namespace UnicodeDistinct
{
public class Program
{
#region Comparers
/// <summary>
/// Equality comparer that hashes a string by its length and treats two strings as equal
/// when they compare equal under <c>Culture</c> and <c>Options</c>.
/// </summary>
/// <remarks>
/// Because the hash code is the string length, strings that compare equal but differ in
/// length are placed in different hash buckets and will not be matched by hash-based
/// callers. Callers are expected to normalize their input before using this comparer.
/// </remarks>
class LengthHash : IEqualityComparer<string>
{
    /// <summary>Returns true when both arguments are null, or when both are non-null and compare equal.</summary>
    public bool Equals(string x, string y)
    {
        if (x == null || y == null)
        {
            // Only two nulls are equal to each other.
            return x == y;
        }

        // Compare (rather than IndexOf) makes this a symmetric equality test instead of
        // an "x contains y" test, which also matched unequal strings of the same length.
        return Culture.CompareInfo.Compare(x, y, Options) == 0;
    }

    /// <summary>Returns the length of <paramref name="obj"/>, or 0 when it is null.</summary>
    public int GetHashCode(string obj) => obj?.Length ?? 0;
}
/// <summary>
/// Equality comparer that gives every string the same hash code, so equality is decided
/// entirely by <see cref="Equals(string, string)"/> using <c>Culture</c> and <c>Options</c>.
/// </summary>
class IgnoreHash : IEqualityComparer<string>
{
    /// <summary>Returns true when both arguments are null, or when both are non-null and compare equal.</summary>
    public bool Equals(string x, string y)
    {
        if (x == null || y == null)
        {
            // Only two nulls are equal to each other.
            return x == y;
        }

        // Compare is a true equality test under Options. The previous length check plus
        // IndexOf was a containment test: it rejected equal strings whose UTF-16 lengths
        // differ and could accept unequal strings of the same length.
        return Culture.CompareInfo.Compare(x, y, Options) == 0;
    }

    /// <summary>Always returns 1, regardless of <paramref name="obj"/>.</summary>
    public int GetHashCode(string obj) => 1;
}
#endregion
#region Benchmarks
/// <summary>
/// Baseline benchmark: trims and normalizes every entry of <c>Itens</c> (null entries stay null),
/// then removes duplicates using the default string equality comparer.
/// </summary>
/// <returns>A list of the distinct cleaned strings.</returns>
[Benchmark]
public List<String> RunDefault()
{
    IEnumerable<string> cleaned = Itens.Select(entry => entry?.Trim().Normalize());
    return cleaned.Distinct().ToList();
}
/// <summary>
/// Benchmark: trims and normalizes every entry of <c>Itens</c> (null entries stay null),
/// then removes duplicates using a new <c>LengthHash</c> comparer.
/// </summary>
/// <returns>A list of the distinct cleaned strings.</returns>
[Benchmark]
public List<String> RunHashAsLength()
{
    IEnumerable<string> cleaned = Itens.Select(entry => entry?.Trim().Normalize());
    var comparer = new LengthHash();
    return cleaned.Distinct(comparer).ToList();
}
/// <summary>
/// Benchmark: trims and normalizes every entry of <c>Itens</c> (null entries stay null),
/// then removes duplicates using a new <c>IgnoreHash</c> comparer.
/// </summary>
/// <returns>A list of the distinct cleaned strings.</returns>
[Benchmark]
public List<String> RunIgnoreHash()
{
    IEnumerable<string> cleaned = Itens.Select(entry => entry?.Trim().Normalize());
    var comparer = new IgnoreHash();
    return cleaned.Distinct(comparer).ToList();
}
/// <summary>
/// Benchmark: trims and normalizes every entry of <c>Itens</c> (null entries stay null),
/// then removes duplicates using the built-in <c>StringComparer.InvariantCultureIgnoreCase</c>.
/// </summary>
/// <returns>A list of the distinct cleaned strings.</returns>
[Benchmark]
public List<String> RunStringComparer()
{
    IEnumerable<string> cleaned = Itens.Select(entry => entry?.Trim().Normalize());
    StringComparer comparer = StringComparer.InvariantCultureIgnoreCase;
    return cleaned.Distinct(comparer).ToList();
}
#endregion
// Culture whose CompareInfo the custom comparers use for string comparison.
// readonly: this is shared configuration and is never reassigned.
static readonly CultureInfo Culture = CultureInfo.InvariantCulture;
// Comparison flags used by the custom comparers: ignore nonspacing combining characters
// (such as diacritics), ignore case, and ignore symbols.
static readonly CompareOptions Options = CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase | CompareOptions.IgnoreSymbols;
// Sample input shared by every benchmark: variants that differ by case, accents,
// surrounding whitespace and Unicode composition, plus an empty string.
static readonly string[] Itens = new string[] { "\u212B", "\u00C5", "\u0041\u030A", "hi", " hi ", "HI", "hí", " Hî", "hi hi", " hí hí ", "olá", "OLÁ", " olá ", "", "ola", "hola", " holà ", "aaaa", "áâàa", " aâàa ", "áaàa", "áâaa ", "aaaa ", "áâaa", "áâaa", };
/// <summary>
/// Runs the BenchmarkDotNet benchmarks for <see cref="Program"/>, then prints the result
/// of each deduplication strategy once so the outputs can be compared by eye.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    BenchmarkRunner.Run<Program>();

    var p = new Program();
    // Each strategy is evaluated once; the same result supplies both the count and the listing.
    PrintResult("Default", p.RunDefault());
    PrintResult("HashAsLength", p.RunHashAsLength());
    PrintResult("IgnoreHash", p.RunIgnoreHash());
    PrintResult("RunStringComparer", p.RunStringComparer());

    // Wait for Enter before the process exits.
    Console.ReadLine();
}

/// <summary>Writes one line in the form "count label: item1, item2, ...".</summary>
private static void PrintResult(string label, List<String> items)
{
    Console.WriteLine($"{items.Count} {label}: {string.Join(", ", items)}");
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment