Last active
August 29, 2015 14:02
-
-
Save yar-shukan/2c3bb818cdb168ef7c72 to your computer and use it in GitHub Desktop.
CollationComparer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Globalization; | |
using System.Reflection; | |
using Microsoft.SqlServer.Management.SqlParser.Metadata; | |
/// <summary>
/// String comparer backed by a SQL Server <see cref="CollationInfo"/>.
/// Equality and ordering are delegated to the collation's own comparers, while
/// hash codes are computed through the non-public <c>CompareInfo.GetHashCodeOfString</c>
/// method so that the collation's <see cref="CompareOptions"/> are passed to the hash
/// computation explicitly.
/// </summary>
/// <remarks>
/// The implementation relies on reflection over non-public members of
/// <see cref="CollationInfo"/> and <see cref="CompareInfo"/>; it fails at construction
/// time when those members are not present in the running framework.
/// </remarks>
public class CollationComparer : IEqualityComparer<string>, IComparer<string>
{
    private readonly CollationInfo _collationInfo;
    private readonly CompareOptions _compareOptions;
    private readonly Func<string, CompareOptions, bool, long, int> _getHashCodeOfString;
    private readonly CultureInfo _cultureInfo;

    /// <summary>
    /// Creates a comparer for the given collation.
    /// </summary>
    /// <param name="collationInfo">Collation whose comparison rules are used.</param>
    /// <exception cref="ArgumentNullException"><paramref name="collationInfo"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="collationInfo"/> has no non-public field of type
    /// <see cref="CompareOptions"/> or <see cref="CompareInfo"/> (thrown by the field lookup helper).
    /// </exception>
    /// <exception cref="MissingMethodException">
    /// The non-public <c>GetHashCodeOfString(string, CompareOptions, bool, long)</c> method
    /// was not found on the collation's <see cref="CompareInfo"/>.
    /// </exception>
    public CollationComparer(CollationInfo collationInfo)
    {
        if (collationInfo == null)
        {
            throw new ArgumentNullException("collationInfo");
        }

        _collationInfo = collationInfo;
        _compareOptions = TypeExtensions.GetInstancePrivateFieldValue<CollationInfo, CompareOptions>(_collationInfo);
        CompareInfo collationCompareInfo = TypeExtensions.GetInstancePrivateFieldValue<CollationInfo, CompareInfo>(_collationInfo);

        MethodInfo getHashCodeOfString = collationCompareInfo.GetType()
            .GetMethod("GetHashCodeOfString",
                BindingFlags.Instance | BindingFlags.NonPublic,
                null,
                new[] { typeof(string), typeof(CompareOptions), typeof(bool), typeof(long) },
                null);

        // GetMethod returns null when no such overload exists; fail with a descriptive
        // error instead of the ArgumentNullException Delegate.CreateDelegate would raise.
        if (getHashCodeOfString == null)
        {
            throw new MissingMethodException(
                "CompareInfo does not expose a non-public GetHashCodeOfString(string, CompareOptions, bool, long) method.");
        }

        // Bind the method to the CompareInfo instance once so later calls avoid reflection.
        _getHashCodeOfString = (Func<string, CompareOptions, bool, long, int>)Delegate.CreateDelegate(
            typeof(Func<string, CompareOptions, bool, long, int>), collationCompareInfo, getHashCodeOfString);

        _cultureInfo = CultureInfo.GetCultureInfo(collationCompareInfo.LCID);
    }

    /// <summary>Compare options read from the collation.</summary>
    public CompareOptions CompareOptions { get { return _compareOptions; } }

    /// <summary>Culture resolved from the LCID of the collation's <see cref="CompareInfo"/>.</summary>
    public CultureInfo CultureInfo { get { return _cultureInfo; } }

    /// <summary>Determines equality using the collation's equality comparer.</summary>
    public bool Equals(string x, string y)
    {
        return _collationInfo.EqualityComparer.Equals(x, y);
    }

    /// <summary>
    /// Computes a hash code for <paramref name="obj"/> using the collation's compare options.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="obj"/> is null.</exception>
    public int GetHashCode(string obj)
    {
        if (obj == null)
        {
            throw new ArgumentNullException("obj");
        }

        // This hack is needed because of an oddity in the internals of CultureAwareComparer.GetHashCode(string obj).
        // The 3rd and 4th arguments are taken from the disassembled CompareInfo.
        return _getHashCodeOfString(obj, CompareOptions, false, 0L);
    }

    /// <summary>Orders two strings using the collation's comparer.</summary>
    public int Compare(string x, string y)
    {
        return _collationInfo.Comparer.Compare(x, y);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections.Concurrent; | |
using System.Collections.Generic; | |
using System.Collections.Specialized; | |
using System.Globalization; | |
using System.Linq; | |
/// <summary>
/// A catalogue of BCL string and collection calls, grouped by whether they can be made
/// accent-insensitive (AI) / case-insensitive (CI). The methods only invoke the listed
/// members; all results are discarded.
/// </summary>
static class ListOfStringEqualityPitfalls
{
    /// <summary>
    /// All <see cref="string"/>'s instance methods that can't operate in AI (accent insensitive) style
    /// </summary>
    public static void No_AI_Support_OrdinalComparison_StringInstanceMethods(string strA, string strB)
    {
        //not possible to operate on strings in AI (accent insensitive) style on all the methods
        strA.Equals(strB);
        strA.GetHashCode();
        // string.Contains(string) performs an ordinal comparison, so it belongs in this group
        strA.Contains(strB);
        strA.IndexOfAny(strB.ToCharArray());
        strA.LastIndexOfAny(strB.ToCharArray());
        strA.Replace('a', 'b');
        strA.Split('a');
        strA.TrimStart('ф');
        strA.TrimEnd('a');
    }

    /// <summary>
    /// All <see cref="string"/>'s static methods that can't operate in AI (accent insensitive) style
    /// </summary>
    public static void No_AI_Support_OrdinalComparison_StringStaticMethods(string strA, string strB)
    {
        string.CompareOrdinal(strA, strB);
    }

    /// <summary>
    /// <see cref="string"/> instance methods that use <seealso cref="CultureInfo.CurrentCulture"/>
    /// implicitly in the implementation. Also none of them can be used with <see cref="CompareOptions.IgnoreNonSpace"/>
    /// because they simply don't have this overload.
    /// All of these <see cref="string"/> instance methods can't operate in AI (accent insensitive) style
    /// </summary>
    public static void No_AI_Support_ImplicitCurrentCulture_StringInstanceMethods(string strA, string strB)
    {
        strA.CompareTo(strB);
        strA.EndsWith(strB);
        strA.IndexOf(strB);
        strA.LastIndexOf(strB);
        strA.StartsWith(strB);
        strA.ToLower();
        strA.ToUpper();
    }

    /// <summary>
    /// The methods that accept <see cref="CompareOptions"/> as parameter, so the string comparison
    /// can be made CI and AI.
    /// </summary>
    public static void With_AI_Support_ShouldPassCulture_StringMethods(string strA, string strB)
    {
        string.Compare(strA, strB, CultureInfo.CurrentCulture, CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase);
    }

    /// <summary>
    /// CompareInfo methods that can operate on strings using <see cref="CompareOptions"/>
    /// parameters
    /// </summary>
    public static void With_AI_Support_CulturesCompareInfoMethods(string strA, string strB)
    {
        CompareInfo compareInfo = CultureInfo.CurrentCulture.CompareInfo;
        compareInfo.Compare(strA, strB, CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase);
        compareInfo.IndexOf(strA, strB, CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase);
        compareInfo.LastIndexOf(strA, strB, CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreCase);
    }

    /// <summary>
    /// Collection and LINQ members that compare strings implicitly. The calls are ordered so
    /// that the method runs to completion: keyed reads happen before the key is removed.
    /// </summary>
    public static void Implicit_StringMethodCalls_In_Collections()
    {
        const string value = "abc";
        var other = new[] { "äbc" };

        /////////////// ALL GENERIC COLLECTIONS' METHODS GO HERE ////////////////////////////////////////
        IList<string> iList = /* everything that can be in right applies */ other;
        iList.IndexOf(value);

        ICollection<string> iCollection = /* everything that can be in right applies */ other.ToList();
        iCollection.Contains(value);
        iCollection.Remove(value);

        IDictionary<string, object> iDictionary = /* everything that can be in right applies */ other.ToDictionary(s => s, s => (object)s);
        iDictionary.Add(value, 123);
        iDictionary.ContainsKey(value);
        object obj;
        iDictionary.TryGetValue(value, out obj);
        // Read through the indexer while the key is still present; the Remove below deletes
        // the entry, after which the indexer would throw KeyNotFoundException.
        object o = iDictionary[value];
        iDictionary.Remove(new KeyValuePair<string, object>(value, 123));

        ISet<string> iSet = /* everything that can be in right applies */ new HashSet<string>();
        iSet.Add(value);
        iSet.ExceptWith(other);
        iSet.IntersectWith(other);
        iSet.IsProperSubsetOf(other);
        iSet.IsProperSupersetOf(other);
        iSet.IsSubsetOf(other);
        iSet.IsSupersetOf(other);
        iSet.Overlaps(other);
        iSet.SetEquals(other);
        iSet.SymmetricExceptWith(other);

        ///////// CUSTOM ON SINGLE CLASS BASIS METHODS ///////////////////////////////////////////////////////
        var list = new List<string>();
        list.BinarySearch(value);
        list.LastIndexOf(value); //not possible to operate on strings in AI CI style
        list.Sort();

        var linkedList = new LinkedList<string>();
        linkedList.Find(value); //not possible to operate on strings in AI CI style
        linkedList.FindLast(value); //not possible to operate on strings in AI CI style

        var sortedList = new SortedList<string, object>();
        sortedList.IndexOfKey(value);

        ///// http://msdn.microsoft.com/ru-ru/library/system.collections(v=vs.100).aspx !!!! Same list for non-generic versions !!!! //////
        // LESS LIKELY TO BE USED IN THE CODEBASE, BUT LISTED FOR THE FULL PICTURE
        IProducerConsumerCollection<string> pcc = /* everything that can be in right applies */ new ConcurrentBag<string>();
        string dummy;
        pcc.TryTake(out dummy); //not possible to operate on strings in AI CI style

        var concurrentDictionary = new ConcurrentDictionary<string, object>();
        concurrentDictionary.AddOrUpdate(value, 123, (s1, o1) => null);
        // The cast selects the GetOrAdd(TKey, TValue) overload; a bare null literal binds to
        // GetOrAdd(TKey, Func<TKey, TValue>) and throws ArgumentNullException.
        concurrentDictionary.GetOrAdd(value, (object)null);
        concurrentDictionary.TryAdd(value, null);
        concurrentDictionary.TryGetValue(value, out obj);
        concurrentDictionary.TryUpdate(value, null, null);
        // Indexer read must precede TryRemove, otherwise the key is gone and the read throws.
        object o2 = concurrentDictionary[value];
        concurrentDictionary.TryRemove(value, out obj);

        var nameValueCollection = new NameValueCollection(10);
        nameValueCollection.Get(value); //not possible to operate on strings in AI CI style
        nameValueCollection.GetValues(value); //not possible to operate on strings in AI CI style

        // !! LINQ ON IEnumerable<String>
        IEnumerable<string> enumerable = other;
        enumerable.Distinct();
        enumerable.Contains("abc");
        enumerable.Count(); //not possible to operate on strings in AI CI style
        enumerable.GroupBy(s => s);
        enumerable.GroupJoin(other, s => s, s => s, (s, enumerable1) => enumerable1);
        enumerable.Except(other);
        enumerable.Intersect(other);
        enumerable.Join(other, s => s, s => s, (s, s1) => s);
        enumerable.LongCount(); //not possible to operate on strings in AI CI style
        enumerable.OrderBy(s => s);
        enumerable.OrderByDescending(s => s);
        enumerable.SequenceEqual(other);
        enumerable.ToDictionary(s => s);
        enumerable.ToLookup(s => s);
        enumerable.Union(other);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Used collation: Latin1_General_100_CI_AI_KS_WS | |
aa, hashcode: 2053722942 | |
AA, hashcode: 2053722942 | |
äå, hashcode: -266555795 | |
ÄÅ, hashcode: -266555795 | |
Are all equals? False | |
Used collation: Latin1_General_100_CI_AI_KS_WS custom IEqualityComparer implementation | |
aa, hashcode: 2053722942 | |
AA, hashcode: 2053722942 | |
äå, hashcode: 2053722942 | |
ÄÅ, hashcode: 2053722942 | |
Are all equals? True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.IO; | |
using System.Linq; | |
using System.Text; | |
using Microsoft.SqlServer.Management.SqlParser.Metadata; | |
/// <summary>
/// Console entry point: writes a hash-code comparison report for a sample collation,
/// opens it, and then invokes every method of <see cref="ListOfStringEqualityPitfalls"/>.
/// </summary>
static class Program
{
    static void Main(string[] args)
    {
        PotentialSolution();

        const string strA = "whatever";
        const string strB = "whatever1";

        ListOfStringEqualityPitfalls.No_AI_Support_OrdinalComparison_StringInstanceMethods(strA, strB);
        ListOfStringEqualityPitfalls.With_AI_Support_ShouldPassCulture_StringMethods(strA, strB);
        ListOfStringEqualityPitfalls.With_AI_Support_CulturesCompareInfoMethods(strA, strB);
        ListOfStringEqualityPitfalls.No_AI_Support_OrdinalComparison_StringStaticMethods(strA, strB);
        ListOfStringEqualityPitfalls.No_AI_Support_ImplicitCurrentCulture_StringInstanceMethods(strA, strB);
        ListOfStringEqualityPitfalls.Implicit_StringMethodCalls_In_Collections();
    }

    /// <summary>
    /// Writes two hash-code reports to "output.txt" (one for the collation's own equality
    /// comparer, one for <see cref="CollationComparer"/>) and then starts the file via
    /// <see cref="Process.Start(string)"/>.
    /// </summary>
    private static void PotentialSolution()
    {
        const string outputPath = "output.txt";
        const string latin1GeneralCiAiKsWs = "Latin1_General_100_CI_AI_KS_WS";

        using (FileStream output = File.Open(outputPath, FileMode.Create, FileAccess.Write))
        using (var report = new StreamWriter(output, Encoding.UTF8))
        {
            var samples = new[] { "aa", "AA", "äå", "ÄÅ" };
            CollationInfo collation = CollationInfo.GetCollationInfo(latin1GeneralCiAiKsWs);

            // First the comparer exposed by the collation itself, then the custom one.
            PrintHashCodes(latin1GeneralCiAiKsWs, collation.EqualityComparer, report, samples);

            string customTitle = latin1GeneralCiAiKsWs + " custom IEqualityComparer implementation";
            PrintHashCodes(customTitle, new CollationComparer(collation), report, samples);
        }

        // The writer and stream are closed by now, so the file can be opened.
        Process.Start(outputPath);
    }

    /// <summary>
    /// Writes a header naming the collation, one line per string with its hash code
    /// left-padded to 10 characters, and a final line telling whether the comparer
    /// treats all the strings as a single distinct value.
    /// </summary>
    private static void PrintHashCodes(string collation, IEqualityComparer<string> equalityComparer, TextWriter writer, params string[] strings)
    {
        string headerFormat = Environment.NewLine + "Used collation: {0}";
        writer.WriteLine(headerFormat, collation + Environment.NewLine);

        for (int index = 0; index < strings.Length; index++)
        {
            string text = strings[index];
            string paddedHash = equalityComparer.GetHashCode(text).ToString().PadLeft(10);
            writer.WriteLine("{0}, hashcode: {1}", text, paddedHash);
        }

        bool allEqual = strings.Distinct(equalityComparer).Count() == 1;
        writer.WriteLine("Are all equals? {0}", allEqual);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Linq; | |
using System.Reflection; | |
/// <summary>
/// Reflection helpers for reading non-public state from objects.
/// </summary>
public static class TypeExtensions
{
    /// <summary>
    /// Returns the value of the first non-public instance field whose declared type is exactly
    /// <typeparamref name="TResult"/>, searching the runtime type of <paramref name="obj"/>
    /// first and then each base type in turn.
    /// </summary>
    /// <typeparam name="TInstance">Static type of the inspected object.</typeparam>
    /// <typeparam name="TResult">Exact field type to look for.</typeparam>
    /// <param name="obj">Object whose field is read.</param>
    /// <returns>The value of the first matching field.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="obj"/> is null.</exception>
    /// <exception cref="ArgumentException">No matching field exists anywhere in the type hierarchy.</exception>
    /// <remarks>
    /// When a type has several fields of type <typeparamref name="TResult"/>, whichever one
    /// reflection lists first is used.
    /// </remarks>
    public static TResult GetInstancePrivateFieldValue<TInstance, TResult>(TInstance obj) where TInstance : class
    {
        // Fail with a descriptive exception instead of a NullReferenceException on obj.GetType().
        if (obj == null)
        {
            throw new ArgumentNullException("obj");
        }

        Type type = obj.GetType();
        while (type != null)
        {
            FieldInfo fieldInfo = type.GetFields(BindingFlags.Instance | BindingFlags.NonPublic)
                .FirstOrDefault(fi => fi.FieldType == typeof(TResult));
            if (fieldInfo != null)
            {
                return (TResult)fieldInfo.GetValue(obj);
            }

            // Private fields of base classes are not reported on derived types, so walk upwards.
            type = type.BaseType;
        }

        throw new ArgumentException(
            string.Format("The class {0} doesn't contain private instance field of type {1} in hierarchy",
                obj.GetType().FullName, typeof(TResult).FullName));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment