Area | A | ?? | B | Out A | Out B |
---|---|---|---|---|---|
Azure Blob Storage | I | != | i | ||
Azure Blob Storage | ı | != | i | ||
Azure Blob Storage | ı | != | I | ||
Azure Blob Storage | İ | != | i | ||
Azure Blob Storage | İ | != | I | ||
Azure Blob Storage | İ | != | ı | ||
Lucene LowerCaseFilter | I | == | i | ||
Lucene LowerCaseFilter | ı | != | i | ||
Lucene LowerCaseFilter | ı | != | I | ||
Lucene LowerCaseFilter | İ | == | i | ||
Lucene LowerCaseFilter | İ | == | I | ||
Lucene LowerCaseFilter | İ | != | ı | ||
Lucene LowerInvariantFilter | I | == | i | ||
Lucene LowerInvariantFilter | ı | != | i | ||
Lucene LowerInvariantFilter | ı | != | I | ||
Lucene LowerInvariantFilter | İ | != | i | ||
Lucene LowerInvariantFilter | İ | != | I | ||
Lucene LowerInvariantFilter | İ | != | ı | ||
SQL Server collation SQL_Latin1_General_CP1_CI_AS | I | == | i | ||
SQL Server collation SQL_Latin1_General_CP1_CI_AS | ı | != | i | ||
SQL Server collation SQL_Latin1_General_CP1_CI_AS | ı | != | I | ||
SQL Server collation SQL_Latin1_General_CP1_CI_AS | İ | != | i | ||
SQL Server collation SQL_Latin1_General_CP1_CI_AS | İ | != | I | ||
SQL Server collation SQL_Latin1_General_CP1_CI_AS | İ | != | ı | ||
StringComparison.InvariantCultureIgnoreCase | I | == | i | ||
StringComparison.InvariantCultureIgnoreCase | ı | != | i | ||
StringComparison.InvariantCultureIgnoreCase | ı | != | I | ||
StringComparison.InvariantCultureIgnoreCase | İ | != | i | ||
StringComparison.InvariantCultureIgnoreCase | İ | != | I | ||
StringComparison.InvariantCultureIgnoreCase | İ | != | ı | ||
StringComparison.OrdinalIgnoreCase | I | == | i | ||
StringComparison.OrdinalIgnoreCase | ı | != | i | ||
StringComparison.OrdinalIgnoreCase | ı | != | I | ||
StringComparison.OrdinalIgnoreCase | İ | != | i | ||
StringComparison.OrdinalIgnoreCase | İ | != | I | ||
StringComparison.OrdinalIgnoreCase | İ | != | ı | ||
ToLower() | I | == | i | i | i |
ToLower() | ı | != | i | ı | i |
ToLower() | ı | != | I | ı | i |
ToLower() | İ | == | i | i | i |
ToLower() | İ | == | I | i | i |
ToLower() | İ | != | ı | i | ı |
ToLowerInvariant() | I | == | i | i | i |
ToLowerInvariant() | ı | != | i | ı | i |
ToLowerInvariant() | ı | != | I | ı | i |
ToLowerInvariant() | İ | != | i | İ | i |
ToLowerInvariant() | İ | != | I | İ | i |
ToLowerInvariant() | İ | != | ı | İ | ı |
ToUpper() | I | == | i | I | I |
ToUpper() | ı | == | i | I | I |
ToUpper() | ı | == | I | I | I |
ToUpper() | İ | != | i | İ | I |
ToUpper() | İ | != | I | İ | I |
ToUpper() | İ | != | ı | İ | I |
ToUpperInvariant() | I | == | i | I | I |
ToUpperInvariant() | ı | != | i | ı | I |
ToUpperInvariant() | ı | != | I | ı | I |
ToUpperInvariant() | İ | != | i | İ | I |
ToUpperInvariant() | İ | != | I | İ | I |
ToUpperInvariant() | İ | != | ı | İ | ı |
Windows file system | I | == | i | ||
Windows file system | ı | != | i | ||
Windows file system | ı | != | I | ||
Windows file system | İ | != | i | ||
Windows file system | İ | != | I | ||
Windows file system | İ | != | ı | ||
Last active
November 23, 2017 06:38
-
-
Save joelverhagen/e4cd4f6d86e3d346cc1141d0a741ef7a to your computer and use it in GitHub Desktop.
Demonstrate the case sensitivity properties of different NuGet.org components
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Data.SqlClient; | |
using System.IO; | |
using System.Linq; | |
using Lucene.Net.Analysis; | |
using Lucene.Net.Analysis.Standard; | |
using Lucene.Net.Analysis.Tokenattributes; | |
using Lucene.Net.Documents; | |
using Lucene.Net.Index; | |
using Lucene.Net.QueryParsers; | |
using Lucene.Net.Search; | |
using Lucene.Net.Store; | |
using Microsoft.WindowsAzure.Storage; | |
using Microsoft.WindowsAzure.Storage.Blob; | |
namespace CaseSensitivty | |
{ | |
class Program | |
{ | |
// Area names that are referenced both when recording results and when rendering the report.
private const string ToUpperInvariantArea = "ToUpperInvariant()";
private const string ToUpperArea = "ToUpper()";
private const string ToLowerInvariantArea = "ToLowerInvariant()";
private const string ToLowerArea = "ToLower()";
private const string FileSystemArea = "Windows file system";
private const string BlobStorageArea = "Azure Blob Storage";

/// <summary>
/// Runs every case-sensitivity experiment against the four "I"-like identifiers and
/// writes a comparison table to <c>log.txt</c> in the working directory.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Every area that was exercised, so areas with zero matches still appear in the report.
    var names = new HashSet<string>();
    // Every (area, A, B) combination that the area treated as the same identifier.
    var results = new HashSet<EqualityResult>();
    var ids = new[] { "İ", "ı", "I", "i" };

    CompareWithStringApis(names, results, ids);
    CompareWithLucene(names, results, ids);
    CompareWithFileSystem(names, results, ids);
    CompareWithSqlServer(names, results, ids);
    CompareWithAzureBlobStorage(names, results, ids);
    WriteReport(names, results, ids);
}

/// <summary>Registers <paramref name="name"/> as an exercised area and stores the pair when it compared equal.</summary>
private static void Record(HashSet<string> names, HashSet<EqualityResult> results, string name, string idA, string idB, bool areEqual)
{
    names.Add(name);
    if (areEqual)
    {
        results.Add(new EqualityResult(name, idA, idB));
    }
}

/// <summary>Compares each pair using the casing methods on <see cref="string"/> and the ignore-case <see cref="StringComparison"/> modes.</summary>
private static void CompareWithStringApis(HashSet<string> names, HashSet<EqualityResult> results, IReadOnlyList<string> ids)
{
    var invariantIgnoreCaseArea = $"{nameof(StringComparison)}.{nameof(StringComparison.InvariantCultureIgnoreCase)}";
    var ordinalIgnoreCaseArea = $"{nameof(StringComparison)}.{nameof(StringComparison.OrdinalIgnoreCase)}";

    foreach (var pair in GetPairs(ids))
    {
        var idA = pair.Item1;
        var idB = pair.Item2;

        Record(names, results, ToUpperInvariantArea, idA, idB, string.Equals(idA.ToUpperInvariant(), idB.ToUpperInvariant()));
        // ToUpper()/ToLower() are deliberately the culture-sensitive overloads: they are one of the things being measured.
        Record(names, results, ToUpperArea, idA, idB, string.Equals(idA.ToUpper(), idB.ToUpper()));
        Record(names, results, ToLowerInvariantArea, idA, idB, string.Equals(idA.ToLowerInvariant(), idB.ToLowerInvariant()));
        Record(names, results, ToLowerArea, idA, idB, string.Equals(idA.ToLower(), idB.ToLower()));
        Record(names, results, invariantIgnoreCaseArea, idA, idB, string.Equals(idA, idB, StringComparison.InvariantCultureIgnoreCase));
        Record(names, results, ordinalIgnoreCaseArea, idA, idB, string.Equals(idA, idB, StringComparison.OrdinalIgnoreCase));
    }
}

/// <summary>Indexes every identifier into a fresh Lucene index under <c>lucene</c> and queries each one back through both lower-casing fields.</summary>
private static void CompareWithLucene(HashSet<string> names, HashSet<EqualityResult> results, IReadOnlyList<string> ids)
{
    // ExecuteQuery records its hits under "Lucene {field}"; register the same names up front
    // so a field that produces no hits is still reported.
    names.Add($"Lucene {Constants.LowerCaseFilter}");
    names.Add($"Lucene {Constants.LowerInvariantFilter}");

    var directoryInfo = new DirectoryInfo("lucene");
    if (directoryInfo.Exists)
    {
        directoryInfo.Delete(recursive: true);
    }
    directoryInfo.Create();

    using (var directory = new SimpleFSDirectory(directoryInfo))
    {
        using (var writer = new IndexWriter(directory, new ExampleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED))
        {
            foreach (var id in ids)
            {
                writer.AddDocument(CreateDocument(id));
            }
            writer.Commit();
        }

        using (var searcher = new IndexSearcher(directory, readOnly: true))
        {
            foreach (var id in ids)
            {
                ExecuteQuery(results, searcher, Constants.LowerCaseFilter, id);
                ExecuteQuery(results, searcher, Constants.LowerInvariantFilter, id);
            }
        }
    }
}

/// <summary>Writes a file named after A under <c>files</c> and checks whether the file named after B now has the same contents.</summary>
private static void CompareWithFileSystem(HashSet<string> names, HashSet<EqualityResult> results, IReadOnlyList<string> ids)
{
    var directoryInfo = new DirectoryInfo("files");
    if (directoryInfo.Exists)
    {
        directoryInfo.Delete(recursive: true);
    }
    directoryInfo.Create();

    foreach (var pair in GetPairs(ids))
    {
        var idA = pair.Item1;
        var idB = pair.Item2;
        var pathA = Path.Combine(directoryInfo.FullName, $"{idA}.txt");
        var pathB = Path.Combine(directoryInfo.FullName, $"{idB}.txt");

        // Files from earlier pairs are left in place, so existence alone proves nothing;
        // a fresh GUID ties the read of B to this exact write of A.
        var expectedContents = Guid.NewGuid().ToString();
        File.WriteAllText(pathA, expectedContents);
        var sameFile = File.Exists(pathB) && File.ReadAllText(pathB) == expectedContents;

        Record(names, results, FileSystemArea, idA, idB, sameFile);
    }
}

/// <summary>Inserts A into a table variable with the collation under test and counts the rows equal to B.</summary>
private static void CompareWithSqlServer(HashSet<string> names, HashSet<EqualityResult> results, IReadOnlyList<string> ids)
{
    var collations = new[]
    {
        "SQL_Latin1_General_CP1_CI_AS",
    };

    // One connection serves every command; each command is its own batch with its own table variable.
    using (var connection = new SqlConnection(@"Data Source=(localdb)\mssqllocaldb; Initial Catalog=NuGetGallery; Integrated Security=True; MultipleActiveResultSets=True"))
    {
        connection.Open();

        foreach (var collation in collations)
        {
            var name = $"SQL Server collation {collation}";

            foreach (var pair in GetPairs(ids))
            {
                var idA = pair.Item1;
                var idB = pair.Item2;

                using (var command = connection.CreateCommand())
                {
                    // The collation comes from the fixed list above; the compared values are parameters.
                    command.CommandText = $@"
DECLARE @Values TABLE
(
[Value] [NVARCHAR] (255) COLLATE {collation} NOT NULL PRIMARY KEY
);
INSERT INTO @Values (Value) VALUES (@valueA);
SELECT COUNT(*) FROM @Values WHERE Value = @valueB;
";
                    command.Parameters.AddWithValue("@valueA", idA);
                    command.Parameters.AddWithValue("@valueB", idB);
                    var count = (int)command.ExecuteScalar();

                    Record(names, results, name, idA, idB, count > 0);
                }
            }
        }
    }
}

/// <summary>Uploads a blob named after A to the development storage account and checks whether the blob named after B now has the same contents.</summary>
private static void CompareWithAzureBlobStorage(HashSet<string> names, HashSet<EqualityResult> results, IReadOnlyList<string> ids)
{
    var storageAccount = CloudStorageAccount.DevelopmentStorageAccount;
    var blobClient = storageAccount.CreateCloudBlobClient();
    var container = blobClient.GetContainerReference("casesensitivty");
    container.CreateIfNotExists();

    // Start from an empty container so blobs from a previous run cannot interfere.
    foreach (var blob in container.ListBlobs(useFlatBlobListing: true).OfType<CloudBlockBlob>())
    {
        blob.Delete();
    }

    names.Add(BlobStorageArea);
    foreach (var pair in GetPairs(ids))
    {
        var idA = pair.Item1;
        var idB = pair.Item2;
        var blobA = container.GetBlockBlobReference($"{idA}.txt");
        var blobB = container.GetBlockBlobReference($"{idB}.txt");

        // Same GUID technique as the file-system experiment.
        var expectedContents = Guid.NewGuid().ToString();
        blobA.UploadText(expectedContents);
        var sameBlob = blobB.Exists() && blobB.DownloadText() == expectedContents;

        Record(names, results, BlobStorageArea, idA, idB, sameBlob);
    }
}

/// <summary>Returns the "Out A | Out B" cells for the casing areas, or the empty-cell marker for every other area.</summary>
private static string FormatOutputs(string name, string idA, string idB)
{
    switch (name)
    {
        case ToLowerArea:
            return $"{idA.ToLower()} | {idB.ToLower()}";
        case ToUpperArea:
            return $"{idA.ToUpper()} | {idB.ToUpper()}";
        case ToLowerInvariantArea:
            return $"{idA.ToLowerInvariant()} | {idB.ToLowerInvariant()}";
        case ToUpperInvariantArea:
            return $"{idA.ToUpperInvariant()} | {idB.ToUpperInvariant()}";
        default:
            return $" |";
    }
}

/// <summary>Writes one row per (area, pair) to <c>log.txt</c>, marking each pair as "==" or "!=".</summary>
private static void WriteReport(HashSet<string> names, HashSet<EqualityResult> results, IReadOnlyList<string> ids)
{
    // Pick up any area that only ever reported through the result set.
    foreach (var result in results)
    {
        names.Add(result.Name);
    }

    // Both values are loop-invariant, so compute them once instead of per row.
    var width = names.Max(x => x.Length);
    var pairs = GetPairs(ids).ToList();

    using (var logStream = new FileStream("log.txt", FileMode.Create))
    using (var writer = new StreamWriter(logStream))
    {
        writer.WriteLine($"{"Area".PadRight(width, ' ')} | A | ?? | B | Out A | Out B");
        writer.WriteLine($"{new string('-', width)} | - | -- | - | ----- | -----");

        foreach (var name in names.OrderBy(x => x))
        {
            foreach (var pair in pairs)
            {
                var idA = pair.Item1;
                var idB = pair.Item2;
                var equals = results.Contains(new EqualityResult(name, idA, idB));

                writer.Write($"{name.PadRight(width, ' ')} | {idA} | {(equals ? "==" : "!=")} | {idB} | ");
                writer.Write(FormatOutputs(name, idA, idB));
                writer.WriteLine();
            }
        }
    }
}
/// <summary>
/// Builds every unordered pair of distinct positions in <paramref name="values"/>, keeping
/// the earlier element as <c>Item1</c>, and returns the pairs sorted by <c>Item1</c> and then
/// <c>Item2</c> using the default string comparer.
/// </summary>
/// <param name="values">The identifiers to combine.</param>
/// <returns>The sorted pairs; empty when fewer than two values are supplied.</returns>
private static IEnumerable<Tuple<string, string>> GetPairs(IReadOnlyList<string> values)
{
    var total = values.Count;

    // Pair each position with every later position, then snapshot the combinations
    // before handing them to the (deferred) sort.
    var combinations = (
        from first in Enumerable.Range(0, total)
        from second in Enumerable.Range(first + 1, total - first - 1)
        select Tuple.Create(values[first], values[second])).ToList();

    return combinations
        .OrderBy(combination => combination.Item1)
        .ThenBy(combination => combination.Item2);
}
/// <summary>
/// Searches <paramref name="field"/> for <paramref name="id"/> and records, under the area
/// name <c>Lucene {field}</c>, an equality between <paramref name="id"/> and the stored
/// <c>Id</c> value of each returned document (at most 100 hits are requested).
/// </summary>
/// <param name="results">The set that receives one entry per hit.</param>
/// <param name="searcher">The searcher used to run the query.</param>
/// <param name="field">The indexed field to query.</param>
/// <param name="id">The identifier to look up.</param>
private static void ExecuteQuery(HashSet<EqualityResult> results, IndexSearcher searcher, string field, string id)
{
    var area = $"Lucene {field}";
    var parser = new QueryParser(
        Lucene.Net.Util.Version.LUCENE_30,
        Constants.Id,
        new ExampleAnalyzer());

    // The leading '+' makes the field clause required.
    var hits = searcher.Search(parser.Parse($"+{field}:{id}"), 100);

    var matches = hits.ScoreDocs.Select(hit => new EqualityResult(
        area,
        id,
        searcher.Doc(hit.Doc).GetField(Constants.Id).StringValue));

    results.UnionWith(matches);
}
/// <summary>
/// Creates a document that stores <paramref name="id"/> in the <c>Id</c>,
/// <c>LowerCaseFilter</c> and <c>LowerInvariantFilter</c> fields, each stored, analyzed and
/// carrying term vectors with positions and offsets.
/// </summary>
/// <param name="id">The identifier to store in every field.</param>
/// <returns>The populated document.</returns>
private static Document CreateDocument(string id)
{
    var fieldNames = new[]
    {
        Constants.Id,
        Constants.LowerCaseFilter,
        Constants.LowerInvariantFilter,
    };

    var document = new Document();
    foreach (var fieldName in fieldNames)
    {
        document.Add(new Field(
            fieldName,
            id,
            Field.Store.YES,
            Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS));
    }

    return document;
}
} | |
/// <summary>
/// Names of the Lucene document fields used by this program.
/// </summary>
public static class Constants
{
    /// <summary>Field holding the identifier with no case transformation applied.</summary>
    public const string Id = "Id";

    /// <summary>Field whose tokens are passed through Lucene's <c>LowerCaseFilter</c>.</summary>
    public const string LowerCaseFilter = "LowerCaseFilter";

    /// <summary>Field whose tokens are passed through <c>LowerInvariantFilter</c>.</summary>
    public const string LowerInvariantFilter = "LowerInvariantFilter";
}
/// <summary>
/// Per-field analyzer that maps each field in <see cref="Constants"/> to an
/// <see cref="IdAnalyzer"/> with the matching token transformation.
/// </summary>
public class ExampleAnalyzer : PerFieldAnalyzerWrapper
{
    // Built once and shared by every instance.
    private static readonly IReadOnlyDictionary<string, Analyzer> FieldAnalyzers = CreateFieldAnalyzers();

    /// <summary>
    /// Creates the wrapper with a <see cref="StandardAnalyzer"/> as the first base argument
    /// (presumably the fallback for unmapped fields; confirm against the Lucene.Net API).
    /// </summary>
    public ExampleAnalyzer()
        : base(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), FieldAnalyzers)
    {
    }

    /// <summary>Builds the field-name to analyzer map.</summary>
    private static IReadOnlyDictionary<string, Analyzer> CreateFieldAnalyzers()
    {
        var analyzers = new Dictionary<string, Analyzer>();

        // Id: token stream is returned unchanged.
        analyzers.Add(Constants.Id, new IdAnalyzer(stream => stream));
        analyzers.Add(Constants.LowerCaseFilter, new IdAnalyzer(stream => new LowerCaseFilter(stream)));
        analyzers.Add(Constants.LowerInvariantFilter, new IdAnalyzer(stream => new LowerInvariantFilter(stream)));

        return analyzers;
    }
}
/// <summary>
/// Token filter that lower-cases every character of each token in place using
/// <see cref="char.ToLowerInvariant(char)"/>.
/// </summary>
public sealed class LowerInvariantFilter : TokenFilter
{
    private readonly ITermAttribute termAttribute;

    /// <summary>Wraps <paramref name="input"/> and registers the term attribute this filter rewrites.</summary>
    /// <param name="input">The upstream token stream.</param>
    public LowerInvariantFilter(TokenStream input) : base(input)
    {
        termAttribute = AddAttribute<ITermAttribute>();
    }

    /// <summary>Advances the wrapped stream and lower-cases the current term.</summary>
    /// <returns><c>true</c> when a token was produced; <c>false</c> when the wrapped stream is exhausted.</returns>
    public override bool IncrementToken()
    {
        if (!input.IncrementToken())
        {
            return false;
        }

        // Only the first TermLength() characters of the buffer belong to the current term.
        var chars = termAttribute.TermBuffer();
        var count = termAttribute.TermLength();
        for (var index = 0; index < count; index++)
        {
            chars[index] = char.ToLowerInvariant(chars[index]);
        }

        return true;
    }
}
/// <summary>
/// Analyzer that tokenizes a field with a <see cref="KeywordTokenizer"/> and then passes the
/// stream through a caller-supplied wrapper.
/// </summary>
public class IdAnalyzer : Analyzer
{
    private readonly Func<TokenStream, TokenStream> _wrapTokenStream;

    /// <summary>Creates the analyzer.</summary>
    /// <param name="wrapTokenStream">Function applied to the keyword token stream; return the argument unchanged for no extra filtering.</param>
    /// <exception cref="ArgumentNullException"><paramref name="wrapTokenStream"/> is <c>null</c>.</exception>
    public IdAnalyzer(Func<TokenStream, TokenStream> wrapTokenStream)
    {
        // Fail at construction rather than with a NullReferenceException on first use.
        if (wrapTokenStream == null)
        {
            throw new ArgumentNullException(nameof(wrapTokenStream));
        }

        _wrapTokenStream = wrapTokenStream;
    }

    /// <summary>Builds the token stream for a field.</summary>
    /// <param name="fieldName">The field being analyzed; not used, every field is treated alike.</param>
    /// <param name="reader">The source text.</param>
    /// <returns>The wrapped keyword token stream.</returns>
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        return _wrapTokenStream(new KeywordTokenizer(reader));
    }
}
/// <summary>
/// Records that the area <see cref="Name"/> treated two identifiers as equal. The pair is
/// unordered: the constructor canonicalises it so that (a, b) and (b, a) produce equal
/// instances with equal hash codes.
/// </summary>
public class EqualityResult : IEquatable<EqualityResult>
{
    private readonly int _hashCode;

    /// <summary>Creates a result for the given area and pair.</summary>
    /// <param name="name">The area that compared the identifiers.</param>
    /// <param name="a">One identifier of the pair.</param>
    /// <param name="b">The other identifier of the pair.</param>
    public EqualityResult(string name, string a, string b)
    {
        // Canonicalise with an ordinal comparison so the ordering agrees with the ordinal
        // equality used by Equals. A culture-sensitive comparison can report 0 for strings
        // that differ ordinally, which would leave (a, b) and (b, a) un-normalised.
        if (string.CompareOrdinal(a, b) > 0)
        {
            var temp = b;
            b = a;
            a = temp;
        }

        Name = name;
        A = a;
        B = b;
        _hashCode = $"{Name}/{a}/{b}".GetHashCode();
    }

    /// <summary>The area that compared the identifiers.</summary>
    public string Name { get; }

    /// <summary>The ordinally smaller identifier of the pair.</summary>
    public string A { get; }

    /// <summary>The ordinally larger identifier of the pair.</summary>
    public string B { get; }

    /// <summary>Returns <c>true</c> when <paramref name="other"/> has the same area and the same canonical pair.</summary>
    public bool Equals(EqualityResult other)
    {
        if (other == null)
        {
            return false;
        }

        return Name == other.Name
            && A == other.A
            && B == other.B;
    }

    /// <inheritdoc />
    public override bool Equals(object obj)
    {
        return Equals(obj as EqualityResult);
    }

    /// <summary>Returns the hash computed at construction from the area and the canonical pair.</summary>
    public override int GetHashCode()
    {
        return _hashCode;
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment