Skip to content

Instantly share code, notes, and snippets.

@joelverhagen
Last active November 23, 2017 06:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joelverhagen/e4cd4f6d86e3d346cc1141d0a741ef7a to your computer and use it in GitHub Desktop.
Save joelverhagen/e4cd4f6d86e3d346cc1141d0a741ef7a to your computer and use it in GitHub Desktop.
Demonstrate the case sensitivity properties of different NuGet.org components
| Area | A | ?? | B | Out A | Out B |
| ---- | - | -- | - | ----- | ----- |
| Azure Blob Storage | I | != | i | | |
| Azure Blob Storage | ı | != | i | | |
| Azure Blob Storage | ı | != | I | | |
| Azure Blob Storage | İ | != | i | | |
| Azure Blob Storage | İ | != | I | | |
| Azure Blob Storage | İ | != | ı | | |
| Lucene LowerCaseFilter | I | == | i | | |
| Lucene LowerCaseFilter | ı | != | i | | |
| Lucene LowerCaseFilter | ı | != | I | | |
| Lucene LowerCaseFilter | İ | == | i | | |
| Lucene LowerCaseFilter | İ | == | I | | |
| Lucene LowerCaseFilter | İ | != | ı | | |
| Lucene LowerInvariantFilter | I | == | i | | |
| Lucene LowerInvariantFilter | ı | != | i | | |
| Lucene LowerInvariantFilter | ı | != | I | | |
| Lucene LowerInvariantFilter | İ | != | i | | |
| Lucene LowerInvariantFilter | İ | != | I | | |
| Lucene LowerInvariantFilter | İ | != | ı | | |
| SQL Server collation SQL_Latin1_General_CP1_CI_AS | I | == | i | | |
| SQL Server collation SQL_Latin1_General_CP1_CI_AS | ı | != | i | | |
| SQL Server collation SQL_Latin1_General_CP1_CI_AS | ı | != | I | | |
| SQL Server collation SQL_Latin1_General_CP1_CI_AS | İ | != | i | | |
| SQL Server collation SQL_Latin1_General_CP1_CI_AS | İ | != | I | | |
| SQL Server collation SQL_Latin1_General_CP1_CI_AS | İ | != | ı | | |
| StringComparison.InvariantCultureIgnoreCase | I | == | i | | |
| StringComparison.InvariantCultureIgnoreCase | ı | != | i | | |
| StringComparison.InvariantCultureIgnoreCase | ı | != | I | | |
| StringComparison.InvariantCultureIgnoreCase | İ | != | i | | |
| StringComparison.InvariantCultureIgnoreCase | İ | != | I | | |
| StringComparison.InvariantCultureIgnoreCase | İ | != | ı | | |
| StringComparison.OrdinalIgnoreCase | I | == | i | | |
| StringComparison.OrdinalIgnoreCase | ı | != | i | | |
| StringComparison.OrdinalIgnoreCase | ı | != | I | | |
| StringComparison.OrdinalIgnoreCase | İ | != | i | | |
| StringComparison.OrdinalIgnoreCase | İ | != | I | | |
| StringComparison.OrdinalIgnoreCase | İ | != | ı | | |
| ToLower() | I | == | i | i | i |
| ToLower() | ı | != | i | ı | i |
| ToLower() | ı | != | I | ı | i |
| ToLower() | İ | == | i | i | i |
| ToLower() | İ | == | I | i | i |
| ToLower() | İ | != | ı | i | ı |
| ToLowerInvariant() | I | == | i | i | i |
| ToLowerInvariant() | ı | != | i | ı | i |
| ToLowerInvariant() | ı | != | I | ı | i |
| ToLowerInvariant() | İ | != | i | İ | i |
| ToLowerInvariant() | İ | != | I | İ | i |
| ToLowerInvariant() | İ | != | ı | İ | ı |
| ToUpper() | I | == | i | I | I |
| ToUpper() | ı | == | i | I | I |
| ToUpper() | ı | == | I | I | I |
| ToUpper() | İ | != | i | İ | I |
| ToUpper() | İ | != | I | İ | I |
| ToUpper() | İ | != | ı | İ | I |
| ToUpperInvariant() | I | == | i | I | I |
| ToUpperInvariant() | ı | != | i | ı | I |
| ToUpperInvariant() | ı | != | I | ı | I |
| ToUpperInvariant() | İ | != | i | İ | I |
| ToUpperInvariant() | İ | != | I | İ | I |
| ToUpperInvariant() | İ | != | ı | İ | ı |
| Windows file system | I | == | i | | |
| Windows file system | ı | != | i | | |
| Windows file system | ı | != | I | | |
| Windows file system | İ | != | i | | |
| Windows file system | İ | != | I | | |
| Windows file system | İ | != | ı | | |
using System;
using System.Collections.Generic;
using System.Data.SqlClient;
using System.IO;
using System.Linq;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Blob;
namespace CaseSensitivty
{
class Program
{
/// <summary>
/// Entry point. Runs every case-sensitivity probe over the sample IDs
/// (dotted and dotless variants of the letter I) and writes a table of the
/// outcomes to <c>log.txt</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    var names = new HashSet<string>();
    var results = new HashSet<EqualityResult>();
    var ids = new[] { "İ", "ı", "I", "i" };

    CompareInMemory(names, results, ids);
    CompareWithLucene(names, results, ids);
    CompareOnFileSystem(names, results, ids);
    CompareInSqlServer(names, results, ids);
    CompareInBlobStorage(names, results, ids);

    WriteReport(names, results, ids);
}

/// <summary>
/// Records which ID pairs are equal under each in-memory string operation
/// (case conversion followed by an exact comparison, or a case-insensitive comparison).
/// </summary>
private static void CompareInMemory(HashSet<string> names, HashSet<EqualityResult> results, IReadOnlyList<string> ids)
{
    // ToUpper()/ToLower() deliberately use the current culture: showing how they
    // differ from the invariant variants is the point of the experiment.
    var comparisons = new Dictionary<string, Func<string, string, bool>>
    {
        { "ToUpperInvariant()", (a, b) => string.Equals(a.ToUpperInvariant(), b.ToUpperInvariant()) },
        { "ToUpper()", (a, b) => string.Equals(a.ToUpper(), b.ToUpper()) },
        { "ToLowerInvariant()", (a, b) => string.Equals(a.ToLowerInvariant(), b.ToLowerInvariant()) },
        { "ToLower()", (a, b) => string.Equals(a.ToLower(), b.ToLower()) },
        {
            $"{nameof(StringComparison)}.{nameof(StringComparison.InvariantCultureIgnoreCase)}",
            (a, b) => string.Equals(a, b, StringComparison.InvariantCultureIgnoreCase)
        },
        {
            $"{nameof(StringComparison)}.{nameof(StringComparison.OrdinalIgnoreCase)}",
            (a, b) => string.Equals(a, b, StringComparison.OrdinalIgnoreCase)
        },
    };

    foreach (var comparison in comparisons)
    {
        // Register the area up front so it is reported even when no pair is equal.
        names.Add(comparison.Key);

        foreach (var pair in GetPairs(ids))
        {
            if (comparison.Value(pair.Item1, pair.Item2))
            {
                results.Add(new EqualityResult(comparison.Key, pair.Item1, pair.Item2));
            }
        }
    }
}

/// <summary>
/// Indexes every ID into a fresh Lucene index under the "lucene" directory, then
/// queries each lower-casing field for each ID and records the documents that match.
/// </summary>
private static void CompareWithLucene(HashSet<string> names, HashSet<EqualityResult> results, IReadOnlyList<string> ids)
{
    var fields = new[] { Constants.LowerCaseFilter, Constants.LowerInvariantFilter };
    foreach (var field in fields)
    {
        // Must stay in sync with the area name that ExecuteQuery records.
        names.Add($"Lucene {field}");
    }

    var directoryInfo = RecreateDirectory("lucene");
    using (var directory = new SimpleFSDirectory(directoryInfo))
    {
        using (var writer = new IndexWriter(directory, new ExampleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED))
        {
            foreach (var id in ids)
            {
                writer.AddDocument(CreateDocument(id));
            }

            writer.Commit();
        }

        using (var searcher = new IndexSearcher(directory, readOnly: true))
        {
            foreach (var id in ids)
            {
                foreach (var field in fields)
                {
                    ExecuteQuery(results, searcher, field, id);
                }
            }
        }
    }
}

/// <summary>
/// Writes a file named after the first ID of each pair into a fresh "files" directory
/// and records the pair as equal when the same contents can be read back through a
/// file name built from the second ID.
/// </summary>
private static void CompareOnFileSystem(HashSet<string> names, HashSet<EqualityResult> results, IReadOnlyList<string> ids)
{
    var name = "Windows file system";
    names.Add(name);

    var directoryInfo = RecreateDirectory("files");
    foreach (var pair in GetPairs(ids))
    {
        var idA = pair.Item1;
        var idB = pair.Item2;
        var pathA = Path.Combine(directoryInfo.FullName, $"{idA}.txt");
        var pathB = Path.Combine(directoryInfo.FullName, $"{idB}.txt");

        // A fresh GUID per pair tells "B resolves to the file just written" apart
        // from "B is a different file left over from an earlier pair".
        var expectedContents = Guid.NewGuid().ToString();
        File.WriteAllText(pathA, expectedContents);

        if (File.Exists(pathB) && File.ReadAllText(pathB) == expectedContents)
        {
            results.Add(new EqualityResult(name, idA, idB));
        }
    }
}

/// <summary>
/// For each collation, inserts the first ID of a pair into a table variable whose
/// column uses that collation and records the pair as equal when an equality
/// filter on the second ID finds the row.
/// </summary>
private static void CompareInSqlServer(HashSet<string> names, HashSet<EqualityResult> results, IReadOnlyList<string> ids)
{
    var collations = new[]
    {
        "SQL_Latin1_General_CP1_CI_AS",
    };

    // One connection serves every probe; each command is its own batch, so the
    // table variable starts out empty every time.
    using (var connection = new SqlConnection(@"Data Source=(localdb)\mssqllocaldb; Initial Catalog=NuGetGallery; Integrated Security=True; MultipleActiveResultSets=True"))
    {
        connection.Open();

        foreach (var collation in collations)
        {
            var name = $"SQL Server collation {collation}";
            names.Add(name);

            foreach (var pair in GetPairs(ids))
            {
                var idA = pair.Item1;
                var idB = pair.Item2;

                using (var command = connection.CreateCommand())
                {
                    // The collation name comes from the hard-coded list above (it cannot be
                    // passed as a parameter); the compared values are parameterized.
                    command.CommandText = $@"
DECLARE @Values TABLE
(
[Value] [NVARCHAR] (255) COLLATE {collation} NOT NULL PRIMARY KEY
);
INSERT INTO @Values (Value) VALUES (@valueA);
SELECT COUNT(*) FROM @Values WHERE Value = @valueB;
";
                    command.Parameters.AddWithValue("@valueA", idA);
                    command.Parameters.AddWithValue("@valueB", idB);

                    var count = (int)command.ExecuteScalar();
                    if (count > 0)
                    {
                        results.Add(new EqualityResult(name, idA, idB));
                    }
                }
            }
        }
    }
}

/// <summary>
/// Uploads a blob named after the first ID of each pair to an emptied container in
/// the development storage account and records the pair as equal when the same
/// contents can be downloaded through a blob name built from the second ID.
/// </summary>
private static void CompareInBlobStorage(HashSet<string> names, HashSet<EqualityResult> results, IReadOnlyList<string> ids)
{
    var name = "Azure Blob Storage";
    names.Add(name);

    var storageAccount = CloudStorageAccount.DevelopmentStorageAccount;
    var blobClient = storageAccount.CreateCloudBlobClient();
    var container = blobClient.GetContainerReference("casesensitivty");
    container.CreateIfNotExists();

    // Start from an empty container so blobs from a previous run cannot interfere.
    foreach (var blob in container.ListBlobs(useFlatBlobListing: true).OfType<CloudBlockBlob>())
    {
        blob.Delete();
    }

    foreach (var pair in GetPairs(ids))
    {
        var idA = pair.Item1;
        var idB = pair.Item2;
        var blobA = container.GetBlockBlobReference($"{idA}.txt");
        var blobB = container.GetBlockBlobReference($"{idB}.txt");

        // Same GUID technique as the file system probe.
        var expectedContents = Guid.NewGuid().ToString();
        blobA.UploadText(expectedContents);

        if (blobB.Exists() && blobB.DownloadText() == expectedContents)
        {
            results.Add(new EqualityResult(name, idA, idB));
        }
    }
}

/// <summary>
/// Writes one row per (area, ID pair) to <c>log.txt</c>, stating whether the pair was
/// found equal and, for the case-conversion areas, what each ID converts to.
/// </summary>
private static void WriteReport(HashSet<string> names, HashSet<EqualityResult> results, IReadOnlyList<string> ids)
{
    // Safety net: make sure every area that produced a result has rows in the table.
    foreach (var result in results)
    {
        names.Add(result.Name);
    }

    // The column width is the same for every row, so compute it once.
    var areaWidth = names.Max(x => x.Length);

    using (var logStream = new FileStream("log.txt", FileMode.Create))
    using (var writer = new StreamWriter(logStream))
    {
        writer.WriteLine($"{"Area".PadRight(areaWidth, ' ')} | A | ?? | B | Out A | Out B");
        writer.WriteLine($"{new string('-', areaWidth)} | - | -- | - | ----- | -----");

        foreach (var name in names.OrderBy(x => x))
        {
            foreach (var pair in GetPairs(ids))
            {
                var idA = pair.Item1;
                var idB = pair.Item2;
                var equals = results.Contains(new EqualityResult(name, idA, idB));
                writer.Write($"{name.PadRight(areaWidth, ' ')} | {idA} | {(equals ? "==" : "!=")} | {idB} | ");

                // Only the case-conversion areas have meaningful "Out" columns.
                switch (name)
                {
                    case "ToLower()":
                        writer.Write($"{idA.ToLower()} | {idB.ToLower()}");
                        break;
                    case "ToUpper()":
                        writer.Write($"{idA.ToUpper()} | {idB.ToUpper()}");
                        break;
                    case "ToLowerInvariant()":
                        writer.Write($"{idA.ToLowerInvariant()} | {idB.ToLowerInvariant()}");
                        break;
                    case "ToUpperInvariant()":
                        writer.Write($"{idA.ToUpperInvariant()} | {idB.ToUpperInvariant()}");
                        break;
                    default:
                        writer.Write($" |");
                        break;
                }

                writer.WriteLine();
            }
        }
    }
}

/// <summary>
/// Deletes the directory at <paramref name="path"/> (with its contents) if it exists,
/// then creates it again.
/// </summary>
private static DirectoryInfo RecreateDirectory(string path)
{
    var directoryInfo = new DirectoryInfo(path);
    if (directoryInfo.Exists)
    {
        directoryInfo.Delete(recursive: true);
    }

    directoryInfo.Create();
    return directoryInfo;
}
/// <summary>
/// Builds every unordered pair of distinct positions in <paramref name="values"/>,
/// keeping the earlier element as <c>Item1</c>.
/// </summary>
/// <param name="values">The values to combine.</param>
/// <returns>
/// The pairs sorted by <c>Item1</c> and then by <c>Item2</c>, using the default
/// string comparer.
/// </returns>
private static IEnumerable<Tuple<string, string>> GetPairs(IReadOnlyList<string> values)
{
    var count = values.Count;

    // Materialize the combinations immediately; only the sort below is deferred.
    var combinations = Enumerable
        .Range(0, count)
        .SelectMany(first => Enumerable
            .Range(first + 1, count - first - 1)
            .Select(second => Tuple.Create(values[first], values[second])))
        .ToList();

    return combinations
        .OrderBy(combination => combination.Item1)
        .ThenBy(combination => combination.Item2);
}
/// <summary>
/// Searches <paramref name="field"/> for <paramref name="id"/> and records one
/// equality result, under the area name "Lucene {field}", for every matching document.
/// </summary>
/// <param name="results">The set that receives the matches.</param>
/// <param name="searcher">The searcher over the index to query.</param>
/// <param name="field">The name of the indexed field to query.</param>
/// <param name="id">The ID to look for.</param>
private static void ExecuteQuery(HashSet<EqualityResult> results, IndexSearcher searcher, string field, string id)
{
    const int maxHits = 100;

    var queryParser = new QueryParser(
        Lucene.Net.Util.Version.LUCENE_30,
        Constants.Id,
        new ExampleAnalyzer());

    // Escape query-syntax characters (':', '*', '+', ...) so the ID is matched
    // literally instead of being interpreted by the parser.
    var queryString = $"+{field}:{QueryParser.Escape(id)}";
    var query = queryParser.Parse(queryString);
    var queryResults = searcher.Search(query, maxHits);

    foreach (var queryResult in queryResults.ScoreDocs)
    {
        // The stored Id field holds the ID the matching document was created from.
        var matchedId = searcher.Doc(queryResult.Doc).GetField(Constants.Id).StringValue;
        results.Add(new EqualityResult($"Lucene {field}", id, matchedId));
    }
}
/// <summary>
/// Builds a document that stores <paramref name="id"/> in each of the three fields
/// named in <see cref="Constants"/>.
/// </summary>
/// <param name="id">The value written to every field.</param>
/// <returns>The new document.</returns>
private static Document CreateDocument(string id)
{
    // Every field is configured identically; only the name differs.
    var fieldNames = new[]
    {
        Constants.Id,
        Constants.LowerCaseFilter,
        Constants.LowerInvariantFilter,
    };

    var document = new Document();
    foreach (var fieldName in fieldNames)
    {
        var field = new Field(
            fieldName,
            id,
            Field.Store.YES,
            Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS);
        document.Add(field);
    }

    return document;
}
}
/// <summary>
/// Names of the Lucene fields used by this program. Each document carries all three
/// fields, and <see cref="ExampleAnalyzer"/> assigns an analyzer to each name.
/// </summary>
public static class Constants
{
/// <summary>Field analyzed without any case conversion; read back to identify a matching document.</summary>
public const string Id = "Id";
/// <summary>Field analyzed with Lucene's <c>LowerCaseFilter</c>.</summary>
public const string LowerCaseFilter = "LowerCaseFilter";
/// <summary>Field analyzed with this program's <c>LowerInvariantFilter</c>.</summary>
public const string LowerInvariantFilter = "LowerInvariantFilter";
}
/// <summary>
/// Per-field analyzer used for both indexing and querying. Each field named in
/// <see cref="Constants"/> gets an <see cref="IdAnalyzer"/> with its own wrapping
/// step; a <c>StandardAnalyzer</c> is handed to the base class as the default.
/// </summary>
public class ExampleAnalyzer : PerFieldAnalyzerWrapper
{
    // Shared by every instance.
    private static readonly IReadOnlyDictionary<string, Analyzer> FieldAnalyzers = CreateFieldAnalyzers();

    public ExampleAnalyzer()
        : base(new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), FieldAnalyzers)
    {
    }

    // Maps each field name to the analyzer that should process it.
    private static IReadOnlyDictionary<string, Analyzer> CreateFieldAnalyzers()
    {
        var analyzers = new Dictionary<string, Analyzer>();
        analyzers.Add(Constants.Id, new IdAnalyzer(stream => stream));
        analyzers.Add(Constants.LowerCaseFilter, new IdAnalyzer(stream => new LowerCaseFilter(stream)));
        analyzers.Add(Constants.LowerInvariantFilter, new IdAnalyzer(stream => new LowerInvariantFilter(stream)));
        return analyzers;
    }
}
/// <summary>
/// Token filter that lower-cases each token in place, one <see cref="char"/> at a
/// time, using <see cref="char.ToLowerInvariant(char)"/>.
/// </summary>
public sealed class LowerInvariantFilter : TokenFilter
{
    private readonly ITermAttribute _termAttribute;

    public LowerInvariantFilter(TokenStream input) : base(input)
    {
        _termAttribute = AddAttribute<ITermAttribute>();
    }

    /// <summary>
    /// Advances the wrapped stream and lower-cases the characters of the new token.
    /// </summary>
    /// <returns><c>false</c> when the wrapped stream has no more tokens; otherwise <c>true</c>.</returns>
    public override bool IncrementToken()
    {
        if (!input.IncrementToken())
        {
            return false;
        }

        var characters = _termAttribute.TermBuffer();
        var used = _termAttribute.TermLength();

        // Only the first `used` characters of the buffer are rewritten.
        for (var index = 0; index < used; index++)
        {
            characters[index] = char.ToLowerInvariant(characters[index]);
        }

        return true;
    }
}
/// <summary>
/// Analyzer that creates a <c>KeywordTokenizer</c> over the reader and passes it
/// through a caller-supplied wrapping function.
/// </summary>
public class IdAnalyzer : Analyzer
{
    private readonly Func<TokenStream, TokenStream> _wrapTokenStream;

    /// <param name="wrapTokenStream">
    /// Applied to the tokenizer to produce the final stream; pass an identity
    /// function to leave the tokenizer unwrapped.
    /// </param>
    public IdAnalyzer(Func<TokenStream, TokenStream> wrapTokenStream)
    {
        _wrapTokenStream = wrapTokenStream;
    }

    /// <summary>Creates the tokenizer for <paramref name="reader"/> and wraps it.</summary>
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        var tokenizer = new KeywordTokenizer(reader);
        return _wrapTokenStream(tokenizer);
    }
}
/// <summary>
/// Records that two values were found equal within a named area. The pair is
/// unordered: <c>(name, x, y)</c> and <c>(name, y, x)</c> are equal and share a
/// hash code.
/// </summary>
public class EqualityResult : IEquatable<EqualityResult>
{
    private readonly int _hashCode;

    /// <param name="name">The area in which the two values were compared.</param>
    /// <param name="a">One value of the pair.</param>
    /// <param name="b">The other value of the pair.</param>
    public EqualityResult(string name, string a, string b)
    {
        // Normalize the pair so the smaller value always ends up in A. The ordering
        // must be ordinal, like Equals below: a culture-sensitive comparison can
        // report 0 for strings that differ (for example by a soft hyphen), which
        // would leave (x, y) and (y, x) unswapped and therefore unequal.
        // CompareOrdinal also accepts null for either argument.
        if (string.CompareOrdinal(a, b) > 0)
        {
            var temp = b;
            b = a;
            a = temp;
        }

        Name = name;
        A = a;
        B = b;

        // All three values are fixed at construction, so hash once.
        _hashCode = $"{Name}/{a}/{b}".GetHashCode();
    }

    /// <summary>The area in which the comparison was made.</summary>
    public string Name { get; }

    /// <summary>The value of the pair that sorts first in ordinal order.</summary>
    public string A { get; }

    /// <summary>The value of the pair that sorts last in ordinal order.</summary>
    public string B { get; }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="other"/> has the same name and the same
    /// two values (compared ordinally).
    /// </summary>
    public bool Equals(EqualityResult other)
    {
        if (ReferenceEquals(other, null))
        {
            return false;
        }

        return string.Equals(Name, other.Name, StringComparison.Ordinal)
            && string.Equals(A, other.A, StringComparison.Ordinal)
            && string.Equals(B, other.B, StringComparison.Ordinal);
    }

    public override bool Equals(object obj)
    {
        return Equals(obj as EqualityResult);
    }

    public override int GetHashCode()
    {
        return _hashCode;
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment