Last active
May 4, 2019 00:25
-
-
Save tarekgh/55dfaf0f44689738c3a6ca67941ccdc2 to your computer and use it in GitHub Desktop.
The CaseFolding project shows how to generate and access case folding tables in a size-optimized way.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Licensed to the .NET Foundation under one or more agreements. | |
// The .NET Foundation licenses this file to you under the MIT license. | |
// See the LICENSE file in the project root for more information. | |
using System; | |
using System.IO; | |
using System.Globalization; | |
using System.Collections.Generic; | |
using System.Runtime.CompilerServices; | |
namespace CaseFolding | |
{ | |
/// <summary>
/// Builds a three-level (8-4-4 bit) lookup table for Unicode simple case folding of
/// BMP code points from a CaseFolding.txt file, dumps the tables as C# array source
/// to the console, and validates that every mapping can be read back.
/// </summary>
class Program
{
    // Level 1: indexed by the high 8 bits of the code point; holds offsets into l2.
    private static ushort[] l1;
    // Level 2: indexed by (l1 offset + next 4 bits); holds offsets into l3.
    private static ushort[] l2;
    // Level 3: indexed by (l2 offset + low 4 bits); holds the folded code unit, or 0 for "no mapping".
    private static ushort[] l3;

    /// <summary>
    /// Entry point. Reads the case folding data, generates and prints the tables,
    /// prints their sizes, and reports every code point whose table lookup does not
    /// match the source data.
    /// </summary>
    /// <param name="args">
    /// Optional: <c>args[0]</c> is the path of the CaseFolding.txt file. When absent,
    /// the original default path <c>F:\Temp\CaseFolding.txt</c> is used.
    /// </param>
    static void Main(string[] args)
    {
        string caseFoldingPath = (args != null && args.Length > 0) ? args[0] : @"F:\Temp\CaseFolding.txt";

        Dictionary<ushort, ushort> simpleFoldingMapping = ReadCaseFolding(caseFoldingPath);
        GenerateTable8_4_4(simpleFoldingMapping, out l1, out l2, out l3);

        DumpTable(l1, "L1");
        DumpTable(l2, "L2");
        DumpTable(l3, "L3");

        Console.WriteLine($"L1 Size = {l1.Length * sizeof(ushort), 4}");
        Console.WriteLine($"L2 Size = {l2.Length * sizeof(ushort), 4}");
        Console.WriteLine($"L3 Size = {l3.Length * sizeof(ushort), 4}");
        Console.WriteLine($"totale size = {(l1.Length + l2.Length + l3.Length) * sizeof(ushort)}");

        // Validate the generated tables: every source mapping must round-trip through the lookup.
        foreach (KeyValuePair<ushort, ushort> entry in simpleFoldingMapping)
        {
            ushort actual = GetFoldCase((char)entry.Key);
            if (actual != entry.Value)
            {
                // Format the numeric key, not a char: char ignores the "x4" format
                // specifier and would print the glyph instead of the code point.
                Console.WriteLine($"... {entry.Key:x4}: {actual:x4} != {entry.Value:x4}");
            }
        }

        Console.ReadLine();
    }

    /// <summary>
    /// Parses a CaseFolding.txt style file and returns the common ("C") and simple ("S")
    /// folding entries whose source and target both fit in 16 bits.
    /// </summary>
    /// <param name="CaseFoldingFilePath">Path of the file to read.</param>
    /// <returns>A map from code point to its folded code point.</returns>
    /// <remarks>
    /// Blank lines, lines starting with '#', lines with fewer than four ';'-separated
    /// fields, entries with another status, and entries whose hex fields do not parse
    /// are skipped.
    /// </remarks>
    private static Dictionary<ushort, ushort> ReadCaseFolding(string CaseFoldingFilePath)
    {
        Dictionary<ushort, ushort> simpleFoldingMapping = new Dictionary<ushort, ushort>();

        using (StreamReader sr = new StreamReader(CaseFoldingFilePath))
        {
            string rawLine;
            while ((rawLine = sr.ReadLine()) != null)
            {
                string line = rawLine.Trim();
                if (line.Length == 0 || line[0] == '#')
                {
                    continue;
                }

                string[] parts = line.Split(';');

                // Check the field count BEFORE touching parts[1]; a line without ';'
                // yields a single-element array.
                if (parts.Length < 4)
                {
                    continue;
                }

                string status = parts[1].Trim();
                if (status != "C" && status != "S")
                {
                    continue;
                }

                if (!int.TryParse(parts[0], NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int n1) ||
                    !int.TryParse(parts[2], NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int n2))
                {
                    continue;
                }

                // Only 16-bit entries can be stored. Skip (rather than stop at) anything
                // else so the result does not depend on the file being sorted, and so a
                // wider value is never silently truncated by the cast below.
                if (n1 < 0 || n1 > 0xFFFF || n2 < 0 || n2 > 0xFFFF)
                {
                    continue;
                }

                simpleFoldingMapping[(ushort)n1] = (ushort)n2;
            }
        }

        return simpleFoldingMapping;
    }

    /// <summary>
    /// Looks up the case-folded form of <paramref name="c"/> in the generated tables.
    /// </summary>
    /// <param name="c">The UTF-16 code unit to fold.</param>
    /// <returns>The folded code unit, or <paramref name="c"/> itself when the table holds 0 (no mapping).</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static char GetFoldCase(char c)
    {
        ushort v = l1[c >> 8];            // high 8 bits select the level 2 row
        v = l2[v + ((c >> 4) & 0xF)];     // next 4 bits select the level 3 row
        v = l3[v + (c & 0xF)];            // low 4 bits select the value
        return v == 0 ? c : (char)v;
    }

    /// <summary>
    /// Builds the three lookup tables from the raw mapping. Identical 16-entry rows are
    /// stored only once at level 2 and level 3, which is what keeps the tables small.
    /// </summary>
    /// <param name="rawData">Map from code point to folded code point; missing keys are stored as 0.</param>
    /// <param name="l1">Receives one level 2 row offset per 256-code-point page.</param>
    /// <param name="l2">Receives the de-duplicated rows of level 3 row offsets.</param>
    /// <param name="l3">Receives the de-duplicated rows of folded values.</param>
    /// <exception cref="ArgumentNullException"><paramref name="rawData"/> is null.</exception>
    private static void GenerateTable8_4_4(Dictionary<ushort, ushort> rawData, out ushort[] l1, out ushort[] l2, out ushort[] l3)
    {
        if (rawData == null)
        {
            throw new ArgumentNullException(nameof(rawData));
        }

        // Number of 64K planes to cover. Keys and table entries are 16-bit, so covering
        // more than plane 0 would first require widening those types.
        const int planes = 1;
        const int RowSize = 16;

        // Row content (as a string key) -> offset at which that row was first stored.
        Dictionary<string, ushort> level2Hash = new Dictionary<string, ushort>();
        Dictionary<string, ushort> level3Hash = new Dictionary<string, ushort>();

        List<ushort> level1Index = new List<ushort>();
        List<ushort> level2Index = new List<ushort>();
        List<ushort> level3Data = new List<ushort>();

        ushort[] level2Row = new ushort[RowSize];
        ushort[] level3Row = new ushort[RowSize];

        for (int i = 0; i < 256 * planes; i++)
        {
            for (int j = 0; j < RowSize; j++)
            {
                // Gather the 16 values of this level 3 row; 0 means "no mapping".
                for (int k = 0; k < RowSize; k++)
                {
                    // Derive the code point from the loop indices instead of a running
                    // ushort counter, which overflowed after the very last element.
                    ushort codePoint = unchecked((ushort)((i << 8) | (j << 4) | k));
                    level3Row[k] = rawData.TryGetValue(codePoint, out ushort folded) ? folded : (ushort)0;
                }

                // Reuse an identical row if one was already emitted.
                string level3Key = string.Join<ushort>(";", level3Row);
                if (!level3Hash.TryGetValue(level3Key, out ushort level3Offset))
                {
                    level3Offset = (ushort)level3Data.Count;
                    level3Hash[level3Key] = level3Offset;
                    level3Data.AddRange(level3Row);
                }

                level2Row[j] = level3Offset;
            }

            // Same de-duplication one level up, on the row of level 3 offsets.
            string level2Key = string.Join<ushort>(",", level2Row);
            if (!level2Hash.TryGetValue(level2Key, out ushort level2Offset))
            {
                level2Offset = (ushort)level2Index.Count;
                level2Hash[level2Key] = level2Offset;
                level2Index.AddRange(level2Row);
            }

            level1Index.Add(level2Offset);
        }

        l1 = level1Index.ToArray();
        l2 = level2Index.ToArray();
        l3 = level3Data.ToArray();
    }

    /// <summary>
    /// Writes <paramref name="table"/> to the console as a C# array initializer, 16 values
    /// per row, each complete row followed by a comment giving its index range.
    /// </summary>
    /// <param name="table">The values to print; may be empty.</param>
    /// <param name="name">The identifier used for the emitted array.</param>
    /// <exception cref="ArgumentNullException"><paramref name="table"/> is null.</exception>
    private static void DumpTable(ushort[] table, string name)
    {
        if (table == null)
        {
            throw new ArgumentNullException(nameof(table));
        }

        const int RawWidth = 16;

        Console.Write($"int [] {name} =\n{{\n");

        // Column header: the hex column numbers.
        Console.Write($"//");
        for (int i = 0; i < RawWidth; i++)
        {
            Console.Write($"{i,6:x} ");
        }

        // Start the first data row. Printing all elements from the loop (instead of
        // special-casing table[0]) keeps the output identical and tolerates an empty table.
        Console.Write("\n ");
        for (int i = 0; i < table.Length; i++)
        {
            Console.Write($"0x{table[i]:x4}, ");
            if ((i + 1) % RawWidth == 0)
            {
                Console.WriteLine($" // {i - (RawWidth - 1):x4} .. {i:x4}");
                Console.Write($" ");
            }
        }

        Console.WriteLine("\n};\n");
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment