Skip to content

Instantly share code, notes, and snippets.

@Shayan-To
Created April 19, 2017 13:57
Show Gist options
  • Save Shayan-To/2d3825bec009670999f1d770f31824e1 to your computer and use it in GitHub Desktop.
Save Shayan-To/2d3825bec009670999f1d770f31824e1 to your computer and use it in GitHub Desktop.
A script for extracting the bidirectional class of each character from the Unicode Character Database
# This script needs http://www.unicode.org/Public/9.0.0/ucd/UCD.zip extracted next to it.
#
# Builds $List: an array indexed by code point whose elements are the Bidi_Class
# abbreviation read from DerivedBidiClass.txt. Code points the file does not
# mention keep the initial value "L".

# Fail fast when the data file is missing; nothing below is meaningful without it.
$Content = Get-Content .\UCD\extracted\DerivedBidiClass.txt -ErrorAction Stop

# One slot per code point U+0000..U+10FFFF, i.e. exactly 0x110000 entries.
# (The range 0..0x110000 would be inclusive and allocate one slot too many.)
$List = @("L") * 0x110000

# Strip "#" comments and surrounding whitespace, then discard empty lines.
# A line without "#" is kept whole rather than being truncated to nothing.
$Content = $Content | ForEach-Object {
    $CommentStart = $_.IndexOf("#")
    If ($CommentStart -GE 0)
    {
        $_.Substring(0, $CommentStart).Trim()
    }
    Else
    {
        $_.Trim()
    }
} | Where-Object Length -NE 0

# Every remaining line has the form "<code point or first..last> ; <class>".
ForEach ($L In $Content)
{
    $D = $L -Split ";"
    If ($D.Length -NE 2)
    {
        Throw "Malformed line (expected '<range> ; <class>'): $L"
    }
    $Class = $D[1].Trim()

    # @(...) forces an array even for a single code point, so both forms share one path.
    $Range = @($D[0].Trim() -Split "\.\." | ForEach-Object {[System.Convert]::ToInt32($_, 16)})
    If ($Range.Length -EQ 1)
    {
        $First = $Range[0]
        $Last = $Range[0]
    }
    ElseIf ($Range.Length -EQ 2)
    {
        $First = $Range[0]
        $Last = $Range[1]
    }
    Else
    {
        Throw "Malformed code point range: $($D[0].Trim())"
    }

    If ($First -LT 0 -Or $First -GT $Last -Or $Last -GE $List.Length)
    {
        Throw "Code point range out of bounds: $($D[0].Trim())"
    }

    For ($I = $First; $I -LE $Last; $I++)
    {
        $List[$I] = $Class
    }
}
# Bidirectional type descriptions: ======================================================
# Maps each Bidi_Class abbreviation to its long name. The table below is copied
# into a generic String->String dictionary in the order written.
$BidiClassNames = [Ordered]@{
    # Strong:
    "L"   = "Left_To_Right"
    "R"   = "Right_To_Left"
    "AL"  = "Arabic_Letter"
    # Weak:
    "EN"  = "European_Number"
    "ES"  = "European_Separator"
    "ET"  = "European_Terminator"
    "AN"  = "Arabic_Number"
    "CS"  = "Common_Separator"
    "NSM" = "Nonspacing_Mark"
    "BN"  = "Boundary_Neutral"
    # Neutral:
    "B"   = "Paragraph_Separator"
    "S"   = "Segment_Separator"
    "WS"  = "White_Space"
    "ON"  = "Other_Neutral"
    # Explicit Formatting:
    "LRE" = "Left_To_Right_Embedding"
    "LRO" = "Left_To_Right_Override"
    "RLE" = "Right_To_Left_Embedding"
    "RLO" = "Right_To_Left_Override"
    "PDF" = "Pop_Directional_Format"
    "LRI" = "Left_To_Right_Isolate"
    "RLI" = "Right_To_Left_Isolate"
    "FSI" = "First_Strong_Isolate"
    "PDI" = "Pop_Directional_Isolate"
}
$BidiClassDesc = [System.Collections.Generic.Dictionary[String, String]]::New()
ForEach ($Entry In $BidiClassNames.GetEnumerator())
{
    $BidiClassDesc.Add([String] $Entry.Key, [String] $Entry.Value)
}
# Write all data in form of ranges: =====================================================
# Clears the console, then prints one line per maximal run of consecutive code
# points sharing the same class: the run in hex (a single value or first..last),
# the class abbreviation, and its description from $BidiClassDesc.
Clear-Host
Write-Host
Write-Host "All data:"
Write-Host
$RunStart = 0
While ($RunStart -LT $List.Length)
{
    $RunClass = $List[$RunStart]

    # Extend the run while the next element still carries the same class.
    $RunEnd = $RunStart
    While ((($RunEnd + 1) -LT $List.Length) -And ($List[$RunEnd + 1] -EQ $RunClass))
    {
        $RunEnd++
    }

    # Hex is lower-case and zero-padded to at least five digits; a single code
    # point gets two extra tabs so the class column lines up with the range form.
    If ($RunStart -EQ $RunEnd)
    {
        $Label = '{0:x5}' -f $RunStart
        $Gap = "`t`t`t`t"
    }
    Else
    {
        $Label = '{0:x5}..{1:x5}' -f $RunStart, $RunEnd
        $Gap = "`t`t"
    }
    Write-Host "${Label}${Gap}${RunClass}`t`t$($BidiClassDesc[$RunClass])"

    $RunStart = $RunEnd + 1
}
# Initializations for base conversion: ==================================================
# Produces $Digits, a string whose character at position k is the digit for
# value k, and $MaxDigits, the width that encoded numbers are left-padded to.
# NOTE(review): with 92 digits, three positions cover values below 92^3 = 778688,
# which is less than 0x10FFFF; larger values come out four digits wide.
If ($True) # Base-92
{
    # Alphanumerics first, then the rest of printable ASCII in code order.
    $Digits  = [Char[]](48..57)     # '0'..'9'
    $Digits += [Char[]](97..122)    # 'a'..'z'
    $Digits += [Char[]](65..90)     # 'A'..'Z'
    $Excluded = [Char[]]@(92, 32, 39)   # backslash, space, apostrophe
    $Digits += [Char[]](32..126) | Where-Object {$Excluded -NotContains $_}
    # The last step re-adds the alphanumerics; keep only first occurrences.
    $Digits = $Digits | Select-Object -Unique
    $MaxDigits = 3
}
Else # Base-16
{
    $Digits  = [Char[]](48..57)     # '0'..'9'
    $Digits += [Char[]](97..102)    # 'a'..'f'
    $MaxDigits = 5
}
$Digits = $Digits -Join ''
# Converts a non-negative integer to a string in the numeral system defined by
# the script-level string $Digits: the base is $Digits.Length and the character
# at position k is the digit for value k. The result is not padded.
#   $I      - the value to convert; must be >= 0.
#   returns - the digit string, most significant digit first; zero yields the
#             single zero digit rather than an empty string.
#   throws  - when $I is negative (a negative remainder would otherwise index
#             $Digits from its end and produce a meaningless result).
Function ChangeBase([Int] $I)
{
    If ($I -LT 0)
    {
        Throw "ChangeBase: negative values are not supported: $I"
    }
    If ($I -EQ 0)
    {
        Return [String] $Digits[0]
    }
    $Res = ""
    While ($I -NE 0)
    {
        # Peel off the least significant digit and prepend it to the result.
        $R = 0
        $I = [System.Math]::DivRem($I, $Digits.Length, [Ref] $R)
        $Res = $Digits[$R] + $Res
    }
    Return $Res
}
# Extract ranges: =======================================================================
# Prints four listings of code point ranges, followed by the digit alphabet
# needed to decode them. Each range is written as "<first> <last> " with both
# bounds encoded by ChangeBase and left-padded to $MaxDigits with the zero digit.

# Writes every maximal run of consecutive code points in $List whose membership
# in $Classes equals $WantMember, all on one line without a trailing newline.
#   $Classes    - class abbreviations that form the set.
#   $WantMember - $True to print runs inside the set, $False for runs outside it.
Function WriteRanges([String[]] $Classes, [Bool] $WantMember)
{
    $I = 0
    While ($I -LT $List.Length)
    {
        $InSet = $Classes -contains $List[$I]
        $First = $I
        # Advance to the first code point whose membership differs (or to the end).
        While (($I -LT $List.Length) -And (($Classes -contains $List[$I]) -EQ $InSet))
        {
            $I++
        }
        $Last = $I - 1
        If ($InSet -EQ $WantMember)
        {
            Write-Host -NoNewline "$((ChangeBase $First).PadLeft($MaxDigits, $Digits[0])) $((ChangeBase $Last).PadLeft($MaxDigits, $Digits[0])) "
        }
    }
}

# One entry per listing, in output order.
$Sections = @(
    @{Title = "Strong non-L ranges:"; Classes = @("R", "AL");      Member = $True},
    @{Title = "Strong L ranges:";     Classes = @("L");            Member = $True},
    @{Title = "Non-strong ranges:";   Classes = @("L", "R", "AL"); Member = $False},
    @{Title = "Strong ranges:";       Classes = @("L", "R", "AL"); Member = $True}
)
ForEach ($Section In $Sections)
{
    Write-Host
    Write-Host $Section.Title
    Write-Host
    WriteRanges -Classes $Section.Classes -WantMember $Section.Member
    # Terminate the line of ranges.
    Write-Host
}
Write-Host
Write-Host "Digits:"
Write-Host
Write-Host $Digits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment