Created
July 13, 2018 09:08
-
-
Save meijeru/02bc24b9797af9ad6d1c770095ee1947 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Red [
	Title:   "Case folding table converter"
	Purpose: {Transform Unicode data file into Red/System literal array}
	Author:  "Rudolf W. MEIJER"
	File:    %convert.red
	Version: 0.1
	Rights:  "Copyright (c) 2018 Rudolf W. Meijer"
	History: [
		[0.1 2-Jul-2018 {Start of project}]
	]
	Language: 'English
	Tabs:    4
]
;---|----1----|----2----|----3----|----4----|----5----|----6----|----7----|- | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
; constants | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
; source of the case folding data (fetched over the network at start-up)
input-file: http://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt
; file that receives the generated literal array
output-file: %output.txt
; opening line of the generated literal array, written before any data line
first-line: "lowercase-table: [ ; Unicode latest case folding table,Upper -> Lower. Only status 'C' and 'S'"

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; initialization
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; read the input first: if this fails, an existing output file is left untouched
lines: read/lines input-file
; remove any output of a previous run; ATTEMPT ignores the error when
; there is no such file
attempt [delete output-file]
; start the output with the array's opening line
write/lines output-file first-line
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
; functions | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
convert-line: func [
	{Convert one line of the Unicode file into one line of the literal array
	and append it to output-file; lines that are not Common or Simple
	mappings, or that lack one of the four fields, produce no output}
	s [string!] {the line to be converted}
	/local
		items {block of items split by "; "}
		out   {buffer to build output line}
		code  {code point field currently being padded}
][
	; model:
	; 0041; C; 0061; # LATIN CAPITAL LETTER A
	items: split s "; "
	if all [
		; code, status, mapping and name must all be present; shorter
		; lines (e.g. blank ones) would otherwise yield NONE fields below
		4 <= length? items
		; we want only Common and Simple items
		any [items/2 = "C" items/2 = "S"]
	][
		; pad each code point on its own: up to 4 digits -> 4 digits,
		; anything longer -> 8 digits, so that both hex literals keep a
		; valid width even when source and target differ in length
		foreach code reduce [items/1 items/3] [
			; PAD modifies the string in place, so ITEMS sees the change
			pad/left/with code (either 4 >= length? code [4][8]) #"0"
		]
		out: rejoin [" " items/1 "h " items/3 "h ; " items/4]
		write/lines/append output-file out
	]
]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
; main loop | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
; feed every data line of the input to convert-line
foreach line lines [
	; blank lines carry no data and lines starting with "#" are comments
	unless any [empty? line  line/1 = #"#"] [
		convert-line line
	]
]
; close the literal array
write/lines/append output-file "]"
; wait for the user before the script ends
ask "OK"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment