Last active
April 13, 2020 12:03
-
-
Save toomasv/74a22688ff1efe12ebc8a647dffddd15 to your computer and use it in GitHub Desktop.
Replace several items in one run + example converters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Red [ | |
Description: {Replace several items in one run + example conversions} | |
Date: 27-Mar-2020 | |
Author: "Toomas Vooglaid" | |
] | |
context [ | |
; Character classes and small sub-rules shared by the converters in this context.
sp: charset [#" " #"^-"]                 ; horizontal whitespace: space or tab
sp*: [any sp]                             ; optional run of horizontal whitespace
digit: charset [#"0" - #"9"]
integer: [some digit]                     ; one or more decimal digits
octal: charset [#"0" - #"7"]
hexa: union digit charset [#"A" - #"F" #"a" - #"f"]
; Capture slots written by `copy` inside the parse rules of c-str and clean-html.
int: hx: oc: none
get-hexa: func [
    "Return the character whose code point is written as the hex digits in HX"
    hx "String of hex digits; gets a leading 0 inserted in place when its length is odd"
][
    ; Base-16 decoding works on digit pairs, so make the digit count even first.
    unless even? length? hx [insert hx #"0"]
    to-char to-integer debase/base hx 16
]
get-octal: function [
    "Return the character whose code point is written as the octal digits in OC"
    oc "String of octal digits (the c-str rule captures 1 to 3 of them)"
][
    ; Accumulate the value directly instead of round-tripping through
    ; base-16 and base-2 text encodings.
    code: 0
    foreach o oc [
        ; Shift one octal place, then add this digit (48 is the code point of #"0").
        code: (code * 8) + ((to integer! o) - 48)
    ]
    to char! code
]
set 'replace-each function [
    "Replace every needle in SERIES during one parse pass; returns SERIES, modified in place"
    series [series!] "Series to search and modify"
    needles [series!] "Needles only (with /with), otherwise alternating needle / replacement values"
    /with "Read replacements from a separate series, matched to needles by index"
    replacements [series!]
][
    ; Assemble a single alternation of the form
    ;     change <needle> (<replacement>) | ... | skip
    alternatives: make block! 1 + (4 * length? needles)
    stride: either with [1][2]            ; paired input carries the replacement inline
    cursor: needles
    while [not tail? cursor][
        substitute: either with [
            idx: index? cursor
            replacements/:idx
        ][
            second cursor
        ]
        append alternatives 'change
        append/only alternatives cursor/1
        ; The replacement goes into a paren so parse evaluates it when the needle
        ; matches; a block replacement contributes its contents as the expression.
        append/only alternatives append copy quote () substitute
        append alternatives '|
        cursor: skip cursor stride
    ]
    ; Last alternative: nothing matched here, move one element forward.
    append alternatives 'skip
    parse/case series [some alternatives]
    series
]
set 'c-esc func [
    "Replace chars with c-escapes"
    str [string!] "Modified in place and returned"
][
    ; Each character on the left is rewritten as the two-character escape on its right.
    replace-each str [
        #"^(07)" "\a"
        #"^(08)" "\b"
        #"^(1B)" "\e"
        #"^(0C)" "\f"
        #"^(0A)" "\n"
        #"^(0D)" "\r"
        #"^(09)" "\t"
        #"^(0B)" "\v"
        #"^(5C)" "\\"
        #"^(27)" "\'"
        #"^(22)" {\"}
        #"^(3F)" "\?"
    ]
]
set 'c-str func [
    "Replace c-escapes with chars"
    str [string!] "Modified in place and returned"
][
    replace-each str [
        ; Numeric escapes: the digits are captured into the context words
        ; hx / oc and converted when the rule matches.
        ["\U" copy hx 8 hexa]           [get-hexa hx]
        ["\u{" copy hx 1 6 hexa #"}"]   [get-hexa hx]
        ["\u" copy hx 4 hexa]           [get-hexa hx]
        ["\x" copy hx some hexa]        [get-hexa hx]
        [#"\" copy oc 1 3 octal]        [get-octal oc]
        ; Control-character escapes.
        "\a" #"^(07)"
        "\b" #"^(08)"
        "\t" #"^(09)"
        "\n" #"^(0A)"
        "\v" #"^(0B)"
        "\f" #"^(0C)"
        "\r" #"^(0D)"
        "\e" #"^(1B)"
        ; Escaped punctuation.
        {\"} #"^(22)"
        "\'" #"^(27)"
        "\?" #"^(3F)"
        "\\" #"^(5C)"
    ]
]
set 'clean-html func [
    "Clean html source from tags and replace entities"
    src [string!] "HTML text; modified in place and returned"
][
    replace-each src [
        ; Drop script and style elements together with their content,
        ; then any remaining tag.
        ["<script" thru "</script>"] ""
        ["<style" thru "</style>"] ""
        [#"<" thru #">"] ""

        ; Numeric character references. The captured decimal digits are
        ; converted to integer! first so the code point becomes the char.
        ["&#" copy int integer #";"] [to char! to integer! int]
        ["&#x" copy hx some hexa #";"] [get-hexa hx]

        ; Named character references (matching is case-sensitive, which
        ; keeps e.g. &dagger; / &Dagger; and &alpha; / &Alpha; apart).
        "&cent;" #"¢"
        "&pound;" #"£"
        "&sect;" #"§"
        "&copy;" #"©"
        "&laquo;" #"«"
        "&raquo;" #"»"
        "&reg;" #"®"
        "&deg;" #"°"
        "&plusmn;" #"±"
        "&para;" #"¶"
        "&middot;" #"·"
        "&frac12;" #"½"
        "&frac13;" #"⅓"
        "&frac23;" #"⅔"
        "&frac15;" #"⅕"
        "&frac25;" #"⅖"
        "&frac35;" #"⅗"
        "&frac45;" #"⅘"
        "&frac16;" #"⅙"
        "&frac56;" #"⅚"
        "&frac18;" #"⅛"
        "&frac38;" #"⅜"
        "&frac58;" #"⅝"
        "&frac78;" #"⅞"
        "&ndash;" #"–"
        "&mdash;" #"—"
        "&lsquo;" #"‘"
        "&rsquo;" #"’"
        "&sbquo;" #"‚"
        "&ldquo;" #"“"
        "&rdquo;" #"”"
        "&bdquo;" #"„"
        "&dagger;" #"†"
        "&Dagger;" #"‡"
        "&bull;" #"•"
        "&hellip;" #"…"
        "&prime;" #"′"
        "&Prime;" #"″"
        "&euro;" #"€"
        "&trade;" #"™"
        "&asymp;" #"≈"
        ; NOTE(review): replaced by a plain space here; confirm whether a
        ; no-break space (U+00A0) was intended instead.
        "&nbsp;" #" "
        "&amp;" #"&"
        "&quot;" #"^""
        "&ne;" #"≠"
        "&le;" #"≤"
        "&ge;" #"≥"
        "&lt;" #"<"
        "&gt;" #">"
        "&Alpha;" #"Α"
        "&Beta;" #"Β"
        "&Gamma;" #"Γ"
        "&Delta;" #"Δ"
        "&Epsilon;" #"Ε"
        "&Zeta;" #"Ζ"
        "&Eta;" #"Η"
        "&Theta;" #"Θ"
        "&Iota;" #"Ι"
        "&Kappa;" #"Κ"
        "&Lambda;" #"Λ"
        "&Mu;" #"Μ"
        "&Nu;" #"Ν"
        "&Xi;" #"Ξ"
        "&Omicron;" #"Ο"
        "&Pi;" #"Π"
        "&Rho;" #"Ρ"
        "&Sigma;" #"Σ"
        "&Tau;" #"Τ"
        "&Upsilon;" #"Υ"
        "&Phi;" #"Φ"
        "&Chi;" #"Χ"
        "&Psi;" #"Ψ"
        "&Omega;" #"Ω"
        "&alpha;" #"α"
        "&beta;" #"β"
        "&gamma;" #"γ"
        "&delta;" #"δ"
        "&epsilon;" #"ε"
        "&zeta;" #"ζ"
        "&eta;" #"η"
        "&theta;" #"θ"
        "&iota;" #"ι"
        "&kappa;" #"κ"
        "&lambda;" #"λ"
        "&mu;" #"μ"
        "&nu;" #"ν"
        "&xi;" #"ξ"
        "&omicron;" #"ο"
        "&pi;" #"π"
        "&rho;" #"ρ"
        "&sigmaf;" #"ς"
        "&sigma;" #"σ"
        "&tau;" #"τ"
        "&upsilon;" #"υ"
        "&phi;" #"φ"
        "&chi;" #"χ"
        "&psi;" #"ψ"
        "&omega;" #"ω"
    ]
    ; Collapse three or more line breaks separated only by spaces/tabs
    ; into a single blank line.
    replace/all src [#"^/" sp* #"^/" some [sp* #"^/"]] "^/^/"
]
set 'opf func [
    "Replace ASCII letters in STR with double-struck (open-face) letters; returns STR, modified in place"
    str [string!]
][
    ; Needles and replacements are matched by position, so both strings
    ; must list all 26 capitals (including N / ℕ) and all 26 small letters.
    replace-each/with str
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        "𝔸𝔹ℂ𝔻𝔼𝔽𝔾ℍ𝕀𝕁𝕂𝕃𝕄ℕ𝕆ℙℚℝ𝕊𝕋𝕌𝕍𝕎𝕏𝕐ℤ𝕒𝕓𝕔𝕕𝕖𝕗𝕘𝕙𝕚𝕛𝕜𝕝𝕞𝕟𝕠𝕡𝕢𝕣𝕤𝕥𝕦𝕧𝕨𝕩𝕪𝕫"
]
set 'fr func [
    "Replace ASCII letters in STR with Fraktur letters; returns STR, modified in place"
    str [string!]
][
    replace-each/with
        str
        ; needles: the 52 ASCII letters
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        ; replacements: the Fraktur letter at the same position
        "𝔄𝔅ℭ𝔇𝔈𝔉𝔊ℌℑ𝔍𝔎𝔏𝔐𝔑𝔒𝔓𝔔ℜ𝔖𝔗𝔘𝔙𝔚𝔛𝔜ℨ𝔞𝔟𝔠𝔡𝔢𝔣𝔤𝔥𝔦𝔧𝔨𝔩𝔪𝔫𝔬𝔭𝔮𝔯𝔰𝔱𝔲𝔳𝔴𝔵𝔶𝔷"
]
set 'scr func [
    "Replace ASCII letters in STR with script letters; returns STR, modified in place"
    str [string!]
][
    replace-each/with
        str
        ; needles: the 52 ASCII letters
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        ; replacements: the script letter at the same position
        "𝒜ℬ𝒞𝒟ℰℱ𝒢ℋℐ𝒥𝒦ℒℳ𝒩𝒪𝒫𝒬ℛ𝒮𝒯𝒰𝒱𝒲𝒳𝒴𝒵𝒶𝒷𝒸𝒹ℯ𝒻ℊ𝒽𝒾𝒿𝓀𝓁𝓂𝓃ℴ𝓅𝓆𝓇𝓈𝓉𝓊𝓋𝓌𝓍𝓎𝓏"
]
set 'greek function [
    "Transliterate ASCII letters in STR to Greek letters; returns STR, modified in place"
    str [string!]
][
    ; Used to tell a word-internal s (σ) from a word-final one (ς).
    letter: charset [#"A" - #"Z" #"a" - #"z"]
    ; Needle / replacement pairs. Alternatives are tried in order, so the
    ; two-letter needles must come before the one-letter needles they start
    ; with (TH before T, PS before P), otherwise they can never match.
    ; Omega is not produced: it would need the same key as omicron.
    replace-each str [
        ["TH" | "Th"] "Θ"   ["PS" | "Ps"] "Ψ"
        "A" "Α"  "B" "Β"  "G" "Γ"  "D" "Δ"  "E" "Ε"  "Z" "Ζ"  "H" "Η"  "I" "Ι"
        "K" "Κ"  "L" "Λ"  "M" "Μ"  "N" "Ν"  "X" "Ξ"  "O" "Ο"  "P" "Π"  "R" "Ρ"
        "S" "Σ"  "T" "Τ"  "Y" "Υ"  "F" "Φ"  "C" "Χ"
        "th" "θ"  "ps" "ψ"
        "a" "α"  "b" "β"  "g" "γ"  "d" "δ"  "e" "ε"  "z" "ζ"  "h" "η"  "i" "ι"
        "k" "κ"  "l" "λ"  "m" "μ"  "n" "ν"  "x" "ξ"  "o" "ο"  "p" "π"  "r" "ρ"
        ; s followed by another letter is medial sigma; any other s is final sigma.
        ["s" ahead letter] "σ"  "s" "ς"
        "t" "τ"  "y" "υ"  "f" "φ"  "c" "χ"
    ]
]
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment