Skip to content

Instantly share code, notes, and snippets.

@ladislav
Last active December 15, 2015 20:40
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ladislav/5319674 to your computer and use it in GitHub Desktop.
Save ladislav/5319674 to your computer and use it in GitHub Desktop.
make object! [
    ; Conversion between the textual (percent-encoded) form of a URL and a
    ; url! value.  Two functions are exported into the enclosing context
    ; with SET: LOAD-URL (string! -> url!) and MOLD-URL (url! -> string!).
    ;
    ; Octets that must not be turned into ordinary characters (percent-encoded
    ; delimiters and octets that are not part of a BMP UTF-8 sequence) are
    ; stored inside the url! as the code point (url-pua-start + octet).

    ; Private Unicode Area for encoded delimiters and octets
    url-pua-start: #"^(e000)"
    url-pua-end: #"^(e0ff)"

    ; characters that are kept encoded when they arrive percent-encoded
    delimiter: charset ":/?#[]@!$&'()*+,;="
    hex-digit: charset [#"0" - #"9" #"a" - #"f" #"A" - #"F"]

    ; byte classes used to recognize UTF-8 sequences in a group of octets
    ascii: charset [#"^(00)" - #"^(7f)"]
    two-byte-start: charset [#"^(c0)" - #"^(df)"]
    three-byte-start: charset [#"^(e0)" - #"^(ef)"]
    four-byte-start: charset [#"^(f0)" - #"^(f7)"]
    continuation: charset [#"^(80)" - #"^(bf)"]

    ; one code point encoded in one, two or three UTF-8 bytes
    bmp-utf-8: [
        ascii
        |
        two-byte-start continuation
        |
        three-byte-start 2 continuation
    ]

    ; Append the percent-encoded form ("%XX") of a single OCTET to TARGET.
    ; The octet goes through a binary! so that ENBASE sees exactly one byte;
    ; encoding a string! holding a character >= 128 would yield the hex of
    ; its multi-byte UTF-8 form instead.
    append-encoded-octet: func [
        target [string!]
        octet [integer!]
    ] [
        append target #"%"
        append target enbase/base append copy #{} octet 16
    ]

    ; LOAD-URL: convert the textual form SOURCE to a url! value.
    ;   source  - string that may contain "%XX" percent-encoded octets
    ;   returns - url! built from SOURCE
    ;   raises  - "Invalid percent encoding" when "%" is not followed by
    ;             two hex digits
    set 'load-url func [
        source [string!]
        /local result percent-group octet octet-group success character
    ] [
        result: make url! 0
        ; octets of the run of consecutive "%XX" groups seen so far
        octet-group: copy #{}
        parse source [
            any [
                ; percent encoding: collect the octet, decode it later
                copy percent-group
                [
                    #"%"
                    [
                        2 hex-digit
                        |
                        (do make error! "Invalid percent encoding")
                    ]
                ]
                (
                    octet: to integer! first dehex percent-group
                    append octet-group octet
                )
                |
                ; process octet group
                ; (this paren is evaluated even when the SKIP below fails at
                ; the end of input, so a trailing group is processed as well)
                (
                    parse octet-group [
                        any [
                            ; escaped delimiter?
                            set character delimiter
                            (append result add url-pua-start character)
                            |
                            ; BMP UTF-8?
                            copy character bmp-utf-8
                            (
                                character: first to string! character
                                ; SUCCESS is used as the next parse rule:
                                ; [end skip] never matches, NONE always does
                                success: either all [
                                    url-pua-start <= character
                                    url-pua-end >= character
                                ] [
                                    ; URL-PUA CP: reject this alternative so
                                    ; that its bytes are stored as octets
                                    [end skip]
                                ] [
                                    ; public Unicode CP
                                    append result character
                                    none
                                ]
                            )
                            success
                            |
                            ; just an octet
                            set character skip
                            (append result add url-pua-start character)
                        ]
                    ]
                    octet-group: copy #{}
                )
                ; a character that is not part of a percent group
                set character skip
                (
                    either all [
                        url-pua-start <= character
                        url-pua-end >= character
                    ] [
                        ; URL-PUA CP: store each byte of its UTF-8 form as
                        ; an octet
                        character: to binary! character
                        parse character [
                            any [
                                set character skip
                                (append result add url-pua-start character)
                            ]
                        ]
                    ] [
                        append result character
                    ]
                )
            ]
        ]
        result
    ]

    ; public characters that MOLD-URL always percent-encodes
    encodable-chars: charset [
        ; control characters
        #"^(00)" - #"^(1f)" #"^(7f)"
        ; spaces
        #" " #"^(a0)"
        ; percent
        #"%"
    ]

    ; MOLD-URL: convert the url! VALUE to its textual form.
    ;   value   - url! value (as produced by LOAD-URL)
    ;   returns - string! in which every stored octet and every character of
    ;             ENCODABLE-CHARS is written as "%XX", one group per octet
    set 'mold-url func [
        value [url!]
        /local result character
    ] [
        result: copy ""
        parse value [
            any [
                set character skip
                (
                    either all [
                        url-pua-start <= character
                        url-pua-end >= character
                    ] [
                        ; an octet: its value is the offset into the URL-PUA
                        append-encoded-octet result (
                            to integer! character - url-pua-start
                        )
                    ] [
                        ; public Unicode CP
                        either find encodable-chars character [
                            ; this character has to be encoded: one "%XX"
                            ; group for each byte of its UTF-8 form
                            foreach octet to binary! character [
                                append-encoded-octet result octet
                            ]
                        ] [
                            append result character
                        ]
                    ]
                )
            ]
        ]
        result
    ]
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment