Skip to content

Instantly share code, notes, and snippets.

@hytti
Created July 23, 2017 16:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hytti/57d056852ff603cf9c9287414bdd595e to your computer and use it in GitHub Desktop.
Save hytti/57d056852ff603cf9c9287414bdd595e to your computer and use it in GitHub Desktop.
An AppleScript for encoding text for use in a URL (UTF8)
(*
this is a modification and extension of a sub-routine for encoding high-ASCII characters that can be found at:
[AppleScript - Essential Sub-Routines - Text Encoding | Decoding](https://www.macosxautomation.com/applescript/sbrt/sbrt-08.html)
Obviously, a more modern approach would be to use the JavaScript variant and its built-in encodeURI(), but if you have legacy scripts that still work perfectly, this small addition might be handy.
the short section demoing the use of this encoding with the x-callback-url -protocol was borrowed from:
[applescript support bring "x-callback-url" functions to the mac -) - Bear - Discussion Area - Shiny Frog Support](http://help.shinyfrog.net/discussions/bear/2292-applescript-support-bring-x-callback-url-functions-to-the-mac)
The swapChar list is used to swap the most commonly used accented characters from "combined graphemes" (a base letter followed by a combining mark; see: Combining Diacritical Marks) to a single Unicode code point character. On a Mac, both the Finder and Safari have an irritating tendency to favour the combined graphemes. This creates terrible-looking characters and other annoyances.
*)
-- swapChar: lookup table used by encode_char for characters that arrive as a
-- base letter followed by a combining mark. Each entry is a pair
-- {percent-encoded UTF-8 of the combined sequence, percent-encoded UTF-8 of the
-- equivalent single code point character}.
-- The table is built in chunks (one per group of base letters) purely for
-- readability; concatenating lists of pairs yields one flat list of pairs.
-- Compared with the earlier revision, the o-grave / o-acute pairs (both cases)
-- and capital Y-acute were added; they were missing although the same accents
-- were already covered for the other vowels.
global swapChar
-- lowercase a, c, e
set swapChar to {{"%61%CC%80", "%C3%A0"}, {"%61%CC%81", "%C3%A1"}, {"%61%CC%82", "%C3%A2"}, {"%61%CC%83", "%C3%A3"}, {"%61%CC%88", "%C3%A4"}, {"%61%CC%8A", "%C3%A5"}, {"%63%CC%A7", "%C3%A7"}, {"%65%CC%80", "%C3%A8"}, {"%65%CC%81", "%C3%A9"}, {"%65%CC%82", "%C3%AA"}, {"%65%CC%88", "%C3%AB"}}
-- lowercase i, n, o
set swapChar to swapChar & {{"%69%CC%80", "%C3%AC"}, {"%69%CC%81", "%C3%AD"}, {"%69%CC%82", "%C3%AE"}, {"%69%CC%88", "%C3%AF"}, {"%6E%CC%83", "%C3%B1"}, {"%6F%CC%80", "%C3%B2"}, {"%6F%CC%81", "%C3%B3"}, {"%6F%CC%82", "%C3%B4"}, {"%6F%CC%83", "%C3%B5"}, {"%6F%CC%88", "%C3%B6"}, {"%6F%CC%B7", "%C3%B8"}}
-- lowercase s, u, y, z
set swapChar to swapChar & {{"%73%CC%8C", "%C5%A1"}, {"%75%CC%80", "%C3%B9"}, {"%75%CC%81", "%C3%BA"}, {"%75%CC%82", "%C3%BB"}, {"%75%CC%88", "%C3%BC"}, {"%79%CC%81", "%C3%BD"}, {"%79%CC%88", "%C3%BF"}, {"%7A%CC%8C", "%C5%BE"}}
-- uppercase A, C, E
set swapChar to swapChar & {{"%41%CC%80", "%C3%80"}, {"%41%CC%81", "%C3%81"}, {"%41%CC%82", "%C3%82"}, {"%41%CC%83", "%C3%83"}, {"%41%CC%88", "%C3%84"}, {"%41%CC%8A", "%C3%85"}, {"%43%CC%A7", "%C3%87"}, {"%45%CC%80", "%C3%88"}, {"%45%CC%81", "%C3%89"}, {"%45%CC%82", "%C3%8A"}, {"%45%CC%88", "%C3%8B"}}
-- uppercase I, N, O
set swapChar to swapChar & {{"%49%CC%80", "%C3%8C"}, {"%49%CC%81", "%C3%8D"}, {"%49%CC%82", "%C3%8E"}, {"%49%CC%88", "%C3%8F"}, {"%4E%CC%83", "%C3%91"}, {"%4F%CC%80", "%C3%92"}, {"%4F%CC%81", "%C3%93"}, {"%4F%CC%82", "%C3%94"}, {"%4F%CC%83", "%C3%95"}, {"%4F%CC%88", "%C3%96"}, {"%4F%CC%B7", "%C3%98"}}
-- uppercase S, U, Y, Z
set swapChar to swapChar & {{"%53%CC%8C", "%C5%A0"}, {"%55%CC%80", "%C3%99"}, {"%55%CC%81", "%C3%9A"}, {"%55%CC%82", "%C3%9B"}, {"%55%CC%88", "%C3%9C"}, {"%59%CC%81", "%C3%9D"}, {"%59%CC%88", "%C5%B8"}, {"%5A%CC%8C", "%C5%BD"}}
-- Demo: append a line of text to an existing Bear.app note through Bear's
-- x-callback-url scheme. The text is percent-encoded first so it can be
-- embedded safely as the value of the "text" query parameter.
set noteText to "this text will be appended to aNoteToBeExtended"
-- URL_A characters are encoded (true); URL_B characters are left as-is (false)
set encodedNoteText to my encode_text(noteText, true, false)
set bearURL to "bear://x-callback-url/add-text?text=" & encodedNoteText & "&title=aNoteToBeExtended&mode=append"
tell application "Bear"
	open location bearURL
end tell
-- end of the Bear.app demo
-- the encoding sub-routines follow:
-- the encoding sub-routines:
on encode_byte(this_byte)
	-- Turns one byte value (0-255) into its percent-encoded form: "%" followed
	-- by two uppercase hexadecimal digits, e.g. 32 -> "%20".
	-- The digit string is indexed 1-based, hence the "+ 1" on each nibble.
	set hexDigits to "0123456789ABCDEF"
	set highNibble to (this_byte div 16) + 1
	set lowNibble to (this_byte mod 16) + 1
	return ("%" & (character highNibble of hexDigits) & (character lowNibble of hexDigits)) as string
end encode_byte
on encode_char(this_char)
	-- Percent-encodes a single character as UTF-8.
	--
	-- this_char: one character of text. Its `id` is either a single Unicode
	--   code point (integer) or a list of code points when the character is a
	--   combined sequence (base letter plus combining mark(s)).
	-- Returns: text made of "%XX" groups, one per UTF-8 byte. When the
	--   character was a combined sequence whose encoding appears as the first
	--   item of a pair in the global swapChar, the second item of that pair is
	--   returned instead.
	--
	-- Code points from 65536 upwards are encoded as 4-byte UTF-8 sequences;
	-- earlier revisions threw away the bytes collected so far and returned a
	-- bare "?" for them.
	set codePoints to (the id of this_char)
	if class of codePoints is list then
		set isCombo to true
	else
		set codePoints to {codePoints}
		set isCombo to false
	end if
	set toReturn to ""
	repeat with codePointRef in codePoints
		-- the loop variable is a reference into the list; take the plain integer
		set codePoint to contents of codePointRef
		if codePoint < 128 then
			-- 1 byte: 0xxxxxxx
			set toReturn to toReturn & encode_byte(codePoint)
		else if codePoint < 2048 then
			-- 2 bytes: 110xxxxx 10xxxxxx
			set toReturn to toReturn & encode_byte((codePoint div 64) + 192) & encode_byte((codePoint mod 64) + 128)
		else if codePoint < 65536 then
			-- 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
			set toReturn to toReturn & encode_byte((codePoint div 4096) + 224) & encode_byte(((codePoint mod 4096) div 64) + 128) & encode_byte((codePoint mod 64) + 128)
		else if codePoint < 1114112 then
			-- 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (up to U+10FFFF)
			set toReturn to toReturn & encode_byte((codePoint div 262144) + 240) & encode_byte(((codePoint mod 262144) div 4096) + 128) & encode_byte(((codePoint mod 4096) div 64) + 128) & encode_byte((codePoint mod 64) + 128)
		else
			-- outside the Unicode range: append an encoded "?" and keep
			-- whatever has already been collected
			set toReturn to toReturn & "%3F"
		end if
	end repeat
	if isCombo then
		-- replace a known combined sequence with its single code point form
		repeat with swapPair in swapChar
			if toReturn is (item 1 of swapPair) then
				set toReturn to item 2 of swapPair
				exit repeat -- keys are unique, no need to scan further
			end if
		end repeat
	end if
	return toReturn
end encode_char
-- encode_text: percent-encodes a piece of text for use inside a URL.
--   this_text     the text to encode
--   encode_URL_A  when false, the characters $+!'/?;&@=#%><{}[]"~`^\|* are
--                 passed through unchanged instead of being encoded
--   encode_URL_B  when false, the characters .-_: are passed through unchanged
-- Letters a-z and digits 0-9 are always passed through; every other character
-- is handed to encode_char. Returns the encoded text.
on encode_text(this_text, encode_URL_A, encode_URL_B)
	-- start from the always-allowed set and widen it according to the flags
	set passthroughChars to "abcdefghijklmnopqrstuvwxyz0123456789"
	if encode_URL_A is false then
		set passthroughChars to passthroughChars & "$+!'/?;&@=#%><{}[]\"~`^\\|*"
	end if
	if encode_URL_B is false then
		set passthroughChars to passthroughChars & ".-_:"
	end if
	set resultText to ""
	repeat with currentChar in this_text
		if currentChar is not in passthroughChars then
			set resultText to (resultText & encode_char(currentChar)) as string
		else
			set resultText to (resultText & currentChar)
		end if
	end repeat
	return resultText
end encode_text
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment