Created
July 23, 2017 16:14
-
-
Save hytti/57d056852ff603cf9c9287414bdd595e to your computer and use it in GitHub Desktop.
An AppleScript for encoding text for use in a URL (UTF8)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(*
This is a modification and extension of a sub-routine for encoding high-ASCII characters that can be found at:
[AppleScript - Essential Sub-Routines - Text Encoding | Decoding](https://www.macosxautomation.com/applescript/sbrt/sbrt-08.html)
Obviously, a more modern approach would be to use the JavaScript variant and its built-in encodeURI(), but if you have legacy scripts that still work perfectly, this small addition might be handy.
The short section demonstrating the use of this encoding with the x-callback-url protocol was borrowed from:
[applescript support bring "x-callback-url" functions to the mac -) - Bear - Discussion Area - Shiny Frog Support](http://help.shinyfrog.net/discussions/bear/2292-applescript-support-bring-x-callback-url-functions-to-the-mac)
The swapChar list is used to swap the most commonly used accented characters from "combined graphemes" (see: Combining Diacritical Marks) to a single Unicode code point. On a Mac, both the Finder and Safari have an irritating tendency to favour the combined graphemes, which produces terrible-looking characters and other annoyances.
*)
global swapChar | |
set swapChar to {{"%61%CC%80", "%C3%A0"}, {"%61%CC%81", "%C3%A1"}, {"%61%CC%82", "%C3%A2"}, {"%61%CC%83", "%C3%A3"}, {"%61%CC%88", "%C3%A4"}, {"%61%CC%8A", "%C3%A5"}, {"%63%CC%A7", "%C3%A7"}, {"%65%CC%80", "%C3%A8"}, {"%65%CC%81", "%C3%A9"}, {"%65%CC%82", "%C3%AA"}, {"%65%CC%88", "%C3%AB"}, {"%69%CC%80", "%C3%AC"}, {"%69%CC%81", "%C3%AD"}, {"%69%CC%82", "%C3%AE"}, {"%69%CC%88", "%C3%AF"}, {"%6E%CC%83", "%C3%B1"}, {"%6F%CC%82", "%C3%B4"}, {"%6F%CC%83", "%C3%B5"}, {"%6F%CC%88", "%C3%B6"}, {"%6F%CC%B7", "%C3%B8"}, {"%73%CC%8C", "%C5%A1"}, {"%75%CC%80", "%C3%B9"}, {"%75%CC%81", "%C3%BA"}, {"%75%CC%82", "%C3%BB"}, {"%75%CC%88", "%C3%BC"}, {"%79%CC%81", "%C3%BD"}, {"%79%CC%88", "%C3%BF"}, {"%7A%CC%8C", "%C5%BE"}, {"%41%CC%80", "%C3%80"}, {"%41%CC%81", "%C3%81"}, {"%41%CC%82", "%C3%82"}, {"%41%CC%83", "%C3%83"}, {"%41%CC%88", "%C3%84"}, {"%41%CC%8A", "%C3%85"}, {"%43%CC%A7", "%C3%87"}, {"%45%CC%80", "%C3%88"}, {"%45%CC%81", "%C3%89"}, {"%45%CC%82", "%C3%8A"}, {"%45%CC%88", "%C3%8B"}, {"%49%CC%80", "%C3%8C"}, {"%49%CC%81", "%C3%8D"}, {"%49%CC%82", "%C3%8E"}, {"%49%CC%88", "%C3%8F"}, {"%4E%CC%83", "%C3%91"}, {"%4F%CC%82", "%C3%94"}, {"%4F%CC%83", "%C3%95"}, {"%4F%CC%88", "%C3%96"}, {"%4F%CC%B7", "%C3%98"}, {"%53%CC%8C", "%C5%A0"}, {"%55%CC%80", "%C3%99"}, {"%55%CC%81", "%C3%9A"}, {"%55%CC%82", "%C3%9B"}, {"%55%CC%88", "%C3%9C"}, {"%59%CC%88", "%C5%B8"}, {"%5A%CC%8C", "%C5%BD"}} | |
-- a section demonstrating the use of the encoding with Bear.app | |
set aText to "this text will be appended to aNoteToBeExtended" | |
set appender to my encode_text(aText, true, false) | |
set xurl to "bear://x-callback-url/add-text?text=" & appender & "&title=aNoteToBeExtended&mode=append" | |
tell application "Bear" | |
open location xurl | |
end tell | |
-- end of Bear.app example | |
-- the encoding sub-routines: | |
on encode_byte(this_byte) | |
set the hex_list to {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F"} | |
set x to item ((this_byte div 16) + 1) of the hex_list | |
set y to item ((this_byte mod 16) + 1) of the hex_list | |
return ("%" & x & y) as string | |
end encode_byte | |
on encode_char(this_char) | |
set the UTF8_id to (the id of this_char) | |
if class of (UTF8_id) is list then | |
set UTF8_ids to UTF8_id | |
set isCombo to true | |
else | |
set UTF8_ids to {UTF8_id} | |
set isCombo to false | |
end if | |
set toReturn to "" | |
repeat with UTF8_num in UTF8_ids | |
log UTF8_num & class of UTF8_num | |
if UTF8_num < 128 then | |
set the UTF8_byte to encode_byte(UTF8_num) | |
set toReturn to toReturn & UTF8_byte | |
log toReturn | |
else if UTF8_num < 2048 then | |
set UTF8_2_1 to encode_byte((UTF8_num div 64) + 192) | |
set UTF8_2_2 to encode_byte((UTF8_num mod 64) + 128) | |
set toReturn to toReturn & UTF8_2_1 & UTF8_2_2 | |
log toReturn | |
else if UTF8_num < 65536 then | |
set UTF8_3_1 to encode_byte((UTF8_num div 4096) + 224) | |
set UTF8_3_2 to encode_byte(((UTF8_num mod 4096) div 64) + 128) | |
set UTF8_3_3 to encode_byte((UTF8_num mod 64) + 128) | |
set toReturn to toReturn & UTF8_3_1 & UTF8_3_2 & UTF8_3_3 | |
else | |
set toReturn to "?" | |
end if | |
end repeat | |
if isCombo then | |
repeat with theItem in swapChar | |
if toReturn is (item 1 of theItem) then | |
set toReturn to item 2 of theItem | |
end if | |
end repeat | |
end if | |
return toReturn | |
end encode_char | |
-- this sub-routine is used to encode text | |
on encode_text(this_text, encode_URL_A, encode_URL_B) | |
set the standard_characters to "abcdefghijklmnopqrstuvwxyz0123456789" | |
set the URL_A_chars to "$+!'/?;&@=#%><{}[]\"~`^\\|*" | |
set the URL_B_chars to ".-_:" | |
set the acceptable_characters to the standard_characters | |
if encode_URL_A is false then set the acceptable_characters to the acceptable_characters & the URL_A_chars | |
if encode_URL_B is false then set the acceptable_characters to the acceptable_characters & the URL_B_chars | |
set the encoded_text to "" | |
repeat with this_char in this_text | |
if this_char is in the acceptable_characters then | |
set the encoded_text to (the encoded_text & this_char) | |
else | |
set the encoded_text to (the encoded_text & encode_char(this_char)) as string | |
end if | |
end repeat | |
return the encoded_text | |
end encode_text |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment