Created
February 7, 2020 18:51
-
-
Save pryrt/5c7b1c2da41b3fa264a2c576ac46e140 to your computer and use it in GitHub Desktop.
Notepad++ PythonScript: Replace Backslash Sequence
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding=utf-8 | |
""" pyscReplaceBackslashSequence | |
When run, this script will search backwards from the current cursor position, | |
looking for a backslash \ . If it finds intervening whitespace (tab, space, | |
newline, NUL), it will abort the search. If it finds a sequence of \ to the | |
current cursor without whitespace interruption, it will treat that text as a
"shortcut sequence" and try to expand it. It will ignore any text
after the current position, so careful placement of the cursor when running
this script will allow inserting a character in between other text.
For now, it recognizes shortcuts of the form | |
* \uXXXX or \u+XXXX or \0xXXXX, all three of which will be replaced by the | |
unicode character at the hexadecimal codepoint XXXX. | |
* EXAMPLE: \u+2611 => ☑ | |
* EXAMPLE: |\U+2611other| => |☑other| if the cursor is between 1 and o | |
* FUTURE: I might do other snippets or shortcuts; however, after learning of the
nppQuickText plugin, which VinsWorldcom/VincentMichael converted to 64-bit | |
and did fixes for at https://github.com/vinsworldcom/nppQuickText, I am not | |
sure anything more is needed | |
Assumes UTF8 encoding of the file (or, rather, that getText/getTextRange returns a series | |
of UTF-8 octets). | |
Work inspired by https://notepad-plus-plus.org/community/topic/18873/ .
I know it had been asked previously, similar to https://community.notepad-plus-plus.org/post/37979 (keyboard map chords, like ^X^C, or eko's ^K^L) | |
-- but that one seems to watch every keystroke, which is more than I want. | |
I actually wanted it to be more like MS Word's Alt+X functionality, where if you type the 4-digit hex for a unicode, then press Alt+X, it will replace those four digits with the unicode character. | |
""" | |
from Npp import * | |
import re | |
# Byte values that abort the backwards search: NUL, tab, LF, CR, space.
# (The original list used 8 -- backspace -- where tab, 9, was intended.)
_SEQUENCE_DELIMITERS = (0, 9, 10, 13, 32)
_BACKSLASH = ord('\\')


def _find_sequence_start(cursor_pos):
    """Return the position of the backslash opening the sequence that ends at cursor_pos.

    Walks backwards one byte at a time from ``cursor_pos``.  Returns the
    position of the first backslash met, or -1 if a delimiter (NUL, tab,
    newline, carriage return, space) or the start of the document is reached
    first.
    """
    pos = cursor_pos
    while pos > 0:
        pos -= 1
        byte = editor.getCharAt(pos)
        if byte < 0 or byte > 127:
            # Non-ASCII byte (getCharAt reports these as negative values, per
            # the original code's `c<0` test).  In UTF-8 every byte of a
            # multi-byte character has its high bit set, so it can be neither
            # a backslash nor one of the delimiters: skip it without decoding.
            # This also avoids a decode error on malformed/non-UTF-8 bytes.
            continue
        if byte in _SEQUENCE_DELIMITERS:
            return -1
        if byte == _BACKSLASH:
            return pos
    return -1


def run_pyscReplaceBackslashSequence():
    """Expand the backslash shortcut sequence that ends at the caret.

    Searches backwards from the current caret position for a backslash with
    no intervening whitespace.  If one is found, the text from that backslash
    up to the caret is selected and every ``\\uXXXX``, ``\\u+XXXX`` or
    ``\\0xXXXX`` escape inside that range (four hex digits, matched
    case-insensitively) is replaced by the UTF-8 encoding of the code point
    XXXX.  Text after the caret is never touched.

    Does nothing when no backslash is found before a whitespace/NUL byte or
    the start of the document.  Returns None.
    """
    cursor_pos = editor.getCurrentPos()
    start_pos = _find_sequence_start(cursor_pos)
    if start_pos < 0:
        return

    editor.setSel(start_pos, cursor_pos)

    def codepoint_to_utf8(match):
        # group(1) holds the four hex digits of the code point
        return unichr(int(match.group(1), 16)).encode('utf-8')

    # Recognised escapes:
    #   \u2611
    #   \U+2611
    #   \0x2611
    editor.beginUndoAction()
    try:
        editor.rereplace(r'\\(?:u\+?|0x)([0-9A-F]{4})', codepoint_to_utf8,
                         re.IGNORECASE, start_pos, cursor_pos)
    finally:
        # always close the undo group, even if the replacement raises
        editor.endUndoAction()
    # TODO: lookup the found text in the shortcuts ini file, or wherever it is
# Run the replacement only when this file is executed as a script
# (e.g. from the PythonScript menu), not when it is imported.
if __name__ == '__main__':
    run_pyscReplaceBackslashSequence()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment