Last active
January 3, 2024 22:05
-
-
Save alexpovel/b757a8176601d4cb8086540368493187 to your computer and use it in GitHub Desktop.
Set Windows clipboard with rich content (respects e.g. formatting when processing)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import subprocess as sp | |
import typing as t | |
import win32clipboard as wcb # `pip install pywin32` | |
logging.basicConfig(level=logging.DEBUG)

# MS Teams and MS Office both put rich text on the clipboard in a registered
# (i.e. not predefined) clipboard format. IDs of registered formats live in the
# 0xC000-0xFFFF range and are handed out by the system at runtime, so a value
# observed once (49309 == 0xC09D, found manually via "InsideClipboard") is not
# guaranteed to be the same in another session or on another machine. Look the
# ID up by name instead; registering an already registered name simply returns
# the existing ID.
# NOTE(review): assumes 49309 was the ID of the format named "HTML Format" (the
# payload starts with the `Version:`/`StartHTML:` header of that format) --
# confirm via `wcb.GetClipboardFormatName(49309)` on the original machine.
MS_TEAMS_HTML_FORMAT_ID = wcb.RegisterClipboardFormat("HTML Format")
MS_OFFICE_HTML_FORMAT_ID = MS_TEAMS_HTML_FORMAT_ID
def _to_utf8(raw: str | bytes, *, is_ansi: bool) -> bytes:
    """Normalize one clipboard payload to UTF-8 encoded bytes."""
    if isinstance(raw, str):
        return raw.encode("utf8")

    if is_ansi:
        # `CF_TEXT` holds text in the ANSI code page, not UTF-8. Any non-ASCII
        # character (e.g. an "ü" that is already present) would otherwise fail
        # UTF-8 decoding and then be misread as UTF-16 or abort the whole run.
        # NOTE(review): "mbcs" is Python's Windows-only codec for the system
        # ANSI code page; confirm this is sufficient if `CF_LOCALE` can differ.
        return raw.decode("mbcs").encode("utf8")

    try:
        raw.decode("utf8")
    except UnicodeDecodeError:
        try:
            text = raw.decode("utf16")
        except UnicodeDecodeError:
            logging.error(
                "Clipboard contents are neither UTF-8 nor UTF-16, "
                + "cannot continue"
            )
            raise
        logging.info("Clipboard contents are UTF-16, converting to UTF-8")
        return text.encode("utf8")

    # Already valid UTF-8, pass through untouched.
    return raw


def collect_contents(formats: t.Iterable[int]) -> t.Iterable[tuple[int, bytes]]:
    """Collect clipboard contents for all available formats.

    `CF_UNICODETEXT` and `CF_TEXT` are always looked at in addition to the
    requested `formats`. This function does not open the clipboard itself.

    Args:
        formats: Clipboard format IDs to collect, duplicates are ignored.

    Yields:
        `(format, contents)` for every format currently available on the
        clipboard, with `contents` normalized to UTF-8 encoded bytes.

    Raises:
        UnicodeDecodeError: If the data of a format cannot be decoded.
    """
    baseline = {wcb.CF_UNICODETEXT, wcb.CF_TEXT}

    for fmt in set(formats) | baseline:
        if not wcb.IsClipboardFormatAvailable(fmt):
            logging.warning(f"Clipboard format {fmt} not available, skipping.")
            continue

        try:
            name = wcb.GetClipboardFormatName(fmt)
        except Exception:  # `CF_TEXT` etc. don't have this? 🤷
            name = "<unknown>"
        logging.info(f"Will use clipboard format {fmt} ('{name}')")

        # Original type annotation seems incorrect; this can be either
        raw_contents = t.cast(str | bytes, wcb.GetClipboardData(fmt))
        contents = _to_utf8(raw_contents, is_ansi=fmt == wcb.CF_TEXT)

        logging.info(f"Raw clipboard data (length of {len(contents)}):")
        logging.info(contents)

        yield fmt, contents
def set_contents(formats_contents: dict[int, bytes]) -> None:
    """Set clipboard contents for all passed formats.

    Each format's contents are piped through `srgn --german` first; the
    processed result is what ends up on the clipboard. This function does not
    open or empty the clipboard itself.

    Args:
        formats_contents: Mapping of clipboard format ID to the UTF-8 encoded
            contents to process and set for that format.

    Raises:
        subprocess.CalledProcessError: If `srgn` exits with a non-zero status.
        AssertionError: If `srgn` changed the byte length of the contents.
    """
    for fmt, contents in formats_contents.items():
        try:
            srgn = sp.run(
                ["srgn", "--german", "-vvv"],
                input=contents,
                check=True,
                capture_output=True,
            )
        except sp.CalledProcessError as e:
            logging.error(e.stderr)
            raise

        logging.debug("srgn stderr:")
        logging.debug(srgn.stderr)

        out = srgn.stdout
        logging.info("Raw srgn output:")
        logging.info(out)

        # An example output of copying from MS Teams, and its format with ID 49309
        # is (view with "InsideClipboard"):
        #
        # ```
        # Version:0.9
        # StartHTML:0000000105
        # EndHTML:0000000192
        # StartFragment:0000000141
        # EndFragment:0000000156
        # <html>
        # <body>
        # <!--StartFragment--><em>Hello </em><!--EndFragment-->
        # </body>
        # </html>
        # ```
        #
        # The assumption made here is that we do not need to edit the preamble byte
        # offset definitions, as we always only replace two bytes by two others:
        # "ue" with "ü" (0xC3BC), etc. So lengths don't change!
        #
        # Raised explicitly rather than via `assert`, so that the check is not
        # stripped when running with `python -O`.
        if len(out) != len(contents):
            raise AssertionError(
                "srgn output is not the same length as input, "
                + "required for HTML byte offsets to be correct"
            )

        data: str | bytes
        if fmt == wcb.CF_UNICODETEXT:
            # Hand over a `str` and let pywin32 store it as terminated UTF-16.
            # Encoding manually with the "utf16" codec would prepend a BOM,
            # which then shows up as a stray character in front of the text.
            data = out.decode("utf8")
        elif fmt == wcb.CF_TEXT:
            # `CF_TEXT` is ANSI text; writing UTF-8 bytes would turn every
            # freshly inserted umlaut into mojibake for consumers of this format.
            # NOTE(review): "mbcs" is Python's Windows-only codec for the system
            # ANSI code page; characters it cannot represent become "?".
            data = out.decode("utf8").encode("mbcs", errors="replace")
        else:
            data = out

        wcb.SetClipboardData(fmt, data)
def main() -> None:
    """Process the current clipboard contents and write them back in place.

    Collects the HTML and plain text formats from the clipboard, empties the
    clipboard, then sets the processed contents for the collected formats.
    """
    # Open *before* entering `try`: if opening fails there is nothing to close,
    # and the `CloseClipboard` call in `finally` would only fail in turn and
    # hide the original error.
    wcb.OpenClipboard()
    try:
        formats_contents = dict(
            collect_contents([MS_TEAMS_HTML_FORMAT_ID, MS_OFFICE_HTML_FORMAT_ID])
        )

        # Doesn't work if not cleared properly 🤷 See also
        # https://learn.microsoft.com/en-us/windows/win32/dataxchg/using-the-clipboard#copying-information-to-the-clipboard
        wcb.EmptyClipboard()

        set_contents(formats_contents)
    finally:
        wcb.CloseClipboard()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment