Skip to content

Instantly share code, notes, and snippets.

@alexpovel
Last active January 3, 2024 22:05
Show Gist options
  • Save alexpovel/b757a8176601d4cb8086540368493187 to your computer and use it in GitHub Desktop.
Save alexpovel/b757a8176601d4cb8086540368493187 to your computer and use it in GitHub Desktop.
Set Windows clipboard with rich content (respects e.g. formatting when processing)
import logging
import subprocess as sp
import typing as t
import win32clipboard as wcb # `pip install pywin32`
# Configure the root logger at import time so that everything down to DEBUG
# level (including the captured `srgn` stderr) is emitted.
logging.basicConfig(level=logging.DEBUG)
# Found these manually via "InsideClipboard". Doesn't seem to be documented anywhere.
# NOTE(review): 49309 (0xC09D) lies in the 0xC000-0xFFFF range that Win32 hands
# out for formats registered at runtime, so this is presumably the registered
# "HTML Format" and the numeric ID may differ on another machine or session.
# Looking it up by name via `RegisterClipboardFormat` would avoid hard-coding
# it -- TODO confirm.
MS_TEAMS_HTML_FORMAT_ID = 49309
# MS Office was observed to use the very same format ID as MS Teams, hence the alias.
MS_OFFICE_HTML_FORMAT_ID = MS_TEAMS_HTML_FORMAT_ID
def collect_contents(formats: t.Iterable[int]) -> t.Iterable[tuple[int, bytes]]:
"""Collect clipboard contents for all available formats."""
baseline = {wcb.CF_UNICODETEXT, wcb.CF_TEXT}
for format in set(formats) | baseline:
if wcb.IsClipboardFormatAvailable(format):
try:
name = wcb.GetClipboardFormatName(format)
except Exception: # `CF_TEXT` etc. don't have this? 🤷
name = "<unknown>"
logging.info(f"Will use clipboard format {format} ('{name}')")
else:
logging.warning(f"Clipboard format {format} not available, skipping.")
continue
# Original type annotation seems incorrect; this can be either
raw_contents = t.cast(str | bytes, wcb.GetClipboardData(format))
if isinstance(raw_contents, str):
contents = raw_contents.encode("utf8")
else:
contents = raw_contents
try:
contents.decode("utf8")
except UnicodeDecodeError:
try:
contents.decode("utf16")
except UnicodeDecodeError:
logging.error(
"Clipboard contents are neither UTF-8 nor UTF-16, "
+ "cannot continue"
)
raise
else:
logging.info("Clipboard contents are UTF-16, converting to UTF-8")
contents = contents.decode("utf16").encode("utf8")
assert isinstance(contents, bytes)
logging.info(f"Raw clipboard data (length of {len(contents)}):")
logging.info(contents)
yield format, contents
def set_contents(formats_contents: dict[int, bytes]) -> None:
    """Set clipboard contents for all passed formats.

    Each payload is piped through ``srgn --german -vvv`` and the result is
    placed on the clipboard under the same format ID. This function does not
    open or empty the clipboard itself; ``main`` does both before calling it.

    Args:
        formats_contents: Mapping of clipboard format ID to its UTF-8 encoded
            payload.

    Raises:
        subprocess.CalledProcessError: If ``srgn`` exits with a non-zero
            status (its stderr is logged first).
        AssertionError: If the ``srgn`` output differs in byte length from its
            input.
        UnicodeDecodeError: If the ``srgn`` output for ``CF_UNICODETEXT`` is
            not valid UTF-8.
    """
    # `fmt` rather than `format`, so the builtin of that name is not shadowed.
    for fmt, contents in formats_contents.items():
        try:
            srgn = sp.run(
                ["srgn", "--german", "-vvv"],
                input=contents,
                check=True,
                capture_output=True,
            )
        except sp.CalledProcessError as e:
            logging.error(e.stderr)
            raise

        logging.debug("srgn stderr:")
        logging.debug(srgn.stderr)

        out = srgn.stdout
        logging.info("Raw srgn output:")
        logging.info(out)

        # An example output of copying from MS Teams, and its format with ID 49309
        # is (view with "InsideClipboard"):
        #
        # ```
        # Version:0.9
        # StartHTML:0000000105
        # EndHTML:0000000192
        # StartFragment:0000000141
        # EndFragment:0000000156
        # <html>
        # <body>
        # <!--StartFragment--><em>Hello </em><!--EndFragment-->
        # </body>
        # </html>
        # ```
        #
        # The assumption made here is that we do not need to edit the preamble byte
        # offset definitions, as we always only replace two bytes by two others:
        # "ue" with "ü" (0xC3BC), etc. So lengths don't change!
        #
        # Raised explicitly instead of via `assert`, so the check is still
        # enforced when Python runs with `-O`.
        if len(out) != len(contents):
            raise AssertionError(
                "srgn output is not the same length as input, "
                "required for HTML byte offsets to be correct"
            )

        if fmt == wcb.CF_UNICODETEXT:
            # Hand over a `str` and let pywin32 produce the wide-character
            # buffer. Encoding manually with the "utf16" codec would prepend a
            # byte order mark, which ends up on the clipboard as a stray
            # U+FEFF character in front of the text.
            wcb.SetClipboardData(fmt, out.decode("utf8"))
        else:
            wcb.SetClipboardData(fmt, out)
def main() -> None:
    """Rewrite the current clipboard contents in place.

    Reads the text and HTML formats off the clipboard, empties the clipboard
    and writes the payloads back via ``set_contents``. The clipboard is closed
    again even if any of these steps fail.
    """
    # Acquire outside of `try`: if opening fails there is nothing to close, and
    # calling `CloseClipboard` on a clipboard that was never opened would raise
    # a second error that masks the original one.
    wcb.OpenClipboard()
    try:
        formats_contents = dict(
            collect_contents([MS_TEAMS_HTML_FORMAT_ID, MS_OFFICE_HTML_FORMAT_ID])
        )

        # Doesn't work if not cleared properly. See also
        # https://learn.microsoft.com/en-us/windows/win32/dataxchg/using-the-clipboard#copying-information-to-the-clipboard
        wcb.EmptyClipboard()

        set_contents(formats_contents)
    finally:
        wcb.CloseClipboard()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment