Created
January 23, 2025 22:19
Replace X clipboard with LLM-ready, cleaned-up context
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Replace the X clipboard contents with an LLM-ready Markdown rendering.
#
# Preflight: abort with exit code 2 unless every required external tool is
# on PATH. `command -v` is a shell builtin, so the check itself does not
# depend on the external `which` program being installed.
for required_tool in xclip html2markdown; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    echo "${required_tool} is not installed" >&2
    exit 2
  fi
done
unset required_tool
#######################################
# Show a message to the user.
# Arguments: the message words; they are joined with single spaces.
# Outputs:   writes the message to stderr when stderr is a terminal, or
#            when gxmessage is not installed; otherwise opens a gxmessage
#            dialog with a single OK button.
#######################################
warn() {
  # Join with a space regardless of the caller's IFS.
  local IFS=' '
  # Test fd 2, not fd 1: stderr is where the text is actually written.
  # Without gxmessage there is no dialog to show, so stderr is the only
  # place left where the message is not silently lost.
  if [[ -t 2 ]] || ! command -v gxmessage >/dev/null 2>&1; then
    printf '%s\n' "$*" >&2
  else
    gxmessage -buttons OK -default OK "$@"
  fi
}
# Get next sequential number from counter file.
# NOTE(review): both paths are fixed names in /tmp, so any local user can
# create or edit them; the counter content is therefore validated before use.
COUNTER_FILE="/tmp/webclip-counter"
LOCK_FILE="${COUNTER_FILE}.lock"

#######################################
# Allocate the next clip number while holding an exclusive lock.
# Globals:   COUNTER_FILE, LOCK_FILE (read); SERIAL (written)
# Returns:   0 on success, 3 if the lock is not acquired within 2 seconds
#######################################
next_serial() {
  local current=""

  # Opening for writing creates the lock file when it does not exist yet,
  # so no separate `touch` is needed.
  exec 9>"$LOCK_FILE"
  if ! flock --timeout 2 9; then
    warn "Could not acquire lock within 2 seconds"
    exec 9>&-
    return 3
  fi

  if [[ -f "$COUNTER_FILE" ]]; then
    # `read` reports failure on a missing trailing newline but still assigns.
    IFS= read -r current <"$COUNTER_FILE" || true
  fi
  # Accept plain decimal digits only. Anything else (empty, garbage, or text
  # crafted to be evaluated by $(( ))) restarts the sequence at 1. The 10#
  # prefix keeps values such as "08" from being parsed as invalid octal.
  if [[ "$current" =~ ^[0-9]{1,18}$ ]]; then
    current=$((10#$current))
  else
    current=1
  fi
  printf '%s\n' "$((current + 1))" >"$COUNTER_FILE"

  flock -u 9
  exec 9>&-
  SERIAL=$current
}

next_serial || exit 3
# Base path (without extension) of the files written for this clip.
TEMP="/tmp/webclip-$(printf "%03d" "$SERIAL")"
# Get rich text clipboard and URL.
# Candidate clipboard targets, most preferred first. An entry may be a glob
# pattern (e.g. "text/html;*" to accept any parameter suffix).
declare -a HTML_TARGETS=("text/html" "text/html;*" "html" "HTML")
declare -a TEXT_TARGETS=("text/plain" "text/plain;*" "UTF8_STRING" "STRING" "TEXT")
declare -a URL_TARGETS=("chromium/x-source-url" "text/x-moz-url")

# Query the list of offered targets once, so that all three selections below
# are made against the same snapshot of the clipboard.
mapfile -t AVAILABLE_TARGETS < <(xclip -selection clipboard -o -t TARGETS)

#######################################
# Print the offered target matching the most preferred pattern.
# Globals:   AVAILABLE_TARGETS (read)
# Arguments: target patterns, most preferred first
# Outputs:   the matching offered target on stdout
# Returns:   0 if a target matched, 1 otherwise
#######################################
pick_target() {
  local wanted offered
  # Preferences drive the outer loop so the order of the pattern list, not
  # the order in which the clipboard owner lists its targets, decides.
  for wanted in "$@"; do
    for offered in "${AVAILABLE_TARGETS[@]}"; do
      # Right-hand side deliberately unquoted so "*" acts as a wildcard.
      # shellcheck disable=SC2053
      if [[ "$offered" == $wanted ]]; then
        printf '%s\n' "$offered"
        return 0
      fi
    done
  done
  return 1
}

# Find best available HTML target, falling back to plain text.
HTML_TARGET=$(pick_target "${HTML_TARGETS[@]}") || HTML_TARGET=""
TEXT_TARGET=""
if [[ -z "$HTML_TARGET" ]]; then
  TEXT_TARGET=$(pick_target "${TEXT_TARGETS[@]}") || TEXT_TARGET=""
  if [[ -z "$TEXT_TARGET" ]]; then
    warn "Could not find a supported clipboard format"
    exit 1
  fi
fi

# Find best available URL target (optional).
URL_TARGET=$(pick_target "${URL_TARGETS[@]}") || URL_TARGET=""

# Exactly one of HTML_TARGET / TEXT_TARGET is non-empty at this point. The
# raw content is saved with an .html suffix in both cases.
if ! xclip -selection clipboard -o -t "${HTML_TARGET:-$TEXT_TARGET}" >"${TEMP}.html"; then
  warn "Could not read the clipboard contents"
  exit 1
fi
URL=""
if [[ -n "$URL_TARGET" ]]; then
  URL=$(xclip -selection clipboard -o -t "$URL_TARGET")
fi
# 2. Sanitize: wrap the captured content in a <reference> envelope and write
# it to ${TEMP}.md. HTML content is converted with html2markdown; plain text
# is copied through unchanged.
convert_status=0
{
  printf '\n<reference>\n'
  printf '<number>%s</number>\n' "$SERIAL"
  if [[ -n "$URL" ]]; then
    printf '<url>%s</url>\n' "$URL"
  fi
  printf '<content>\n'
  if [[ -n "$HTML_TARGET" ]]; then
    html2markdown --images-to-alt --escape-all --no-automatic-links --mark-code \
      <"${TEMP}.html" || convert_status=$?
  else
    cat "${TEMP}.html" || convert_status=$?
  fi
  printf '</content>\n</reference>\n\n'
} >"${TEMP}.md"

# Stop before the clipboard is overwritten with an empty or truncated result.
if ((convert_status != 0)); then
  warn "Could not convert the clipboard contents to Markdown"
  exit 4
fi
# 3. Replace into both clipboards
# (We NEVER know which one the user intends to use…)
# Note that unlike Chromium, xclip cannot set multiple targets, so we
# choose "STRING" since "TEXT" rarely worked.
failed_selections=()
for selection in primary clipboard; do
  # Attempt both selections even if the first one fails.
  xclip -selection "$selection" -i -t STRING <"${TEMP}.md" \
    || failed_selections+=("$selection")
done
if ((${#failed_selections[@]} > 0)); then
  # Do not announce success when a selection was not actually replaced.
  warn "Could not replace X selection(s): ${failed_selections[*]}"
  exit 1
fi

MD_LINES=$(wc -l <"${TEMP}.md")
MD_WORDS=$(wc -w <"${TEMP}.md")
if [[ -n "$HTML_TARGET" ]]; then
  SOURCE_KIND="HTML"
else
  SOURCE_KIND="TEXT"
fi
warn "${SOURCE_KIND} Clipboard is now LLM-friendly Markdown: ${MD_WORDS} words, ${MD_LINES} lines"
# Willingly leaving the source and sanitized files in place for human
# inspection.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment