Skip to content

Instantly share code, notes, and snippets.

@vphantom
Created January 23, 2025 22:19
Replace the X clipboard with cleaned-up, LLM-ready context
#!/bin/bash
# Exit with status 2 unless every external tool this script cannot work
# without is installed. `command -v` is a shell builtin, so the check does
# not itself depend on the optional `which` program being present.
# flock is included because, without it, the locking step further down would
# misreport the problem as a lock timeout.
for tool in xclip html2markdown flock; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "${tool} is not installed" >&2
    exit 2
  fi
done
#######################################
# Show a message to the user.
# Arguments: the message; multiple arguments are joined with spaces when
#            written to stderr and passed through unchanged to gxmessage.
# Outputs:   writes the message to stderr when stdout is a terminal or when
#            gxmessage is not installed; otherwise opens a gxmessage dialog
#            with a single OK button.
# Returns:   status of printf or of gxmessage (callers in this script do not
#            check it).
#######################################
function warn() {
  # Without a terminal on stdout the script was presumably launched from a
  # key binding or menu, so a dialog is the only way to reach the user.
  # If gxmessage is missing, fall back to stderr rather than losing the
  # message behind a "command not found" error.
  if [[ -t 1 ]] || ! command -v gxmessage >/dev/null 2>&1; then
    # printf instead of echo so that a message such as "-n" is printed
    # verbatim instead of being parsed as an option.
    printf '%s\n' "$*" >&2
  else
    gxmessage -buttons OK -default OK "$@"
  fi
}
# Get next sequential number from counter file.
# The counter lives in /tmp and is shared by every invocation; an exclusive
# flock on a side-car lock file keeps concurrent runs from handing out the
# same number.
# NOTE(review): both paths are predictable names in a world-writable
# directory; they are kept as-is because the output files are meant to be
# easy to find, but confirm this is acceptable on multi-user machines.
COUNTER_FILE="/tmp/webclip-counter"
LOCK_FILE="${COUNTER_FILE}.lock"

#######################################
# Turn the raw contents of the counter file into a safe decimal number.
# Arguments: $1 - raw text read from the counter file
# Outputs:   the number in base 10 (leading zeros removed), or 1 when the
#            text is empty or is anything other than 1 to 18 digits
#######################################
function sanitize_serial() {
  local raw=$1
  # Only plain digits may reach arithmetic expansion: bash evaluates the
  # *contents* of a variable used inside $(( )), so arbitrary text from a
  # file in /tmp could otherwise run commands or abort the script.
  if [[ "$raw" =~ ^[0-9]{1,18}$ ]]; then
    # 10# forces base 10 so that values like "008" are not read as octal.
    printf '%d\n' "$((10#$raw))"
  else
    printf '1\n'
  fi
}

# Opening fd 9 for writing creates the lock file when it does not exist yet.
exec 9>"$LOCK_FILE"
if ! flock --timeout 2 9; then
  warn "Could not acquire lock within 2 seconds"
  exec 9>&-
  exit 3
fi
# Numbering starts at 1 when there is no counter file yet.
SERIAL=1
if [[ -f "$COUNTER_FILE" ]]; then
  SERIAL=$(sanitize_serial "$(<"$COUNTER_FILE")")
fi
echo $((SERIAL + 1)) >"$COUNTER_FILE"
flock -u 9
exec 9>&-
# Base path (no extension) shared by this run's .html and .md files.
printf -v TEMP '/tmp/webclip-%03d' "$SERIAL"
# 1. Get rich text clipboard and URL
# Candidate clipboard targets, most preferred first. Entries are matched as
# glob patterns, so "text/html;*" covers e.g. "text/html;charset=utf-8".
declare -a HTML_TARGETS=("text/html" "text/html;*" "html" "HTML")
declare -a TEXT_TARGETS=("text/plain" "text/plain;*" "UTF8_STRING" "STRING" "TEXT")
declare -a URL_TARGETS=("chromium/x-source-url" "text/x-moz-url")

# Ask the clipboard owner once which targets it currently offers.
declare -a AVAILABLE_TARGETS=()
while IFS= read -r target; do
  AVAILABLE_TARGETS+=("$target")
done < <(xclip -selection clipboard -o -t TARGETS)

# Find best available HTML target.
# The preference list drives the outer loop so that the earliest entry of
# HTML_TARGETS that the clipboard offers wins, regardless of the order in
# which the clipboard owner happens to list its targets.
HTML_TARGET=""
TEXT_TARGET=""
for supported in "${HTML_TARGETS[@]}"; do
  for target in "${AVAILABLE_TARGETS[@]}"; do
    # Right-hand side deliberately unquoted so "*" acts as a wildcard.
    # shellcheck disable=SC2053
    if [[ "$target" == $supported ]]; then
      HTML_TARGET="$target"
      break 2
    fi
  done
done

# No HTML on offer: fall back to the best available plain-text target.
if [[ -z "$HTML_TARGET" ]]; then
  for supported in "${TEXT_TARGETS[@]}"; do
    for target in "${AVAILABLE_TARGETS[@]}"; do
      # shellcheck disable=SC2053
      if [[ "$target" == $supported ]]; then
        TEXT_TARGET="$target"
        break 2
      fi
    done
  done
  if [[ -z "$TEXT_TARGET" ]]; then
    warn "Could not find a supported clipboard format"
    exit 1
  fi
fi
# Look for a clipboard target that carries the source page's URL.
# The clipboard's own target list is scanned top to bottom and the first
# entry equal to one of URL_TARGETS is kept; URL_TARGET stays empty when
# there is none.
URL_TARGET=""
while IFS= read -r offered; do
  for wanted in "${URL_TARGETS[@]}"; do
    [[ "$offered" == "$wanted" ]] || continue
    URL_TARGET="$offered"
    break 2
  done
done < <(xclip -selection clipboard -o -t TARGETS)
# Save the raw clipboard payload next to the Markdown output. The file is
# named .html even when it holds plain text. Only one of HTML_TARGET and
# TEXT_TARGET is set by the selection step above, so this picks whichever
# one that is.
# Stop here if the payload cannot be fetched or saved; carrying on would
# overwrite the user's clipboard with an empty reference.
if ! xclip -selection clipboard -o -t "${HTML_TARGET:-$TEXT_TARGET}" >"${TEMP}.html"; then
  warn "Could not save clipboard contents to ${TEMP}.html"
  exit 1
fi
# The source URL is optional: it stays empty when no URL target was found
# or when fetching it produces no output.
# NOTE(review): text/x-moz-url is reportedly UTF-16 "URL<newline>title";
# confirm the value is usable as-is when it comes from that target.
URL=""
if [[ -n "$URL_TARGET" ]]; then
  URL=$(xclip -selection clipboard -o -t "$URL_TARGET")
fi
# 2. Sanitize
# Write ${TEMP}.md in one pass: a <reference> envelope holding the serial
# number, the source URL when one was found, and the content. HTML payloads
# are converted with html2markdown; plain text is copied through unchanged.
# The envelope is surrounded by one blank line on each side.
{
  printf '\n<reference>\n'
  printf '<number>%s</number>\n' "$SERIAL"
  if [[ -n "$URL" ]]; then
    printf '<url>%s</url>\n' "$URL"
  fi
  printf '<content>\n'
  if [[ -n "$HTML_TARGET" ]]; then
    html2markdown --images-to-alt --escape-all --no-automatic-links --mark-code <"${TEMP}.html"
  else
    cat "${TEMP}.html"
  fi
  printf '</content>\n</reference>\n\n'
} >"${TEMP}.md"
# 3. Replace into both clipboards
# There is no way to know whether the user will paste from the primary
# selection or from the clipboard, so the Markdown goes into both.
# xclip, unlike Chromium, cannot offer several targets at once; "STRING" is
# used because "TEXT" rarely worked.
for selection in primary clipboard; do
  xclip -selection "$selection" -i -t STRING <"${TEMP}.md"
done
# Tell the user what ended up in the clipboard: which kind of source was
# converted and how large the resulting Markdown is.
MD_WORDS=$(wc -w <"${TEMP}.md")
MD_LINES=$(wc -l <"${TEMP}.md")
if [[ -z "$HTML_TARGET" ]]; then
  SOURCE_KIND="TEXT"
else
  SOURCE_KIND="HTML"
fi
warn "${SOURCE_KIND} Clipboard is now LLM-friendly Markdown: ${MD_WORDS} words, ${MD_LINES} lines"
# The raw (.html) and sanitized (.md) files are deliberately left in place
# for human inspection.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment