Skip to content

Instantly share code, notes, and snippets.

@simonw

simonw/alt.sql Secret

Created April 28, 2025 01:21
Show Gist options
  • Save simonw/5b44a662354e124e33cc1d4704cdb91a to your computer and use it in GitHub Desktop.
Save simonw/5b44a662354e124e33cc1d4704cdb91a to your computer and use it in GitHub Desktop.
WITH
-- Extract images from blog_entry.body (always HTML)
entry_images AS (
SELECT
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url,
created,
(REGEXP_MATCHES(body, '<img[^>]*?src="([^"]*)"[^>]*?alt="([^"]*)"', 'g'))[1] AS src,
(REGEXP_MATCHES(body, '<img[^>]*?src="([^"]*)"[^>]*?alt="([^"]*)"', 'g'))[2] AS alt_text
FROM blog_entry
WHERE body ~ '<img[^>]*?src="[^"]*"[^>]*?alt="[^"]*"'
UNION ALL
SELECT
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url,
created,
(REGEXP_MATCHES(body, '<img[^>]*?alt="([^"]*)"[^>]*?src="([^"]*)"', 'g'))[2] AS src,
(REGEXP_MATCHES(body, '<img[^>]*?alt="([^"]*)"[^>]*?src="([^"]*)"', 'g'))[1] AS alt_text
FROM blog_entry
WHERE body ~ '<img[^>]*?alt="[^"]*"[^>]*?src="[^"]*"'
),
-- Extract images from blog_blogmark.commentary (HTML if use_markdown is false)
blogmark_html_images AS (
SELECT
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url,
created,
(REGEXP_MATCHES(commentary, '<img[^>]*?src="([^"]*)"[^>]*?alt="([^"]*)"', 'g'))[1] AS src,
(REGEXP_MATCHES(commentary, '<img[^>]*?src="([^"]*)"[^>]*?alt="([^"]*)"', 'g'))[2] AS alt_text
FROM blog_blogmark
WHERE use_markdown = false AND commentary ~ '<img[^>]*?src="[^"]*"[^>]*?alt="[^"]*"'
UNION ALL
SELECT
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url,
created,
(REGEXP_MATCHES(commentary, '<img[^>]*?alt="([^"]*)"[^>]*?src="([^"]*)"', 'g'))[2] AS src,
(REGEXP_MATCHES(commentary, '<img[^>]*?alt="([^"]*)"[^>]*?src="([^"]*)"', 'g'))[1] AS alt_text
FROM blog_blogmark
WHERE use_markdown = false AND commentary ~ '<img[^>]*?alt="[^"]*"[^>]*?src="[^"]*"'
),
-- Extract markdown images from blog_blogmark.commentary (if use_markdown is true)
blogmark_md_images AS (
SELECT
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url,
created,
(REGEXP_MATCHES(commentary, '!\[([^\]]*)\]\(([^)]*)\)', 'g'))[2] AS src,
(REGEXP_MATCHES(commentary, '!\[([^\]]*)\]\(([^)]*)\)', 'g'))[1] AS alt_text
FROM blog_blogmark
WHERE use_markdown = true AND commentary ~ '!\[[^\]]*\]\([^)]*\)'
),
-- Extract markdown images from blog_quotation.quotation
quotation_images AS (
SELECT
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url,
created,
(REGEXP_MATCHES(quotation, '!\[([^\]]*)\]\(([^)]*)\)', 'g'))[2] AS src,
(REGEXP_MATCHES(quotation, '!\[([^\]]*)\]\(([^)]*)\)', 'g'))[1] AS alt_text
FROM blog_quotation
WHERE quotation ~ '!\[[^\]]*\]\([^)]*\)'
),
-- Extract markdown images from blog_note.body
note_images AS (
SELECT
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url,
created,
(REGEXP_MATCHES(body, '!\[([^\]]*)\]\(([^)]*)\)', 'g'))[2] AS src,
(REGEXP_MATCHES(body, '!\[([^\]]*)\]\(([^)]*)\)', 'g'))[1] AS alt_text
FROM blog_note
WHERE body ~ '!\[[^\]]*\]\([^)]*\)'
),
-- Combine all results
all_images AS (
SELECT url, src, alt_text, created FROM entry_images
UNION ALL
SELECT url, src, alt_text, created FROM blogmark_html_images
UNION ALL
SELECT url, src, alt_text, created FROM blogmark_md_images
UNION ALL
SELECT url, src, alt_text, created FROM quotation_images
UNION ALL
SELECT url, src, alt_text, created FROM note_images
)
-- Apply search filter and sort
SELECT url, src, alt_text, created
FROM all_images
WHERE
CASE
WHEN %(search)s = '' THEN true
ELSE
alt_text ILIKE '%%' || %(search)s || '%%' OR
src ILIKE '%%' || %(search)s || '%%'
END
ORDER BY created DESC;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment