-
-
Save simonw/5b44a662354e124e33cc1d4704cdb91a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH | |
-- Extract images from blog_entry.body (always HTML) | |
entry_images AS ( | |
SELECT | |
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url, | |
created, | |
(REGEXP_MATCHES(body, '<img[^>]*?src="([^"]*)"[^>]*?alt="([^"]*)"', 'g'))[1] AS src, | |
(REGEXP_MATCHES(body, '<img[^>]*?src="([^"]*)"[^>]*?alt="([^"]*)"', 'g'))[2] AS alt_text | |
FROM blog_entry | |
WHERE body ~ '<img[^>]*?src="[^"]*"[^>]*?alt="[^"]*"' | |
UNION ALL | |
SELECT | |
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url, | |
created, | |
(REGEXP_MATCHES(body, '<img[^>]*?alt="([^"]*)"[^>]*?src="([^"]*)"', 'g'))[2] AS src, | |
(REGEXP_MATCHES(body, '<img[^>]*?alt="([^"]*)"[^>]*?src="([^"]*)"', 'g'))[1] AS alt_text | |
FROM blog_entry | |
WHERE body ~ '<img[^>]*?alt="[^"]*"[^>]*?src="[^"]*"' | |
), | |
-- Extract images from blog_blogmark.commentary (HTML if use_markdown is false) | |
blogmark_html_images AS ( | |
SELECT | |
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url, | |
created, | |
(REGEXP_MATCHES(commentary, '<img[^>]*?src="([^"]*)"[^>]*?alt="([^"]*)"', 'g'))[1] AS src, | |
(REGEXP_MATCHES(commentary, '<img[^>]*?src="([^"]*)"[^>]*?alt="([^"]*)"', 'g'))[2] AS alt_text | |
FROM blog_blogmark | |
WHERE use_markdown = false AND commentary ~ '<img[^>]*?src="[^"]*"[^>]*?alt="[^"]*"' | |
UNION ALL | |
SELECT | |
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url, | |
created, | |
(REGEXP_MATCHES(commentary, '<img[^>]*?alt="([^"]*)"[^>]*?src="([^"]*)"', 'g'))[2] AS src, | |
(REGEXP_MATCHES(commentary, '<img[^>]*?alt="([^"]*)"[^>]*?src="([^"]*)"', 'g'))[1] AS alt_text | |
FROM blog_blogmark | |
WHERE use_markdown = false AND commentary ~ '<img[^>]*?alt="[^"]*"[^>]*?src="[^"]*"' | |
), | |
-- Extract markdown images from blog_blogmark.commentary (if use_markdown is true) | |
blogmark_md_images AS ( | |
SELECT | |
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url, | |
created, | |
(REGEXP_MATCHES(commentary, '!\[([^\]]*)\]\(([^)]*)\)', 'g'))[2] AS src, | |
(REGEXP_MATCHES(commentary, '!\[([^\]]*)\]\(([^)]*)\)', 'g'))[1] AS alt_text | |
FROM blog_blogmark | |
WHERE use_markdown = true AND commentary ~ '!\[[^\]]*\]\([^)]*\)' | |
), | |
-- Extract markdown images from blog_quotation.quotation | |
quotation_images AS ( | |
SELECT | |
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url, | |
created, | |
(REGEXP_MATCHES(quotation, '!\[([^\]]*)\]\(([^)]*)\)', 'g'))[2] AS src, | |
(REGEXP_MATCHES(quotation, '!\[([^\]]*)\]\(([^)]*)\)', 'g'))[1] AS alt_text | |
FROM blog_quotation | |
WHERE quotation ~ '!\[[^\]]*\]\([^)]*\)' | |
), | |
-- Extract markdown images from blog_note.body | |
note_images AS ( | |
SELECT | |
'https://simonwillison.net/' || to_char(created, 'YYYY/Mon/') || trim(leading '0' from to_char(created, 'DD')) || '/' || slug AS url, | |
created, | |
(REGEXP_MATCHES(body, '!\[([^\]]*)\]\(([^)]*)\)', 'g'))[2] AS src, | |
(REGEXP_MATCHES(body, '!\[([^\]]*)\]\(([^)]*)\)', 'g'))[1] AS alt_text | |
FROM blog_note | |
WHERE body ~ '!\[[^\]]*\]\([^)]*\)' | |
), | |
-- Combine all results | |
all_images AS ( | |
SELECT url, src, alt_text, created FROM entry_images | |
UNION ALL | |
SELECT url, src, alt_text, created FROM blogmark_html_images | |
UNION ALL | |
SELECT url, src, alt_text, created FROM blogmark_md_images | |
UNION ALL | |
SELECT url, src, alt_text, created FROM quotation_images | |
UNION ALL | |
SELECT url, src, alt_text, created FROM note_images | |
) | |
-- Apply search filter and sort | |
SELECT url, src, alt_text, created | |
FROM all_images | |
WHERE | |
CASE | |
WHEN %(search)s = '' THEN true | |
ELSE | |
alt_text ILIKE '%%' || %(search)s || '%%' OR | |
src ILIKE '%%' || %(search)s || '%%' | |
END | |
ORDER BY created DESC; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment