Skip to content

Instantly share code, notes, and snippets.

@simonw
Created March 10, 2025 05:39
Show Gist options
  • Select an option

  • Save simonw/a560b07eef577e6183021d1ccaae7e07 to your computer and use it in GitHub Desktop.

Select an option

Save simonw/a560b07eef577e6183021d1ccaae7e07 to your computer and use it in GitHub Desktop.
╭────────────────────────────────────────────────╮
│ ✻ Welcome to Claude Code research preview! │
│ │
│ /help for help │
│ │
│ cwd: /Users/simon/Dropbox/Development/tools │
╰────────────────────────────────────────────────╯
> This repo is deployed using GitHub Pages. I want to modify the build slightly - before deploying to
GitHub Pages it should run python gather_links.py which will generate a file called
gathered_links.json - then it should run python build_colophon.py which will use that file to create
colophon.html - I want both gathered_links.json and colophon.html to be published to GitHub pages but
I do not want them checked into the repo itself
⏺ Read(file_path: "/Users/simon/Dropbox/Development/tools/gather_links.py")…
  ⎿  #!/usr/bin/env python3
import json
import os
import re
import subprocess
from pathlib import Path
from datetime import datetime
def get_file_commit_details(file_path):
    """
    Get the commit details for a specific file.

    Runs ``git log`` for *file_path* in the current working directory and
    parses its NUL-separated output.

    Returns a list of dictionaries with "hash", "date" (ISO 8601 author
    date) and "message" keys, newest commit first. Returns an empty list
    when git fails (file not tracked, not a repository, etc.).
    """
    try:
        # %H = full hash, %aI = ISO-8601 author date, %B = raw body.
        # %x00 (NUL) terminates each record so multi-line messages that
        # contain "|" or newlines cannot break the parse.
        result = subprocess.run(
            ["git", "log", "--format=%H|%aI|%B%x00", "--", file_path],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        print(f"Error getting commit history for {file_path}: {e}")
        return []

    commits = []
    for raw_commit in result.stdout.strip().split("\0"):
        # Every record after the first carries a leading newline left
        # over from the previous record's terminator; strip it BEFORE
        # slicing out the hash. (The previous code skipped this, so all
        # hashes after the first came back prefixed with "\n", which
        # broke short hashes and commit URLs downstream.)
        raw_commit = raw_commit.strip()
        if not raw_commit:
            continue
        # hash|date|message -- split at the first two pipes only, since
        # the free-form message may itself contain "|".
        parts = raw_commit.split("|", 2)
        if len(parts) != 3:
            continue
        commit_hash, commit_date, commit_message = parts
        commits.append({
            "hash": commit_hash,
            "date": commit_date,
            "message": commit_message,
        })
    return commits
def extract_urls(text):
    """Return every http(s) URL found in *text*, in order of appearance.

    A URL is taken to extend until the first whitespace character, which
    matches how links are written in commit messages.
    """
    found = []
    for match in re.finditer(r'https?://\S+', text):
        found.append(match.group(0))
    return found
def main():
    """Scan ``*.html`` files in the current directory, collect each file's
    git commit history plus any URLs mentioned in the commit messages,
    and write the result to gathered_links.json.

    Output shape::

        {"pages": {"<file>.html": {"commits": [...], "urls": [...]}}}

    Files with no git history (untracked, or git errored) are omitted.
    """
    current_dir = Path.cwd()
    html_files = list(current_dir.glob("*.html"))

    results = {"pages": {}}
    for html_file in html_files:
        file_name = html_file.name
        print(f"Processing {file_name}...")

        commits = get_file_commit_details(html_file)
        if not commits:
            # Untracked file (or git error) -- nothing to report.
            # (This also makes the original's second `if commits:` check
            # redundant, so it has been removed.)
            continue

        # Gather URLs from every commit message, de-duplicated while
        # preserving first-seen order (dict keys keep insertion order).
        all_urls = []
        for commit in commits:
            all_urls.extend(extract_urls(commit["message"]))
        unique_urls = list(dict.fromkeys(all_urls))

        results["pages"][file_name] = {
            "commits": commits,
            "urls": unique_urls,
        }

    with open("gathered_links.json", "w") as f:
        json.dump(results, f, indent=2)

    print(f"Processed {len(html_files)} files")
    print(f"Found details for {len(results['pages'])} files")
    print("Results saved to gathered_links.json")


if __name__ == "__main__":
    main()
⏺ Read(file_path: "/Users/simon/Dropbox/Development/tools/build_colophon.py")…
  ⎿  #!/usr/bin/env python3
import json
import re
from datetime import datetime
import html
from pathlib import Path
def linkify_urls(text):
    """Replace each http(s) URL in *text* with an HTML anchor that opens
    in a new tab; all other text is left untouched."""
    def as_anchor(match):
        url = match.group(1)
        return f'<a href="{url}" target="_blank">{url}</a>'

    return re.sub(r'(https?://[^\s]+)', as_anchor, text)
def format_commit_message(message):
    """Render a raw commit message as safe HTML.

    HTML entities are escaped first, newlines become ``<br>`` tags, and
    URLs are turned into clickable links. Escaping happens before
    linkification, matching the original pipeline order.
    """
    safe = html.escape(message)
    return linkify_urls(safe.replace('\n', '<br>'))
def build_colophon():
    """Build colophon.html from gathered_links.json.

    Pages are ordered newest-first by their most recent commit date;
    within each page, commits are rendered oldest-first so the build
    history reads top to bottom. Prints a message and returns early if
    gathered_links.json is missing or contains no pages.
    """
    try:
        with open('gathered_links.json', 'r') as f:
            data = json.load(f)
    except FileNotFoundError:
        print("Error: gathered_links.json not found. Run gather_links.py first.")
        return

    pages = data.get('pages', {})
    if not pages:
        print("No pages found in gathered_links.json")
        return

    def get_most_recent_date(page_data):
        # ISO 8601 strings compare correctly lexicographically, so max()
        # over the raw strings finds the newest commit. The all-zero
        # sentinel sorts commit-less pages last.
        commits = page_data.get('commits', [])
        dates = [commit.get('date', "0000-00-00T00:00:00") for commit in commits]
        return max(dates) if dates else "0000-00-00T00:00:00"

    # Newest tool first. (The original line was missing its closing
    # parenthesis, which made the whole script a SyntaxError.)
    sorted_pages = sorted(
        pages.items(),
        key=lambda item: get_most_recent_date(item[1]),
        reverse=True,
    )

    # Static document head and styles.
    html_content = '''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Tools Colophon</title>
    <style>
        body {
            font-family: system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
                Helvetica, Arial, sans-serif;
            line-height: 1.5;
            max-width: 800px;
            margin: 0 auto;
            padding: 1rem;
            color: #1a1a1a;
        }
        h1 {
            border-bottom: 2px solid #f0f0f0;
            padding-bottom: 0.5rem;
            margin-top: 2rem;
        }
        h2 {
            margin-top: 2rem;
            font-size: 1.4rem;
        }
        a {
            color: #0066cc;
            text-decoration: none;
        }
        a:hover {
            text-decoration: underline;
        }
        .tool {
            margin-bottom: 2rem;
            border-bottom: 1px solid #f0f0f0;
            padding-bottom: 1rem;
        }
        .tool-name {
            font-weight: bold;
        }
        .commit {
            background-color: #f8f9fa;
            padding: 1rem;
            margin: 0.75rem 0;
            border-radius: 4px;
            border-left: 3px solid #ddd;
        }
        .commit-hash {
            font-family: monospace;
            color: #666;
            font-size: 0.85rem;
        }
        .commit-date {
            color: #666;
            font-size: 0.85rem;
            margin-left: 0.5rem;
        }
        .commit-message {
            margin-top: 0.5rem;
        }
        .urls {
            margin-top: 1rem;
        }
        @media (max-width: 600px) {
            body {
                padding: 0.5rem;
            }
            .commit {
                padding: 0.75rem;
            }
        }
    </style>
</head>
<body>
    <h1>Tools Colophon</h1>
    <p>This page documents the creation of the tools on <a
    href="https://tools.simonwillison.net/">tools.simonwillison.net</a>,
    including links to the Claude conversations used to build them.</p>
'''

    for page_name, page_data in sorted_pages:
        # "foo.html" is served at /foo on the tools site.
        tool_url = f"https://tools.simonwillison.net/{page_name.replace('.html', '')}"
        # Oldest first within each tool.
        commits = list(reversed(page_data.get('commits', [])))

        html_content += f'''
    <div class="tool">
        <h2 class="tool-name"><a href="{tool_url}">{page_name}</a></h2>
'''

        for commit in commits:
            commit_hash = commit.get('hash', '')
            short_hash = commit_hash[:7] if commit_hash else 'unknown'
            commit_date = commit.get('date', '')

            # Human-readable date; fall back to the raw string if git
            # emitted something fromisoformat cannot parse.
            formatted_date = ''
            if commit_date:
                try:
                    dt = datetime.fromisoformat(commit_date)
                    formatted_date = dt.strftime('%B %d, %Y %H:%M')
                except ValueError:
                    formatted_date = commit_date

            formatted_message = format_commit_message(commit.get('message', ''))
            commit_url = f"https://github.com/simonw/tools/commit/{commit_hash}"

            html_content += f'''
        <div class="commit">
            <div>
                <a href="{commit_url}" class="commit-hash" target="_blank">{short_hash}</a>
                <span class="commit-date">{formatted_date}</span>
            </div>
            <div class="commit-message">{formatted_message}</div>
        </div>
'''

        html_content += '''
    </div>
'''

    html_content += '''
</body>
</html>
'''

    with open('colophon.html', 'w') as f:
        f.write(html_content)

    print("Colophon page built successfully as colophon.html")


if __name__ == "__main__":
    build_colophon()
⏺ Search(pattern: ".github/workflows/*", path: "undefined")…
  ⎿  Found 1 file
⏺ Read(file_path: "/Users/simon/Dropbox/Development/tools/.github/workflows/test.yml")…
  ⎿  name: Test
on:
push:
pull_request:
permissions:
contents: read
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python 3.12
uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: 'pip'
cache-dependency-path: 'dev-requirements.txt'
- name: Cache Playwright browsers
uses: actions/cache@v4
with:
path: ~/.cache/ms-playwright/
key: ${{ runner.os }}-browsers
- name: Install dependencies
run: |
pip install -r dev-requirements.txt
playwright install
- name: Run test
run: |
pytest
⏺ Search(pattern: ".github/workflows/pages.yml", path: "undefined")…
  ⎿  Found 0 files
⏺ Read(file_path: "/Users/simon/Dropbox/Development/tools/dev-requirements.txt")…
  ⎿  pytest-playwright
⏺ I'll create a GitHub Actions workflow for deploying to GitHub Pages that runs both Python scripts to generate the files before deployment.
⏺ Create(file_path: /Users/simon/Dropbox/Development/tools/.github/workflows/pages.yml)…
⎿ Updated /Users/simon/Dropbox/Development/tools/.github/workflows/pages.yml with 62 additions
1 name: Deploy to GitHub Pages
2
3 on:
4 # Run on push to main branch
5 push:
6 branches: [ main ]
7 # Allow manual trigger
8 workflow_dispatch:
9
10 # Sets permissions of the GITHUB_TOKEN
11 permissions:
12 contents: read
13 pages: write
14 id-token: write
15
16 # Allow only one concurrent deployment
17 concurrency:
18 group: pages
19 cancel-in-progress: true
20
21 jobs:
22 build:
23 runs-on: ubuntu-latest
24 steps:
25 - name: Checkout
26 uses: actions/checkout@v4
27 with:
28 fetch-depth: 0 # Need full history for git log in gather_links.py
29
30 - name: Setup Python
31 uses: actions/setup-python@v5
32 with:
33 python-version: '3.12'
34
35 - name: Install dependencies
36 run: |
37 python -m pip install --upgrade pip
38
39 - name: Generate links and build colophon
40 run: |
41 python gather_links.py
42 python build_colophon.py
43
44 - name: Setup Pages
45 uses: actions/configure-pages@v4
46
47 - name: Build with Jekyll
48 uses: actions/jekyll-build-pages@v1
49
50 - name: Upload artifact
51 uses: actions/upload-pages-artifact@v3
52
53 deploy:
54 environment:
55 name: github-pages
56 url: ${{ steps.deployment.outputs.page_url }}
57 runs-on: ubuntu-latest
58 needs: build
59 steps:
60 - name: Deploy to GitHub Pages
61 id: deployment
62 uses: actions/deploy-pages@v4
63 \ No newline at end of file
⏺ Create(file_path: /Users/simon/Dropbox/Development/tools/.gitignore)…
⎿ Updated /Users/simon/Dropbox/Development/tools/.gitignore with 6 additions
1 # Generated files
2 gathered_links.json
3 colophon.html
4
5 # System files
6 .DS_Store
7 \ No newline at end of file
⏺ I've created a GitHub Actions workflow for GitHub Pages that:
1. Creates a .github/workflows/pages.yml file that:
- Runs on pushes to main branch
- Checkouts the full git history (needed for gather_links.py)
- Sets up Python 3.12
- Runs both scripts in sequence to generate the files
- Builds and deploys to GitHub Pages
2. Created .gitignore file to prevent the generated files from being committed
Now when you push to main, GitHub Actions will:
- Run the gather_links.py script to create gathered_links.json
- Run build_colophon.py to create colophon.html
- Deploy both files (and all other files) to GitHub Pages
> /cost
⎿ Total cost: $0.1788
Total duration (API): 44.6s
Total duration (wall): 10m 18.6s
╭───────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ >  │
╰───────────────────────────────────────────────────────────────────────────────────────────────────────╯
Press Ctrl-C again to exit \⏎ for newline
Total cost: $0.1788
Total duration (API): 44.6s
Total duration (wall): 11m 3.7s
(base) simon@Simons-MacBook-Pro-7 tools %
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment