-
-
Save simonw/5aed8bd87016c77465c23e0dc4563ec9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
import httpx | |
import sqlite3 | |
import tempfile | |
import pathlib | |
from typing import Optional | |
class DatabaseTooLargeError(Exception): | |
"""Raised when a database exceeds the maximum allowed size.""" | |
pass | |
class DatabaseValidationError(Exception): | |
"""Raised when a downloaded file fails SQLite validation.""" | |
pass | |
class ContentLengthMismatchError(Exception): | |
"""Raised when the actual content length differs from the declared length.""" | |
pass | |
async def download_db(url: str, max_size_bytes: int = 5 * 1024 * 1024) -> pathlib.Path: | |
""" | |
Download a SQLite database from a URL to a temporary file. | |
Args: | |
url: The URL of the SQLite database to download | |
max_size_bytes: Maximum allowed size in bytes (default: 5MB) | |
Returns: | |
pathlib.Path: Path to the downloaded database file | |
Raises: | |
httpx.HTTPError: If there's an HTTP-related error during download | |
DatabaseTooLargeError: If the database exceeds the size limit | |
ContentLengthMismatchError: If the actual size exceeds the declared size | |
DatabaseValidationError: If the downloaded file is not a valid SQLite database | |
""" | |
# Create a temporary directory for the database | |
temp_dir = pathlib.Path(tempfile.mkdtemp()) | |
db_path = temp_dir / "database.db" | |
# Track the actual bytes received | |
bytes_received = 0 | |
content_length: Optional[int] = None | |
async with httpx.AsyncClient() as client: | |
async with client.stream("GET", url) as response: | |
response.raise_for_status() | |
# Check for Content-Length header | |
if "content-length" in response.headers: | |
content_length = int(response.headers["content-length"]) | |
if content_length > max_size_bytes: | |
raise DatabaseTooLargeError( | |
f"Database size ({content_length} bytes) exceeds the maximum allowed size " | |
f"({max_size_bytes} bytes)" | |
) | |
else: | |
# We'll still enforce the limit while streaming | |
content_length = None | |
# Create and open the file for writing | |
with open(db_path, "wb") as f: | |
async for chunk in response.aiter_bytes(chunk_size=8192): | |
# Update bytes received counter | |
bytes_received += len(chunk) | |
# Check if we've exceeded the size limit | |
if bytes_received > max_size_bytes: | |
# Clean up the file | |
try: | |
db_path.unlink() | |
except: | |
pass | |
if content_length is not None and bytes_received > content_length: | |
raise ContentLengthMismatchError( | |
f"Actual size ({bytes_received} bytes) exceeds declared size " | |
f"({content_length} bytes)" | |
) | |
else: | |
raise DatabaseTooLargeError( | |
f"Database size exceeds the maximum allowed size " | |
f"({max_size_bytes} bytes)" | |
) | |
# Check if content length is being honest when provided | |
if content_length is not None and bytes_received > content_length: | |
# Clean up the file | |
try: | |
db_path.unlink() | |
except: | |
pass | |
raise ContentLengthMismatchError( | |
f"Actual size ({bytes_received} bytes) exceeds declared size " | |
f"({content_length} bytes)" | |
) | |
# Write the chunk to the file | |
f.write(chunk) | |
# Validate that this is a legitimate SQLite database | |
try: | |
conn = sqlite3.connect(str(db_path)) | |
cursor = conn.cursor() | |
# Run PRAGMA quick_check to validate the database | |
cursor.execute("PRAGMA quick_check") | |
result = cursor.fetchone()[0] | |
if result != "ok": | |
try: | |
db_path.unlink() | |
except: | |
pass | |
raise DatabaseValidationError(f"Database validation failed: {result}") | |
cursor.close() | |
conn.close() | |
except sqlite3.Error as e: | |
# Clean up the file if it's not a valid SQLite database | |
try: | |
db_path.unlink() | |
except: | |
pass | |
raise DatabaseValidationError(f"Not a valid SQLite database: {str(e)}") | |
return db_path |
The point is, don't count on an LLM to produce the clearest, most succinct and maintainable code.
So true, and prompt engineering is key. My standard "rules" which I include in all my contexts mention DRY and KISS principles as well as readability, which helps. I also like to think of the initial output as a draft, after which I have a few refactoring iterations asking the LLM to think about extracting redundancies into functions, to consider performance, or other specific improvements I don't want to do myself.
(Edit: I see the blog post mentions exactly that in its very next section… 😛)
For anybody wondering what blogpost is everybody referring to: https://simonwillison.net/2025/Mar/11/using-llms-for-code/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Came here from your blog post, where you say Claude wrote it in 15 seconds. I guess this is serviceable, though it has some redundancies that could use cleaning up, especially on lines 72 to 101. I would add a local function just below the initialization of
content_length
:then boil 72-101 down to:
The point is, don't count on an LLM to produce the clearest, most succinct and maintainable code.