Skip to content

Instantly share code, notes, and snippets.

@laiso
Last active November 28, 2025 12:24
Show Gist options
  • Select an option

  • Save laiso/33984289411189ca3e94e083e51999f8 to your computer and use it in GitHub Desktop.

Select an option

Save laiso/33984289411189ca3e94e083e51999f8 to your computer and use it in GitHub Desktop.
const fs = require('fs');
const path = require('path');
const BATCH_RESULTS_DIR = path.join(process.cwd(), 'batch_results');

/**
 * Recursively collect the paths of every `scores.json` file under `dir`.
 *
 * @param {string} dir Directory to scan (must exist; readdirSync throws otherwise).
 * @returns {string[]} Absolute paths of all nested scores.json files.
 */
function getAllScoresFiles(dir) {
  const results = [];
  // withFileTypes gives us directory/file info directly from readdir,
  // avoiding a separate statSync call per entry. (The old `stat &&` guard
  // was dead code anyway: statSync throws rather than returning falsy.)
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const entryPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      results.push(...getAllScoresFiles(entryPath));
    } else if (entry.name === 'scores.json') {
      results.push(entryPath);
    }
  }
  return results;
}
/**
 * Aggregate every scores.json found under batch_results and print a
 * per-model markdown summary table (average/min/max/sample count),
 * sorted so the best (lowest) average score comes first.
 */
function main() {
  if (!fs.existsSync(BATCH_RESULTS_DIR)) {
    console.error('batch_results directory not found.');
    return;
  }
  const scoreFiles = getAllScoresFiles(BATCH_RESULTS_DIR);
  console.log(`Found ${scoreFiles.length} score files.`);
  // Running aggregates keyed by model name.
  const stats = {};
  for (const scoresPath of scoreFiles) {
    try {
      const parsed = JSON.parse(fs.readFileSync(scoresPath, 'utf-8'));
      for (const [model, score] of Object.entries(parsed)) {
        const agg = stats[model] ?? (stats[model] = {
          totalScore: 0,
          count: 0,
          min: Infinity,
          max: -Infinity,
          scores: [],
        });
        agg.totalScore += score;
        agg.count += 1;
        agg.min = Math.min(agg.min, score);
        agg.max = Math.max(agg.max, score);
        agg.scores.push(score);
      }
    } catch (err) {
      console.error(`Error reading ${scoresPath}:`, err);
    }
  }
  // Compute averages, then sort ascending (lower score = closer match).
  const results = Object.entries(stats)
    .map(([model, agg]) => ({
      model,
      average: agg.totalScore / agg.count,
      min: agg.min,
      max: agg.max,
      count: agg.count,
    }))
    .sort((a, b) => a.average - b.average);
  console.log('\n### Batch Results Summary\n');
  console.log('| Model | Average Score | Min Score | Max Score | Samples |');
  console.log('|---|---|---|---|---|');
  for (const r of results) {
    console.log(`| ${r.model} | ${r.average.toFixed(4)} | ${r.min.toFixed(4)} | ${r.max.toFixed(4)} | ${r.count} |`);
  }
  console.log('\n(Score: 0 is perfect match, lower is better)\n');
}
main();
import { chromium } from 'playwright';
import path from 'path';
import fs from 'fs/promises';
import { modelKeys as models } from './config';
const BASE_URL = 'http://localhost:3000';
const VIEWPORT_WIDTH = 1440;

/**
 * Capture a 1440px-wide, viewport-only screenshot of each model's page and
 * write it to results/<model>.png. A failed capture for one model is logged
 * and does not abort the remaining captures.
 */
async function main() {
  const browser = await chromium.launch();
  const page = await browser.newPage();
  const outDir = path.join(process.cwd(), 'results');
  await fs.mkdir(outDir, { recursive: true });
  for (const model of models) {
    console.log(`Capturing screenshot for ${model}...`);
    try {
      // Fixed width; the height matters little since fullPage is false.
      await page.setViewportSize({ width: VIEWPORT_WIDTH, height: 1000 });
      await page.goto(`${BASE_URL}/${model}`, { waitUntil: 'networkidle' });
      // Match the reference capture conditions: scrolled to the very top,
      // viewport-only shot at default deviceScaleFactor.
      await page.evaluate(() => window.scrollTo(0, 0));
      await page.screenshot({
        path: path.join(outDir, `${model}.png`),
        fullPage: false,
      });
      console.log(`Saved screenshot to results/${model}.png`);
    } catch (error) {
      console.error(`Error capturing ${model}:`, error);
    }
  }
  await browser.close();
}
main();
import fs from 'fs';
import path from 'path';
import { PNG } from 'pngjs';
import pixelmatch from 'pixelmatch';
import { modelKeys as models } from './config';
/**
 * Read and decode a PNG file, resolving once pixel data is parsed.
 *
 * Fix: the read stream itself now rejects the promise on failure. The
 * original only listened for 'error' on the PNG parser, so a missing or
 * unreadable file (e.g. a bad reference-image path, which main() never
 * existence-checks) crashed the process with an unhandled 'error' event
 * instead of rejecting.
 */
function readPng(filepath: string): Promise<PNG> {
  return new Promise((resolve, reject) => {
    const stream = fs.createReadStream(filepath);
    const png = new PNG();
    stream.on('error', reject);
    stream.pipe(png).on('parsed', () => resolve(png)).on('error', reject);
  });
}
/**
 * Compare each model's screenshot in results/ against a reference image
 * using pixelmatch. Writes a per-model diff image plus results/scores.json
 * mapping model key -> fraction of differing pixels (0 = perfect match).
 */
async function main() {
  const refPath = process.argv[2];
  if (!refPath) {
    console.error('Usage: npx tsx scripts/compare-images.ts <reference-image-path>');
    process.exit(1);
  }
  const resultsDir = path.join(process.cwd(), 'results');
  const scores: Record<string, number> = {};
  try {
    const reference = await readPng(refPath);
    for (const model of models) {
      const shotPath = path.join(resultsDir, `${model}.png`);
      if (!fs.existsSync(shotPath)) {
        console.warn(`Screenshot not found for ${model}, skipping.`);
        continue;
      }
      const shot = await readPng(shotPath);
      // pixelmatch requires identical dimensions, so compare only the
      // overlapping top-left region when the sizes differ.
      const width = Math.min(reference.width, shot.width);
      const height = Math.min(reference.height, shot.height);
      if (reference.width !== shot.width || reference.height !== shot.height) {
        console.warn(`Dimension mismatch for ${model}: Ref ${reference.width}x${reference.height} vs Gen ${shot.width}x${shot.height}. Cropping to ${width}x${height}.`);
      }
      const refCrop = new PNG({ width, height });
      const shotCrop = new PNG({ width, height });
      reference.bitblt(refCrop, 0, 0, width, height, 0, 0);
      shot.bitblt(shotCrop, 0, 0, width, height, 0, 0);
      const diff = new PNG({ width, height });
      const mismatched = pixelmatch(
        refCrop.data,
        shotCrop.data,
        diff.data,
        width,
        height,
        { threshold: 0.1 }
      );
      // Normalize the raw pixel count into a 0..1 score.
      const score = mismatched / (width * height);
      scores[model] = score;
      console.log(`Score for ${model}: ${score}`);
      fs.writeFileSync(path.join(resultsDir, `${model}-diff.png`), PNG.sync.write(diff));
    }
    // Persist all scores for the report and batch-aggregation scripts.
    fs.writeFileSync(path.join(resultsDir, 'scores.json'), JSON.stringify(scores, null, 2));
  } catch (error) {
    console.error('Error comparing images:', error);
  }
}
main();
import dotenv from 'dotenv';
// Load environment variables BEFORE initializing providers
dotenv.config();
import { openai } from '@ai-sdk/openai';
import { anthropic } from '@ai-sdk/anthropic';
import { google } from '@ai-sdk/google';
export { modelKeys } from './model-keys';
// Provider-bound model instances used by generate-code.ts.
// Keys are expected to match the entries of `modelKeys` (scripts/model-keys.ts)
// so the capture, comparison, and report scripts can address results by the
// same identifier.
export const models = {
  'gemini-3-pro-preview': google('gemini-3-pro-preview'),
  'claude-opus-4-5-20251101': anthropic('claude-opus-4-5-20251101'),
  'gpt-5.1-codex': openai('gpt-5.1-codex'),
  'claude-haiku-4-5-20251001': anthropic('claude-haiku-4-5-20251001'),
  'gpt-5.1-codex-mini': openai('gpt-5.1-codex-mini'),
  // NOTE(review): this entry alone prefixes the provider id with 'models/',
  // while its key (used for routes/filenames) stays 'gemini-flash-latest' —
  // confirm the asymmetry is intentional.
  'gemini-flash-latest': google('models/gemini-flash-latest'),
};
import { generateText } from 'ai';
import fs from 'fs/promises';
import path from 'path';
import dotenv from 'dotenv';
import { models } from './config';
// Prompt sent to every model together with the reference screenshot.
// NOTE(review): the prompt asks for pages/index.tsx, but main() actually
// writes each output to pages/<model-key>/index.tsx — confirm the wording
// does not need updating (it likely only anchors the single-file constraint).
const PROMPT = `
Please faithfully reproduce the attached UI image using only Next.js and Tailwind CSS.
Maintain the layout, spacing, colors, and proportions as closely as possible.
The code should all go in pages/index.tsx of a Next.js project.
Do not use any external files (such as CSS, components, or public assets).
`;
/**
 * Extract the first fenced code block from a model response.
 *
 * Fix: the original regex only accepted ```ts / ```tsx / ```typescript
 * fences, so responses fenced as ```jsx, ```javascript, or left untagged
 * fell through to the raw-text fallback and wrote surrounding prose into
 * index.tsx. TS-flavored fences are still preferred; any other fence is a
 * fallback; no fence at all returns the text unchanged.
 */
function extractCode(text: string): string {
  const tsFence = /```(?:tsx?|jsx?|typescript|javascript)\n?([\s\S]*?)```/;
  const anyFence = /```\w*\n?([\s\S]*?)```/;
  const match = text.match(tsFence) ?? text.match(anyFence);
  return match ? match[1].trim() : text;
}

/**
 * Ask every configured model to reproduce the given UI image and write each
 * result to pages/<model-key>/index.tsx.
 *
 * Usage: npx tsx scripts/generate-code.ts <image-path>
 */
async function main() {
  console.log('Starting code generation script...');
  const imagePath = process.argv[2];
  if (!imagePath) {
    console.error('Usage: npx tsx scripts/generate-code.ts <image-path>');
    process.exit(1);
  }
  const imageBuffer = await fs.readFile(imagePath);
  // Models run sequentially: keeps logs readable and avoids provider
  // rate limits; a failure for one model does not abort the others.
  for (const [key, model] of Object.entries(models)) {
    console.log(`Generating code for ${key}...`);
    try {
      const { text } = await generateText({
        model,
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: PROMPT },
              { type: 'image', image: imageBuffer },
            ],
          },
        ],
      });
      const code = extractCode(text);
      const outputDir = path.join(process.cwd(), 'pages', key);
      await fs.mkdir(outputDir, { recursive: true });
      await fs.writeFile(path.join(outputDir, 'index.tsx'), code);
      console.log(`Saved to pages/${key}/index.tsx`);
    } catch (error) {
      console.error(`Error generating for ${key}:`, error);
    }
  }
}
main();
import fs from 'fs';
import path from 'path';
import { modelKeys as models } from './config';
/**
 * Build results/report.md: a markdown table of pixel-diff scores per model
 * (read from results/scores.json when present) followed by each model's
 * screenshot and diff image.
 */
function main() {
  const resultsDir = path.join(process.cwd(), 'results');
  const scoresFile = path.join(resultsDir, 'scores.json');
  const scores: Record<string, number> = fs.existsSync(scoresFile)
    ? JSON.parse(fs.readFileSync(scoresFile, 'utf-8'))
    : {};
  const parts: string[] = [
    '# UI Reproduction Comparison Report\n\n',
    '## Comparison Table\n\n',
    '| Model | Layout Similarity Score (Lower is better) | Comment |\n',
    '|---|---|---|\n',
  ];
  for (const model of models) {
    // Models that were never scored (e.g. capture failed) show N/A.
    const score = scores[model] !== undefined ? scores[model].toFixed(4) : 'N/A';
    parts.push(`| ${model} | ${score} | |\n`);
  }
  parts.push('\n## Screenshots\n\n');
  for (const model of models) {
    parts.push(`### ${model}\n\n`);
    parts.push(`![${model}](./${model}.png)\n\n`);
    parts.push(`**Difference:**\n\n`);
    parts.push(`![${model} Diff](./${model}-diff.png)\n\n`);
  }
  fs.writeFileSync(path.join(resultsDir, 'report.md'), parts.join(''));
  console.log('Report generated at results/report.md');
}
main();
import { modelKeys } from '../scripts/model-keys';
import Image from 'next/image';
import Link from 'next/link';
/**
 * Index page: shows the reference screenshot and, below it, a live iframe
 * preview of each model's generated page, all held to the reference image's
 * 1440x931 aspect ratio so they can be compared visually.
 */
export default function Home() {
  // The reference screenshot is 1440x931; every preview container reserves
  // the same aspect ratio via the padding-top trick so previews line up.
  const aspectRatio = 931 / 1440;
  return (
    <div className="min-h-screen bg-gray-100 p-8">
      <h1 className="text-3xl font-bold text-center mb-6">UI Reproduction Preview</h1>
      {/* Experiment Description */}
      <div className="max-w-3xl mx-auto mb-12 text-center text-gray-700 leading-relaxed">
        <p className="mb-2">
          This page compares how different AI models reproduce a target web UI.
        </p>
        <p className="mb-2">
          Each model was prompted to recreate the same design, and the outputs are displayed below alongside pixel-difference scores.
        </p>
        <p>
          The goal is to measure UI fidelity, structural accuracy, and consistency across models and model tiers.
        </p>
      </div>
      {/* Reference Image Section */}
      <div className="max-w-screen-2xl mx-auto mb-16">
        <div className="bg-white rounded-lg shadow-lg overflow-hidden">
          <div className="p-4 border-b bg-gray-50 flex justify-between items-center">
            <h2 className="text-xl font-semibold text-gray-800">Reference Image (Target)</h2>
            <a
              href="https://dribbble.com/shots/26823739-Nexocube-AI-Intelligence-Platform"
              target="_blank"
              rel="noopener noreferrer"
              className="text-sm text-blue-600 hover:text-blue-800 underline"
            >
              Original design by Plainthing Studio (Dribbble)
            </a>
          </div>
          <div className="w-full bg-gray-200">
            <div style={{ position: 'relative', width: '100%', paddingTop: `${aspectRatio * 100}%` }}>
              <Image
                src="/reference.png"
                alt="Reference UI"
                fill
                className="object-contain object-top"
              />
            </div>
          </div>
        </div>
      </div>
      {/* Models Grid */}
      <div className="grid grid-cols-1 gap-16 max-w-screen-2xl mx-auto">
        {modelKeys.map((model, index) => (
          <div key={model} className="flex flex-col bg-white rounded-lg shadow-lg overflow-hidden">
            <div className="p-4 border-b bg-gray-50 flex items-center justify-between">
              <h2 className="text-2xl font-bold text-gray-800 truncate" title={model}>
                <span className="mr-3 text-gray-500">#{index + 1}</span>
                <Link href={`/${model}`} className="hover:text-blue-600 hover:underline">
                  {model}
                </Link>
              </h2>
            </div>
            {/* Container that maintains the reference aspect ratio */}
            <div className="w-full relative bg-gray-200 border-b border-gray-300">
              <div style={{ position: 'relative', width: '100%', paddingTop: `${aspectRatio * 100}%` }}>
                {/* The generated pages render responsively at the container's
                    current width. (A strict 1440px render would need a scale
                    transform driven by the measured container width.) The
                    previous fixed w-[1440px]/h-[931px] classes and the
                    scale-[calc(...var(--container-width))] class referenced an
                    undefined CSS custom property and were overridden by inline
                    100% sizing anyway, so the effective layout is unchanged. */}
                <iframe
                  src={`/${model}`}
                  className="absolute top-0 left-0 w-full h-full border-none"
                  title={`${model} preview`}
                  loading="lazy"
                />
              </div>
            </div>
          </div>
        ))}
      </div>
    </div>
  );
}
// Canonical list of model identifiers under comparison.
// These keys double as page routes (pages/<key>), screenshot file names
// (results/<key>.png), and score-map keys, and are expected to match the
// keys of `models` in scripts/config.ts.
export const modelKeys = [
  'claude-opus-4-5-20251101',
  'gpt-5.1-codex',
  'gemini-3-pro-preview',
  'gemini-flash-latest',
  'gpt-5.1-codex-mini',
  'claude-haiku-4-5-20251001',
] as const;
import { exec, spawn } from 'child_process';
import util from 'util';
import fs from 'fs/promises';
import path from 'path';
import { modelKeys } from './config';
// (Removed: `const execAsync = util.promisify(exec);` — it was never used
// anywhere in this script; all commands run through runCommand below.)

/**
 * Run a command as a child process with stdio inherited, so its output
 * streams straight to this terminal.
 *
 * @param command Executable to run (resolved via PATH; no shell involved).
 * @param args Argument list passed verbatim to the process.
 * @returns Promise that resolves on exit code 0 and rejects on any other
 *          exit code or on a spawn failure.
 */
function runCommand(command: string, args: string[]): Promise<void> {
  return new Promise((resolve, reject) => {
    console.log(`Running: ${command} ${args.join(' ')}`);
    const child = spawn(command, args, { stdio: 'inherit' });
    child.on('error', reject);
    child.on('close', (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`Command failed with code ${code}`));
      }
    });
  });
}
// Number of full generate -> capture -> compare cycles to run.
const ITERATIONS = 10;
// Wait time in milliseconds to allow Next.js to recompile and HMR to update
// after the freshly generated pages are written to disk.
const HMR_WAIT_TIME = 5000;

/**
 * Run the full experiment pipeline ITERATIONS times, archiving each
 * iteration's generated pages, screenshots, diffs, and scores under
 * batch_results/<timestamp>/iteration-<i>/.
 *
 * Usage: npx tsx scripts/run-batch-experiment.ts <image-path>
 * Assumes the Next.js dev server is already running on port 3000
 * (see step 2 below); this script does not start or stop it.
 */
async function main() {
  const imagePath = process.argv[2];
  if (!imagePath) {
    console.error('Usage: npx tsx scripts/run-batch-experiment.ts <image-path>');
    process.exit(1);
  }
  // ISO timestamp with ':' and '.' replaced so it is a safe directory name.
  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
  const batchResultsDir = path.join(process.cwd(), 'batch_results', timestamp);
  await fs.mkdir(batchResultsDir, { recursive: true });
  console.log(`Starting batch experiment with ${ITERATIONS} iterations...`);
  console.log(`Results will be saved to ${batchResultsDir}`);
  // A failure in one iteration is logged and the next iteration proceeds.
  for (let i = 1; i <= ITERATIONS; i++) {
    console.log(`\n--- Iteration ${i}/${ITERATIONS} ---`);
    try {
      // 1. Generate code for every model from the reference image.
      console.log('Generating code...');
      await runCommand('npx', ['tsx', 'scripts/generate-code.ts', imagePath]);
      // generate-code rewrites the page files (updating their mtime), but
      // Next.js needs time to detect the change and recompile before the
      // screenshots are taken.
      console.log(`Waiting ${HMR_WAIT_TIME}ms for Next.js HMR/Recompilation...`);
      await new Promise(resolve => setTimeout(resolve, HMR_WAIT_TIME));
      // 2. Capture screenshots.
      // Assumes the Next.js dev server is ALREADY running on port 3000.
      console.log('Capturing screenshots...');
      await runCommand('npx', ['tsx', 'scripts/capture-screenshots.ts']);
      // 3. Compare each screenshot against the reference image.
      console.log('Comparing images...');
      await runCommand('npx', ['tsx', 'scripts/compare-images.ts', imagePath]);
      // 4. Archive this iteration's inputs and outputs.
      const iterationDir = path.join(batchResultsDir, `iteration-${i}`);
      await fs.mkdir(iterationDir, { recursive: true });
      // Copy the generated page source for each model (flattened name to
      // avoid recreating the pages/<model>/ hierarchy).
      for (const model of modelKeys) {
        const srcPage = path.join(process.cwd(), 'pages', model, 'index.tsx');
        const destPage = path.join(iterationDir, `${model}-index.tsx`);
        try {
          await fs.copyFile(srcPage, destPage);
        } catch (e) {
          console.warn(`Could not copy source for ${model}: ${e}`);
        }
      }
      // Copy results (screenshots, diff images, and scores.json).
      const resultsDir = path.join(process.cwd(), 'results');
      const files = await fs.readdir(resultsDir);
      for (const file of files) {
        await fs.copyFile(path.join(resultsDir, file), path.join(iterationDir, file));
      }
      console.log(`Iteration ${i} completed.`);
    } catch (error) {
      console.error(`Error in iteration ${i}:`, error);
    }
  }
  console.log('\nBatch experiment completed.');
}
main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment