Created
March 27, 2026 23:43
-
-
Save PaulKinlan/39072ce8eaf5e0d3d75a0c7d950b58ad to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env node | |
| // Scrapes OpenRouter programming model data: | |
| // 1. RSC payload from rankings page for accurate weekly token breakdowns (all models) | |
| // 2. Pricing pages via headless Chrome for weighted-average costs | |
| // | |
| // Usage: | |
| // node scripts/openrouter-scrape.mjs > reports/openrouter-programming-YYYY-MM-DD.csv | |
| // node scripts/openrouter-scrape.mjs --json > reports/openrouter-programming-YYYY-MM-DD.json | |
| import puppeteer from 'puppeteer-core'; | |
| import https from 'https'; | |
// True when the script was started with the `--json` flag; selects JSON output instead of CSV.
const OUTPUT_JSON = process.argv.some((arg) => arg === '--json');

// Filesystem path of the Chromium binary handed to puppeteer-core.
const CHROMIUM_PATH = '/usr/bin/chromium';

/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<undefined>} Resolves (with no value) once the timer fires.
 */
const sleep = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
/**
 * GET a URL over HTTPS and parse the response body as JSON.
 *
 * @param {string} url - Absolute https:// URL to request.
 * @returns {Promise<*>} Resolves with the parsed JSON value.
 *   Rejects when the request or response stream errors, when the server
 *   answers with a non-2xx status, or (with the `JSON.parse` error) when the
 *   body is not valid JSON.
 */
function fetchJSON(url) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (res) => {
        const { statusCode } = res;
        if (statusCode < 200 || statusCode >= 300) {
          // Discard the body so the socket is released, then fail loudly
          // instead of trying to parse an error page as data.
          res.resume();
          reject(new Error(`Request to ${url} failed: HTTP ${statusCode}`));
          return;
        }

        // Decode as a UTF-8 stream: concatenating raw Buffer chunks would
        // corrupt any multi-byte character that straddles a chunk boundary.
        res.setEncoding('utf8');

        let body = '';
        res.on('data', (chunk) => {
          body += chunk;
        });
        res.on('error', reject);
        res.on('end', () => {
          try {
            resolve(JSON.parse(body));
          } catch (err) {
            reject(err);
          }
        });
      })
      .on('error', reject);
  });
}
/**
 * GET a URL over HTTPS and return the response body as a string.
 *
 * @param {string} url - Absolute https:// URL to request.
 * @returns {Promise<string>} Resolves with the full UTF-8 decoded body.
 *   Rejects when the request or response stream errors, or when the server
 *   answers with a non-2xx status.
 */
function fetchText(url) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (res) => {
        const { statusCode } = res;
        if (statusCode < 200 || statusCode >= 300) {
          // Discard the body so the socket is released, then report the
          // failure rather than handing back an error/redirect page.
          res.resume();
          reject(new Error(`Request to ${url} failed: HTTP ${statusCode}`));
          return;
        }

        // Decode as a UTF-8 stream: concatenating raw Buffer chunks would
        // corrupt any multi-byte character that straddles a chunk boundary.
        res.setEncoding('utf8');

        let body = '';
        res.on('data', (chunk) => {
          body += chunk;
        });
        res.on('error', reject);
        res.on('end', () => resolve(body));
      })
      .on('error', reject);
  });
}
/**
 * Scrape OpenRouter's weekly "programming" rankings and print a per-model
 * token/cost report to stdout (JSON when OUTPUT_JSON is set, CSV otherwise).
 * Progress and per-model scrape failures are written to stderr.
 *
 * Steps:
 *   1. Load per-token prices from the models API and convert to $ per 1M tokens.
 *   2. Download the rankings page and sum the daily stat records embedded in it
 *      per model slug.
 *   3. Join stats with prices (exact id, id without a trailing -YYYYMMDD, then
 *      first prefix match) and sort by total tokens, descending.
 *   4. For up to 30 of the highest-volume models whose input and output price
 *      are both 0, read prices from the model's pricing page in headless Chromium.
 *   5. Compute percentages, blended price and weekly cost, then print.
 *
 * @returns {Promise<void>} Rejects if the API or rankings fetch fails or the
 *   browser cannot be started; individual pricing-page failures are logged
 *   and skipped.
 */
async function main() {
  // Share of `whole` taken by `part`, as a percentage rounded to one decimal.
  const percentOf = (part, whole) => (whole > 0 ? Math.round(part / whole * 1000) / 10 : 0);
  // Round a dollar amount to two decimals.
  const toCents = (amount) => Math.round(amount * 100) / 100;
  // Quote a CSV field, doubling embedded quotes so the row stays parseable.
  const csvQuoted = (text) => `"${String(text).replace(/"/g, '""')}"`;

  // Step 1: Get API pricing for all models
  console.error('Fetching model pricing from API...');
  const apiData = await fetchJSON('https://openrouter.ai/api/v1/models');
  // A Map (not a plain object) so that a scraped slug such as "constructor"
  // can never resolve to something inherited from Object.prototype.
  const apiPricing = new Map();
  for (const m of apiData?.data || []) {
    apiPricing.set(m.id, {
      name: m.name,
      // The API value is multiplied by 1e6 to obtain a price per million tokens.
      input: parseFloat(m.pricing?.prompt || '0') * 1e6,
      output: parseFloat(m.pricing?.completion || '0') * 1e6,
    });
  }

  // Step 2: Fetch rankings page and parse RSC payload for weekly token data
  console.error('Fetching rankings page for RSC data...');
  const html = await fetchText('https://openrouter.ai/rankings/programming?view=week');

  // Parse daily stats from RSC payload (escaped JSON in script tags).
  // Every `\\?"` accepts both a plain and a backslash-escaped double quote.
  const rscPattern = /\{\\?"date\\?":\\?"[^"]*\\?",\\?"model_permaslug\\?":\\?"([^"\\]+)\\?",\\?"variant\\?":\\?"[^"\\]*\\?",\\?"total_completion_tokens\\?":\s*(\d+),\\?"total_prompt_tokens\\?":\s*(\d+),\\?"total_native_tokens_reasoning\\?":\s*(\d+),\\?"count\\?":\s*(\d+)/g;
  const modelStats = new Map();
  for (const record of html.matchAll(rscPattern)) {
    const [, slug, completionText, promptText, reasoningText, countText] = record;
    let stats = modelStats.get(slug);
    if (!stats) {
      stats = { prompt: 0, completion: 0, reasoning: 0, requests: 0 };
      modelStats.set(slug, stats);
    }
    // Explicit radix: the captures are decimal digit strings.
    stats.prompt += Number.parseInt(promptText, 10);
    stats.completion += Number.parseInt(completionText, 10);
    stats.reasoning += Number.parseInt(reasoningText, 10);
    stats.requests += Number.parseInt(countText, 10);
  }
  console.error(`Found ${modelStats.size} models from RSC data`);

  // Step 3: Build results with API pricing, sort by total tokens
  const results = [];
  for (const [slug, stats] of modelStats) {
    // NOTE(review): reasoning tokens are added on top of completion tokens
    // here and in the cost below — confirm they are not already included in
    // total_completion_tokens.
    const total = stats.prompt + stats.completion + stats.reasoning;
    if (total === 0) continue;

    // Look up API pricing — try exact match, then strip date suffix
    let p = apiPricing.get(slug) ?? apiPricing.get(slug.replace(/-\d{8}$/, ''));
    if (!p) {
      // Last resort: first API id that is a prefix of the slug or vice versa.
      for (const [id, data] of apiPricing) {
        if (slug.startsWith(id) || id.startsWith(slug)) {
          p = data;
          break;
        }
      }
    }

    results.push({
      slug,
      displayName: p?.name || slug,
      prompt: stats.prompt,
      completion: stats.completion,
      reasoning: stats.reasoning,
      total,
      requests: stats.requests,
      inputPrice: p?.input || 0,
      outputPrice: p?.output || 0,
      pricingSource: 'api',
    });
  }
  results.sort((a, b) => b.total - a.total);

  // Step 4: For top models with $0 pricing, scrape the pricing page
  const needsPricing = results
    .filter((r) => r.inputPrice === 0 && r.outputPrice === 0)
    .slice(0, 30);
  if (needsPricing.length > 0) {
    console.error(`Scraping pricing pages for ${needsPricing.length} models with missing pricing...`);
    const browser = await puppeteer.launch({
      executablePath: CHROMIUM_PATH,
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    try {
      const page = await browser.newPage();
      for (const model of needsPricing) {
        // Strip date suffix to get the base slug for the URL
        const urlSlug = model.slug.replace(/-\d{8}$/, '');
        console.error(` Pricing: ${urlSlug}...`);
        try {
          await page.goto(`https://openrouter.ai/${urlSlug}/pricing`, {
            waitUntil: 'networkidle2',
            timeout: 15000,
          });
          await sleep(1500);

          // Runs inside the page, so it must not reference anything from
          // this module's scope.
          const pricing = await page.evaluate(() => {
            const lines = document.body.innerText.split('\n').map((l) => l.trim()).filter(Boolean);
            let weightedInput = 0;
            let weightedOutput = 0;
            let listedInput = 0;
            let listedOutput = 0;
            for (let i = 0; i < lines.length; i++) {
              // The weighted averages are read from the line that follows the label.
              if (lines[i] === 'Weighted Avg Input Price' && i + 1 < lines.length) {
                const m = lines[i + 1].match(/^\$([\d.]+)$/);
                if (m) weightedInput = parseFloat(m[1]);
              }
              if (lines[i] === 'Weighted Avg Output Price' && i + 1 < lines.length) {
                const m = lines[i + 1].match(/^\$([\d.]+)$/);
                if (m) weightedOutput = parseFloat(m[1]);
              }
              const si = lines[i].match(/Starting at \$([\d.]+)\/M input tokens/);
              if (si) listedInput = parseFloat(si[1]);
              const so = lines[i].match(/Starting at \$([\d.]+)\/M output tokens/);
              if (so) listedOutput = parseFloat(so[1]);
            }
            return { weightedInput, weightedOutput, listedInput, listedOutput };
          });

          // Prefer the weighted average, fall back to the listed price.
          model.inputPrice = pricing.weightedInput || pricing.listedInput || 0;
          model.outputPrice = pricing.weightedOutput || pricing.listedOutput || 0;
          model.pricingSource = pricing.weightedInput ? 'weighted' : (pricing.listedInput ? 'listed' : 'none');
        } catch (e) {
          // Best effort: a single failed page must not abort the report.
          console.error(` Failed: ${e.message}`);
        }
      }
    } finally {
      // Always shut Chromium down, even if newPage() or the loop throws,
      // so no orphaned browser process is left behind.
      await browser.close();
    }
  }

  // Step 5: Calculate derived fields and output
  for (const r of results) {
    r.inputPct = percentOf(r.prompt, r.total);
    r.outputPct = percentOf(r.completion, r.total);
    r.reasoningPct = percentOf(r.reasoning, r.total);
    // NOTE(review): 0.934 / 0.066 are fixed input/output weights — presumably
    // an observed overall token mix; confirm the source of these numbers.
    r.blendedPrice = Math.round((r.inputPrice * 0.934 + r.outputPrice * 0.066) * 10000) / 10000;
    // Prices are per 1M tokens; reasoning tokens are billed at the output price.
    r.weeklyCost = toCents(
      (r.prompt * r.inputPrice / 1e6) +
      (r.completion * r.outputPrice / 1e6) +
      (r.reasoning * r.outputPrice / 1e6)
    );
  }

  const totalPrompt = results.reduce((s, r) => s + r.prompt, 0);
  const totalCompletion = results.reduce((s, r) => s + r.completion, 0);
  const totalReasoning = results.reduce((s, r) => s + r.reasoning, 0);
  const totalAll = totalPrompt + totalCompletion + totalReasoning;
  const totalCost = results.reduce((s, r) => s + r.weeklyCost, 0);
  const totalRequests = results.reduce((s, r) => s + r.requests, 0);

  const overallInputPct = percentOf(totalPrompt, totalAll);
  const overallOutputPct = percentOf(totalCompletion, totalAll);
  const overallReasoningPct = percentOf(totalReasoning, totalAll);

  if (OUTPUT_JSON) {
    console.log(JSON.stringify({
      generated: new Date().toISOString().split('T')[0],
      source: 'https://openrouter.ai/rankings/programming?view=week',
      summary: {
        totalModels: results.length,
        totalWeeklyTokens: totalAll,
        totalWeeklyCost: toCents(totalCost),
        totalWeeklyRequests: totalRequests,
        overallInputPct,
        overallOutputPct,
        overallReasoningPct,
      },
      models: results.map((r) => ({
        model: r.slug,
        displayName: r.displayName,
        weeklyPromptTokens: r.prompt,
        weeklyCompletionTokens: r.completion,
        weeklyReasoningTokens: r.reasoning,
        weeklyTotalTokens: r.total,
        weeklyRequests: r.requests,
        inputPct: r.inputPct,
        outputPct: r.outputPct,
        reasoningPct: r.reasoningPct,
        inputPricePerMillion: r.inputPrice,
        outputPricePerMillion: r.outputPrice,
        blendedPricePerMillion: r.blendedPrice,
        weeklyCost: r.weeklyCost,
        pricingSource: r.pricingSource,
      })),
    }, null, 2));
  } else {
    console.log('Model,Display Name,Weekly Prompt Tokens,Weekly Completion Tokens,Weekly Reasoning Tokens,Weekly Total Tokens,Weekly Requests,Input %,Output %,Reasoning %,Input $/1M,Output $/1M,Blended $/1M,Weekly Cost ($),Pricing Source');
    for (const r of results) {
      console.log(`${r.slug},${csvQuoted(r.displayName)},${r.prompt},${r.completion},${r.reasoning},${r.total},${r.requests},${r.inputPct},${r.outputPct},${r.reasoningPct},${r.inputPrice},${r.outputPrice},${r.blendedPrice},${r.weeklyCost},${r.pricingSource}`);
    }
    // Summary row: the three price columns and the pricing source are left empty.
    console.log(`TOTAL,"All Models",${totalPrompt},${totalCompletion},${totalReasoning},${totalAll},${totalRequests},${overallInputPct},${overallOutputPct},${overallReasoningPct},,,,${toCents(totalCost)},`);
  }
}
// Entry point: run the scraper; on any unhandled failure, log the error to
// stderr and exit with status 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment