Skip to content

Instantly share code, notes, and snippets.

@PaulKinlan
Created March 27, 2026 23:43
Show Gist options
  • Select an option

  • Save PaulKinlan/39072ce8eaf5e0d3d75a0c7d950b58ad to your computer and use it in GitHub Desktop.

Select an option

Save PaulKinlan/39072ce8eaf5e0d3d75a0c7d950b58ad to your computer and use it in GitHub Desktop.
#!/usr/bin/env node
// Scrapes OpenRouter programming model data:
// 1. RSC payload from rankings page for accurate weekly token breakdowns (all models)
// 2. Pricing pages via headless Chrome for weighted-average costs
//
// Usage:
// node scripts/openrouter-scrape.mjs > reports/openrouter-programming-YYYY-MM-DD.csv
// node scripts/openrouter-scrape.mjs --json > reports/openrouter-programming-YYYY-MM-DD.json
import puppeteer from 'puppeteer-core';
import https from 'https';
// True when the script is invoked with --json; selects JSON output instead of CSV.
const OUTPUT_JSON = process.argv.includes('--json');

// Chromium/Chrome binary handed to puppeteer-core's launch(). The
// PUPPETEER_EXECUTABLE_PATH environment variable overrides the default so the
// script also runs on machines where the browser lives somewhere else.
const CHROMIUM_PATH = process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/chromium';

/**
 * Resolves (with no value) after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * Performs an HTTPS GET and parses the response body as JSON.
 *
 * @param {string} url - Absolute https:// URL to request.
 * @returns {Promise<any>} Resolves with the parsed JSON value. Rejects on a
 *   request/response stream error, on a non-2xx HTTP status, or when the body
 *   is not valid JSON (the original parse error is passed through).
 */
function fetchJSON(url) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (res) => {
        const { statusCode } = res;
        if (statusCode < 200 || statusCode >= 300) {
          // Drain the body so the socket is released, then fail loudly instead
          // of treating an error payload as real data.
          res.resume();
          reject(new Error(`Request to ${url} failed with HTTP status ${statusCode}`));
          return;
        }

        // Decode as a stream so a multi-byte UTF-8 character split across two
        // chunks is not corrupted by per-chunk Buffer-to-string conversion.
        res.setEncoding('utf8');
        let body = '';
        res.on('data', (chunk) => {
          body += chunk;
        });
        res.on('error', reject);
        res.on('end', () => {
          try {
            resolve(JSON.parse(body));
          } catch (err) {
            reject(err);
          }
        });
      })
      .on('error', reject);
  });
}
/**
 * Performs an HTTPS GET and returns the response body as a UTF-8 string.
 *
 * @param {string} url - Absolute https:// URL to request.
 * @returns {Promise<string>} Resolves with the full body text. Rejects on a
 *   request/response stream error or on a non-2xx HTTP status.
 */
function fetchText(url) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (res) => {
        const { statusCode } = res;
        if (statusCode < 200 || statusCode >= 300) {
          // Drain the body so the socket is released; an error page must not be
          // mistaken for the page the caller wanted to parse.
          res.resume();
          reject(new Error(`Request to ${url} failed with HTTP status ${statusCode}`));
          return;
        }

        // Stream-decode so multi-byte characters split across chunks survive.
        res.setEncoding('utf8');
        let body = '';
        res.on('data', (chunk) => {
          body += chunk;
        });
        res.on('error', reject);
        res.on('end', () => resolve(body));
      })
      .on('error', reject);
  });
}
/** Share of `part` in `whole` as a percentage rounded to one decimal; 0 when `whole` is not positive. */
function tokenSharePct(part, whole) {
  return whole > 0 ? Math.round(part / whole * 1000) / 10 : 0;
}

/** Formats a value as one CSV field, quoting when needed (or when forced) and doubling embedded quotes. */
function csvField(value, forceQuotes = false) {
  const text = String(value);
  if (!forceQuotes && !/[",\r\n]/.test(text)) return text;
  return `"${text.replace(/"/g, '""')}"`;
}

/**
 * Scrapes OpenRouter's weekly programming rankings and writes a per-model
 * token/cost report to stdout (CSV by default, JSON when OUTPUT_JSON is set).
 * Progress messages go to stderr so stdout can be redirected to a file.
 *
 * Steps:
 *   1. Load list prices for every model from the /api/v1/models endpoint.
 *   2. Download the rankings page and sum the per-day stat records embedded in it.
 *   3. Join stats with prices and sort by total tokens, descending.
 *   4. For up to 30 of the largest models whose price is still $0, read the
 *      model's pricing page in headless Chromium.
 *   5. Compute percentages and costs and print the report.
 *
 * @returns {Promise<void>} Rejects if either HTTP fetch fails or the browser
 *   cannot be launched; a failure on an individual pricing page is logged and skipped.
 */
async function main() {
  // Step 1: Get API pricing for all models
  console.error('Fetching model pricing from API...');
  const apiData = await fetchJSON('https://openrouter.ai/api/v1/models');
  // A Map (not a plain object) so a model id can never collide with Object.prototype keys.
  const apiPricing = new Map();
  for (const m of apiData.data || []) {
    apiPricing.set(m.id, {
      name: m.name,
      // Scaled by 1e6 to get a per-million-token price, matching the "$/1M" columns.
      input: parseFloat(m.pricing?.prompt || '0') * 1e6,
      output: parseFloat(m.pricing?.completion || '0') * 1e6,
    });
  }

  // Step 2: Fetch rankings page and parse RSC payload for weekly token data
  console.error('Fetching rankings page for RSC data...');
  const html = await fetchText('https://openrouter.ai/rankings/programming?view=week');
  // Parse daily stats from RSC payload (escaped JSON in script tags).
  // The optional backslashes let the pattern match both escaped (\") and plain (") quotes.
  const rscPattern = /\{\\?"date\\?":\\?"[^"]*\\?",\\?"model_permaslug\\?":\\?"([^"\\]+)\\?",\\?"variant\\?":\\?"[^"\\]*\\?",\\?"total_completion_tokens\\?":\s*(\d+),\\?"total_prompt_tokens\\?":\s*(\d+),\\?"total_native_tokens_reasoning\\?":\s*(\d+),\\?"count\\?":\s*(\d+)/g;
  const modelStats = new Map();
  for (const match of html.matchAll(rscPattern)) {
    const [, slug, completionText, promptText, reasoningText, countText] = match;
    let stats = modelStats.get(slug);
    if (!stats) {
      stats = { prompt: 0, completion: 0, reasoning: 0, requests: 0 };
      modelStats.set(slug, stats);
    }
    // Every matched record for a slug is added into that slug's running totals.
    stats.prompt += Number.parseInt(promptText, 10);
    stats.completion += Number.parseInt(completionText, 10);
    stats.reasoning += Number.parseInt(reasoningText, 10);
    stats.requests += Number.parseInt(countText, 10);
  }
  console.error(`Found ${modelStats.size} models from RSC data`);

  // Step 3: Build results with API pricing, sort by total tokens
  const results = [];
  for (const [slug, stats] of modelStats) {
    const total = stats.prompt + stats.completion + stats.reasoning;
    if (total === 0) continue;

    // Look up API pricing — try exact match, then strip the -YYYYMMDD date suffix
    let p = apiPricing.get(slug) ?? apiPricing.get(slug.replace(/-\d{8}$/, ''));
    if (!p) {
      // Last resort: first API id that is a prefix of the slug or vice versa.
      // NOTE(review): the first match wins, which may not be the closest id.
      for (const [id, data] of apiPricing) {
        if (slug.startsWith(id) || id.startsWith(slug)) {
          p = data;
          break;
        }
      }
    }

    results.push({
      slug,
      displayName: p?.name || slug,
      prompt: stats.prompt,
      completion: stats.completion,
      reasoning: stats.reasoning,
      total,
      requests: stats.requests,
      inputPrice: p?.input || 0,
      outputPrice: p?.output || 0,
      pricingSource: 'api',
    });
  }
  results.sort((a, b) => b.total - a.total);

  // Step 4: For top models with $0 pricing, scrape the pricing page
  const needsPricing = results.filter(r => r.inputPrice === 0 && r.outputPrice === 0).slice(0, 30);
  if (needsPricing.length > 0) {
    console.error(`Scraping pricing pages for ${needsPricing.length} models with missing pricing...`);
    const browser = await puppeteer.launch({
      executablePath: CHROMIUM_PATH,
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    // try/finally guarantees the Chromium process is shut down even if
    // opening the page (or anything outside the per-model catch) throws.
    try {
      const page = await browser.newPage();
      for (const model of needsPricing) {
        // Strip date suffix to get the base slug for the URL
        const urlSlug = model.slug.replace(/-\d{8}$/, '');
        console.error(` Pricing: ${urlSlug}...`);
        try {
          await page.goto(`https://openrouter.ai/${urlSlug}/pricing`, {
            waitUntil: 'networkidle2',
            timeout: 15000,
          });
          // Extra settle time after network idle before reading the rendered text.
          await sleep(1500);
          // Runs inside the page: must be self-contained (no outer-scope references).
          const pricing = await page.evaluate(() => {
            const lines = document.body.innerText.split('\n').map(l => l.trim()).filter(Boolean);
            let weightedInput = 0;
            let weightedOutput = 0;
            let listedInput = 0;
            let listedOutput = 0;
            for (let i = 0; i < lines.length; i++) {
              // The weighted-average value is expected on the line right after its label.
              if (lines[i] === 'Weighted Avg Input Price' && i + 1 < lines.length) {
                const m = lines[i + 1].match(/^\$([\d.]+)$/);
                if (m) weightedInput = parseFloat(m[1]);
              }
              if (lines[i] === 'Weighted Avg Output Price' && i + 1 < lines.length) {
                const m = lines[i + 1].match(/^\$([\d.]+)$/);
                if (m) weightedOutput = parseFloat(m[1]);
              }
              const si = lines[i].match(/Starting at \$([\d.]+)\/M input tokens/);
              if (si) listedInput = parseFloat(si[1]);
              const so = lines[i].match(/Starting at \$([\d.]+)\/M output tokens/);
              if (so) listedOutput = parseFloat(so[1]);
            }
            return { weightedInput, weightedOutput, listedInput, listedOutput };
          });
          // Prefer the weighted average, then the listed "Starting at" price.
          model.inputPrice = pricing.weightedInput || pricing.listedInput || 0;
          model.outputPrice = pricing.weightedOutput || pricing.listedOutput || 0;
          model.pricingSource = pricing.weightedInput ? 'weighted' : (pricing.listedInput ? 'listed' : 'none');
        } catch (e) {
          // Best effort: one unreachable pricing page must not abort the whole report.
          console.error(` Failed: ${e.message}`);
        }
      }
    } finally {
      await browser.close();
    }
  }

  // Step 5: Calculate derived fields and output
  // Fixed input/output weighting used for the blended price.
  // NOTE(review): the origin of the 93.4% / 6.6% split is not shown in this file — confirm.
  const blendInputWeight = 0.934;
  const blendOutputWeight = 0.066;
  for (const r of results) {
    r.inputPct = tokenSharePct(r.prompt, r.total);
    r.outputPct = tokenSharePct(r.completion, r.total);
    r.reasoningPct = tokenSharePct(r.reasoning, r.total);
    r.blendedPrice = Math.round((r.inputPrice * blendInputWeight + r.outputPrice * blendOutputWeight) * 10000) / 10000;
    // Reasoning tokens are charged at the output price; result is rounded to cents.
    r.weeklyCost = Math.round((
      (r.prompt * r.inputPrice / 1e6) +
      (r.completion * r.outputPrice / 1e6) +
      (r.reasoning * r.outputPrice / 1e6)
    ) * 100) / 100;
  }

  const totalPrompt = results.reduce((s, r) => s + r.prompt, 0);
  const totalCompletion = results.reduce((s, r) => s + r.completion, 0);
  const totalReasoning = results.reduce((s, r) => s + r.reasoning, 0);
  const totalAll = totalPrompt + totalCompletion + totalReasoning;
  const totalCost = results.reduce((s, r) => s + r.weeklyCost, 0);
  const totalRequests = results.reduce((s, r) => s + r.requests, 0);
  const roundedTotalCost = Math.round(totalCost * 100) / 100;
  const overallInputPct = tokenSharePct(totalPrompt, totalAll);
  const overallOutputPct = tokenSharePct(totalCompletion, totalAll);
  const overallReasoningPct = tokenSharePct(totalReasoning, totalAll);

  if (OUTPUT_JSON) {
    console.log(JSON.stringify({
      generated: new Date().toISOString().split('T')[0],
      source: 'https://openrouter.ai/rankings/programming?view=week',
      summary: {
        totalModels: results.length,
        totalWeeklyTokens: totalAll,
        totalWeeklyCost: roundedTotalCost,
        totalWeeklyRequests: totalRequests,
        overallInputPct,
        overallOutputPct,
        overallReasoningPct,
      },
      models: results.map(r => ({
        model: r.slug,
        displayName: r.displayName,
        weeklyPromptTokens: r.prompt,
        weeklyCompletionTokens: r.completion,
        weeklyReasoningTokens: r.reasoning,
        weeklyTotalTokens: r.total,
        weeklyRequests: r.requests,
        inputPct: r.inputPct,
        outputPct: r.outputPct,
        reasoningPct: r.reasoningPct,
        inputPricePerMillion: r.inputPrice,
        outputPricePerMillion: r.outputPrice,
        blendedPricePerMillion: r.blendedPrice,
        weeklyCost: r.weeklyCost,
        pricingSource: r.pricingSource,
      })),
    }, null, 2));
  } else {
    console.log('Model,Display Name,Weekly Prompt Tokens,Weekly Completion Tokens,Weekly Reasoning Tokens,Weekly Total Tokens,Weekly Requests,Input %,Output %,Reasoning %,Input $/1M,Output $/1M,Blended $/1M,Weekly Cost ($),Pricing Source');
    for (const r of results) {
      // Display names are always quoted (as before); embedded quotes are now doubled
      // and a slug is quoted only if it would otherwise break the row.
      console.log(`${csvField(r.slug)},${csvField(r.displayName, true)},${r.prompt},${r.completion},${r.reasoning},${r.total},${r.requests},${r.inputPct},${r.outputPct},${r.reasoningPct},${r.inputPrice},${r.outputPrice},${r.blendedPrice},${r.weeklyCost},${r.pricingSource}`);
    }
    console.log(`TOTAL,"All Models",${totalPrompt},${totalCompletion},${totalReasoning},${totalAll},${totalRequests},${overallInputPct},${overallOutputPct},${overallReasoningPct},,,,${roundedTotalCost},`);
  }
}
// Entry point: run the scraper; on any unhandled failure, log the error to
// stderr and terminate with a non-zero exit status.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment