#!/usr/bin/env node
// Capture homepage hero screenshot for each competitor in the research directory.
// Reads per-competitor markdown files, extracts `website` from frontmatter, navigates
// via `browse`, and writes one PNG per competitor to `{OUTPUT_DIR}/screenshots/`.
//
// Requires: `browse` CLI (`npm install -g browse`), either local Chrome (--mode local)
// or a Browserbase remote session (--mode remote, the default).
//
// The browser mode is selected per `browse` command via the --remote / --local flag,
// so there is no separate environment-config step — see SKILL.md Step 6 for setup notes.
//
// Usage: node capture_screenshots.mjs <research-dir> [--mode remote|local] [--concurrency 2]

import { readdirSync, readFileSync, mkdirSync, existsSync, statSync } from 'fs';
import { join } from 'path';
import { spawnSync } from 'child_process';
import { parseFrontmatter } from './md_utils.mjs';

const args = process.argv.slice(2);
const wantsHelp = args.includes('--help') || args.includes('-h');

// Help goes to stderr so stdout stays reserved for the final JSON summary.
// Explicit --help exits 0; running with no arguments at all is a usage error (exit 1).
if (wantsHelp || args.length === 0) {
  console.error(`Usage: node capture_screenshots.mjs <research-dir> [options]

Reads all .md files in <research-dir>, extracts the "website" field from each
competitor's YAML frontmatter, and captures a 1280x800 viewport screenshot of the
homepage. Writes one PNG per competitor as {slug}-hero.png.

Output goes to <research-dir>/screenshots/.

Options:
  --mode <remote|local>   Which browse session to use (default: remote).
                          Passed as --remote / --local on each browse command.
  --concurrency <n>       How many competitors to capture in parallel (default: 1)
                          (screenshot takes ~3s; serial is usually fine)
  --skip-existing         Skip competitors that already have screenshots
  --help, -h              Show this help message`);
  process.exit(wantsHelp ? 0 : 1);
}

// The research directory is the first positional argument. Validate it BEFORE creating
// anything: `mkdirSync(..., { recursive: true })` would otherwise happily create
// `<typo>/screenshots` (or `--mode/screenshots` when a flag is passed first) and the run
// would "succeed" with zero competitors.
const dir = args[0];
if (!existsSync(dir) || !statSync(dir).isDirectory()) {
  console.error(`Error: "${dir}" is not a directory. Pass the research directory as the first argument.`);
  process.exit(1);
}

// Reject unknown/missing --mode values instead of silently treating them as "remote".
const modeIdx = args.indexOf('--mode');
const browseMode = modeIdx !== -1 ? args[modeIdx + 1] : 'remote';
if (browseMode !== 'remote' && browseMode !== 'local') {
  console.error(`Error: --mode must be "remote" or "local" (got ${JSON.stringify(browseMode)}).`);
  process.exit(1);
}
const modeFlag = browseMode === 'local' ? '--local' : '--remote';

// Drive a dedicated named session so we never collide with whatever `browse` session
// the user already has open (the default session is bound to one mode — opening it
// --remote while a --local session is live errors out). Stopped at the end of the run.
const SESSION = 'competitor-analysis-shots';
const browseFlags = [modeFlag, '-s', SESSION];

const concurrencyIdx = args.indexOf('--concurrency');
let concurrency = concurrencyIdx !== -1 ? Number.parseInt(args[concurrencyIdx + 1], 10) : 1;
// Floor at 1: `--concurrency 0` would spawn zero workers (no screenshots captured, yet the
// script exits "successfully"), and a non-numeric value parses to NaN.
// Normalize before the >1 clamp below.
if (!Number.isFinite(concurrency) || concurrency < 1) concurrency = 1;

const skipExisting = args.includes('--skip-existing');

// All captures share one named `browse` session; parallel `browse open/screenshot` calls would
// race on the same tab. Clamp concurrency to 1 and warn rather than silently corrupt output.
// (Each capture is fast — ~3-4s — so serial is acceptable.)
if (concurrency > 1) {
  console.error(`Note: clamping --concurrency ${concurrency} to 1 — \`browse\` shares a single session across calls, so parallel screenshots would race on the same tab.`);
  concurrency = 1;
}

const shotsDir = join(dir, 'screenshots');
mkdirSync(shotsDir, { recursive: true });

/**
 * Run a command synchronously and return the raw `spawnSync` result.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} cmdArgs - Argument vector (no shell is involved).
 * @param {{ timeout?: number }} [options] - `timeout` in milliseconds (default 30000).
 * @returns {import('child_process').SpawnSyncReturns<string>} Result with utf-8 stdout/stderr.
 */
function run(cmd, cmdArgs, { timeout = 30000 } = {}) {
  return spawnSync(cmd, cmdArgs, { encoding: 'utf-8', timeout, maxBuffer: 4 * 1024 * 1024 });
}

/**
 * Produce a human-readable reason for a failed `run()` result.
 * When the process could not be spawned or timed out, `spawnSync` reports that on
 * `.error` (and `status` is null), so prefer it over the possibly-empty output streams.
 *
 * @param {import('child_process').SpawnSyncReturns<string>} res
 * @returns {string}
 */
function describeFailure(res) {
  return res.error?.message || res.stderr || res.stdout || `exit ${res.status}`;
}

/**
 * Capture the hero screenshot for one competitor.
 *
 * @param {string} slug - Competitor slug; the file is written as `{slug}-hero.png`.
 * @param {string} website - URL taken from the competitor's frontmatter.
 * @returns {Promise<{slug: string, hero: string|null, errors: string[], skipped?: boolean}>}
 *   `hero` is the PNG path on success (or when skipped), otherwise null with `errors` filled.
 *   Never rejects: failures are collected into `errors`.
 */
async function captureOne(slug, website) {
  const heroPath = join(shotsDir, `${slug}-hero.png`);
  const result = { slug, hero: null, errors: [] };

  if (skipExisting && existsSync(heroPath)) {
    return { ...result, hero: heroPath, skipped: true };
  }

  // Hero: viewport 1280x800, single-screen shot. The mode + session flags are passed on
  // each command so every call resolves to the same dedicated browser session.
  try {
    const openRes = run('browse', ['open', website, ...browseFlags], { timeout: 30000 });

    // `browse open` exits 0 even when navigation fails — it just lands the tab on
    // `chrome-error://chromewebdata/`. Detect failure from the resulting URL, not the exit
    // code, so we never screenshot a Chrome error page (and, since the session is reused
    // across competitors, never save one competitor's page under another's slug).
    let landedUrl = '';
    try {
      landedUrl = JSON.parse(openRes.stdout || '{}').url || '';
    } catch {
      /* non-JSON stdout: leave landedUrl empty so the check below reports a failure */
    }

    const navigationFailed =
      openRes.status !== 0 ||
      !landedUrl ||
      /^chrome-error:\/\//.test(landedUrl) ||
      landedUrl === 'about:blank';
    if (navigationFailed) {
      result.errors.push(
        `open failed (landed: ${landedUrl || 'unknown'}): ${describeFailure(openRes)}`.slice(0, 200),
      );
      return result;
    }

    // Best-effort setup steps: their results are intentionally not checked; a real
    // problem surfaces through the screenshot command below.
    run('browse', ['viewport', '1280', '800', ...browseFlags]);
    run('browse', ['wait', 'timeout', '1500', ...browseFlags]); // let the hero settle

    const shotRes = run('browse', ['screenshot', '--path', heroPath, '--animations', 'disabled', ...browseFlags]);
    if (shotRes.status === 0 && existsSync(heroPath)) {
      result.hero = heroPath;
    } else {
      result.errors.push(`hero: ${describeFailure(shotRes)}`);
    }
  } catch (err) {
    result.errors.push(`hero exception: ${err.message}`);
  }

  return result;
}

// Load competitor records: one job per markdown file whose frontmatter has a `website`.
const files = readdirSync(dir).filter((f) => f.endsWith('.md')).sort();
const jobs = [];
for (const f of files) {
  const content = readFileSync(join(dir, f), 'utf-8');
  const fm = parseFrontmatter(content);
  if (!fm || !fm.website) continue;
  // Strip only the trailing extension. `f.replace('.md', '')` would remove the FIRST
  // occurrence instead, mangling names such as `acme.mdx-notes.md`.
  const slug = f.slice(0, -'.md'.length);
  jobs.push({ slug, website: fm.website });
}

console.error(`Capturing hero screenshots for ${jobs.length} competitors → ${shotsDir}`);

const results = [];
const queue = [...jobs];

/**
 * Drain the shared job queue, capturing one competitor at a time and logging a
 * one-line status (plus any errors) to stderr for each.
 *
 * @returns {Promise<void>}
 */
async function worker() {
  while (queue.length > 0) {
    const job = queue.shift();
    const started = Date.now();
    const r = await captureOne(job.slug, job.website);
    results.push(r);

    const elapsed = ((Date.now() - started) / 1000).toFixed(1);
    const mark = r.hero ? 'H' : '-';
    console.error(` [${mark}] ${job.slug.padEnd(24)} ${elapsed}s ${r.skipped ? '(skipped)' : ''}`);
    for (const e of r.errors) console.error(` ! ${e.slice(0, 120)}`);
  }
}

try {
  const workerCount = Math.min(concurrency, jobs.length || 1);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
} finally {
  // Tear down the dedicated session so we don't leak a running browser (or remote
  // Browserbase session) after the run — even if a worker failed unexpectedly.
  // `browse stop` takes only `-s <session>` — it does NOT accept --remote/--local
  // (passing them errors out), and `stop -s <session>` reliably stops a remote
  // Browserbase session (verified against browse v0.8.5). Best-effort — ignore failures.
  run('browse', ['stop', '-s', SESSION]);
}

const okHero = results.filter((r) => r.hero).length;
console.error(`\nDone: ${okHero}/${jobs.length} hero`);
// Machine-readable summary on stdout (all human-facing logging above goes to stderr).
console.log(JSON.stringify({ total: jobs.length, hero: okHero, outputDir: shotsDir }));