playbook/antigravity-awesome-skills/skills/competitor-analysis/scripts/capture_screenshots.mjs

143 lines
6.9 KiB
JavaScript

#!/usr/bin/env node
// Capture homepage hero screenshot for each competitor in the research directory.
// Reads per-competitor markdown files, extracts `website` from frontmatter, navigates
// via `browse`, and writes one PNG per competitor to `{OUTPUT_DIR}/screenshots/`.
//
// Requires: `browse` CLI (`npm install -g browse`), either local Chrome (--mode local)
// or a Browserbase remote session (--mode remote, the default).
//
// The browser mode is selected per `browse` command via the --remote / --local flag,
// so there is no separate environment-config step — see SKILL.md Step 6 for setup notes.
//
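// Usage: node capture_screenshots.mjs <research-dir> [--mode remote|local] [--skip-existing] [--concurrency <n>]
// (--concurrency values above 1 are currently clamped to 1: all captures share one `browse` session.)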
import { readdirSync, readFileSync, mkdirSync, existsSync } from 'fs';
import { join } from 'path';
import { spawnSync } from 'child_process';
import { parseFrontmatter } from './md_utils.mjs';
// CLI arguments with the node binary and script path stripped off.
const args = process.argv.slice(2);
// An explicit help request exits 0; running with no arguments at all prints the same
// usage text but exits 1, since the required <research-dir> is missing.
const helpRequested = ['--help', '-h'].some((flag) => args.includes(flag));
if (helpRequested || args.length === 0) {
  // Usage goes to stderr; stdout is reserved for the JSON summary printed at the end of a run.
  const usage = `Usage: node capture_screenshots.mjs <research-dir> [options]
Reads all .md files in <research-dir>, extracts the "website" field from each
competitor's YAML frontmatter, and captures a 1280x800 viewport screenshot of the
homepage. Writes one PNG per competitor as {slug}-hero.png.
Output goes to <research-dir>/screenshots/.
Options:
--mode <remote|local> Which browse session to use (default: remote).
Passed as --remote / --local on each browse command.
--concurrency <n> How many competitors to capture in parallel (default: 1)
(screenshot takes ~3s; serial is usually fine)
--skip-existing Skip competitors that already have screenshots
--help, -h Show this help message`;
  console.error(usage);
  process.exit(helpRequested ? 0 : 1);
}
/**
 * Find the research directory among the CLI arguments: the first argument that is
 * neither a `--flag` nor the value belonging to a value-taking flag.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {string|undefined} The first positional argument, or undefined if there is none.
 */
function findResearchDir(argv) {
  // Flags whose following argument is their value, not a positional.
  const valueFlags = new Set(['--mode', '--concurrency']);
  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i];
    if (valueFlags.has(arg)) {
      i += 1; // step over the flag's value as well
    } else if (!arg.startsWith('--')) {
      return arg;
    }
  }
  return undefined;
}
// Blindly taking args[0] would treat a leading flag as the directory (e.g.
// `--skip-existing ./research` would create a folder literally named `--skip-existing`),
// so locate the first real positional argument instead.
const dir = findResearchDir(args);
if (dir === undefined) {
  console.error('Error: missing <research-dir> argument. Run with --help for usage.');
  process.exit(1);
}
const modeIdx = args.indexOf('--mode');
const requestedMode = modeIdx !== -1 ? args[modeIdx + 1] : 'remote';
// Anything other than "local" has always meant remote; keep that fallback, but say so
// instead of silently opening a remote session on a typo or a missing value.
if (requestedMode !== 'remote' && requestedMode !== 'local') {
  console.error(`Warning: unrecognized --mode "${requestedMode}" — falling back to remote.`);
}
const browseMode = requestedMode === 'local' ? 'local' : 'remote';
const modeFlag = browseMode === 'local' ? '--local' : '--remote';
// Drive a dedicated named session so we never collide with whatever `browse` session
// the user already has open (the default session is bound to one mode — opening it
// --remote while a --local session is live errors out). Stopped at the end of the run.
const SESSION = 'competitor-analysis-shots';
const browseFlags = [modeFlag, '-s', SESSION];
const concurrencyIdx = args.indexOf('--concurrency');
let concurrency = concurrencyIdx !== -1 ? parseInt(args[concurrencyIdx + 1], 10) : 1;
// Floor at 1: `--concurrency 0` would spawn zero workers (nothing captured, yet a
// "successful" exit), and a non-numeric value parses to NaN. Normalize before the clamp.
if (!Number.isFinite(concurrency) || concurrency < 1) concurrency = 1;
const skipExisting = args.includes('--skip-existing');
// All captures share one named `browse` session; parallel `browse open/screenshot` calls
// would race on the same tab. Clamp to 1 and warn rather than silently corrupt output.
// (Each capture is fast — ~3-4s — so serial is acceptable.)
if (concurrency > 1) {
  console.error(`Note: clamping --concurrency ${concurrency} to 1 — \`browse\` shares a single session across calls, so parallel screenshots would race on the same tab.`);
  concurrency = 1;
}
// Screenshots are written to <research-dir>/screenshots; create it up front.
const shotsDir = join(dir, 'screenshots');
mkdirSync(shotsDir, { recursive: true });
/**
 * Run a command synchronously and hand back the raw `spawnSync` result.
 * stdout/stderr are decoded as UTF-8 and each capped at 4 MiB via `maxBuffer`.
 *
 * @param {string} cmd - Executable to launch.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {{ timeout?: number }} [options] - `timeout` in milliseconds (default 30000).
 * @returns {object} The object returned by `spawnSync`.
 */
function run(cmd, args, options = {}) {
  const { timeout = 30000 } = options;
  const spawnOptions = { encoding: 'utf-8', timeout, maxBuffer: 4 * 1024 * 1024 };
  return spawnSync(cmd, args, spawnOptions);
}
/**
 * Capture the homepage hero screenshot for one competitor.
 *
 * Never throws: every failure is recorded in the returned `errors` array.
 *
 * @param {string} slug - Competitor slug; the PNG is written as `{slug}-hero.png` in `shotsDir`.
 * @param {string} website - URL handed to `browse open`.
 * @returns {Promise<{slug: string, hero: (string|null), errors: string[], skipped?: boolean}>}
 *   `hero` is the PNG path on success (or when skipped because it already exists), else null.
 */
async function captureOne(slug, website) {
  // Best available explanation for a failed `run()` call. Spawn-level failures (binary not
  // installed → ENOENT, timeout → ETIMEDOUT) are reported only on the `error` property, with
  // status/stdout/stderr left null or empty, so check it first instead of printing "exit null".
  const describeFailure = (res) =>
    res.error?.message || res.stderr || res.stdout || `exit ${res.status}`;

  const heroPath = join(shotsDir, `${slug}-hero.png`);
  const result = { slug, hero: null, errors: [] };
  if (skipExisting && existsSync(heroPath)) {
    return { ...result, hero: heroPath, skipped: true };
  }
  // Hero: viewport 1280x800, single-screen shot. The mode + session flags are passed on
  // each command so every call resolves to the same dedicated browser session.
  try {
    const openRes = run('browse', ['open', website, ...browseFlags], { timeout: 30000 });
    // `browse open` exits 0 even when navigation fails — it just lands the tab on
    // `chrome-error://chromewebdata/`. Detect failure from the resulting URL, not the exit
    // code, so we never screenshot a Chrome error page (and, since the session is reused
    // across competitors, never save one competitor's page under another's slug).
    let landedUrl = '';
    try {
      landedUrl = JSON.parse(openRes.stdout || '{}').url || '';
    } catch {
      /* non-JSON stdout: leave landedUrl empty, which is treated as a failure below */
    }
    const navigationFailed =
      openRes.status !== 0 ||
      !landedUrl ||
      /^chrome-error:\/\//.test(landedUrl) ||
      landedUrl === 'about:blank';
    if (navigationFailed) {
      result.errors.push(
        `open failed (landed: ${landedUrl || 'unknown'}): ${describeFailure(openRes)}`.slice(0, 200),
      );
      return result;
    }
    // Viewport and settle-wait are best-effort; their results are intentionally not checked.
    run('browse', ['viewport', '1280', '800', ...browseFlags]);
    run('browse', ['wait', 'timeout', '1500', ...browseFlags]); // let the hero settle
    const shotRes = run('browse', ['screenshot', '--path', heroPath, '--animations', 'disabled', ...browseFlags]);
    if (shotRes.status === 0 && existsSync(heroPath)) {
      result.hero = heroPath;
    } else {
      result.errors.push(`hero: ${describeFailure(shotRes)}`);
    }
  } catch (err) {
    result.errors.push(`hero exception: ${err.message}`);
  }
  return result;
}
// Load competitor records: one capture job per .md file in the research directory whose
// frontmatter has a truthy `website` field. Files are processed in sorted name order.
const files = readdirSync(dir).filter(f => f.endsWith('.md')).sort();
const jobs = [];
for (const f of files) {
  const content = readFileSync(join(dir, f), 'utf-8');
  const fm = parseFrontmatter(content);
  // Skip files without parsed frontmatter or without a website to visit.
  if (!fm || !fm.website) continue;
  // Strip only the trailing extension. `f.replace('.md', '')` removes the FIRST ".md"
  // occurrence, which mangles names such as "acme.mdx-notes.md" into "acmex-notes.md".
  const slug = f.slice(0, -'.md'.length);
  jobs.push({ slug, website: fm.website });
}
console.error(`Capturing hero screenshots for ${jobs.length} competitors → ${shotsDir}`);
// Outcomes accumulate in `results`; `queue` is a private copy of the job list that the
// worker(s) drain from the front.
const results = [];
const queue = [...jobs];
/**
 * Pull jobs off the shared queue until it is empty, capturing each one and logging a
 * one-line status (plus any errors, truncated to 120 characters) to stderr.
 */
async function worker() {
  // Jobs are always objects, so `undefined` from shift() can only mean the queue is empty.
  for (let job = queue.shift(); job !== undefined; job = queue.shift()) {
    const startedAt = Date.now();
    const outcome = await captureOne(job.slug, job.website);
    results.push(outcome);
    const seconds = ((Date.now() - startedAt) / 1000).toFixed(1);
    const mark = outcome.hero ? 'H' : '-';
    const suffix = outcome.skipped ? '(skipped)' : '';
    console.error(` [${mark}] ${job.slug.padEnd(24)} ${seconds}s ${suffix}`);
    outcome.errors.forEach((e) => console.error(` ! ${e.slice(0, 120)}`));
  }
}
// Start at least one worker even when there are no jobs; it simply returns immediately.
const workerCount = Math.min(concurrency, jobs.length || 1);
await Promise.all(Array.from({ length: workerCount }, () => worker()));
// Stop the dedicated session so no local browser (or remote Browserbase session) is left
// running after this script finishes. `browse stop` takes only `-s <session>`: it does NOT
// accept --remote/--local (passing them errors out), and `stop -s <session>` reliably stops
// a remote Browserbase session (verified against browse v0.8.5). Best-effort: the result
// is deliberately ignored.
run('browse', ['stop', '-s', SESSION]);
// A result counts as captured when it has a hero path (this includes skipped-existing ones).
const okHero = results.reduce((count, r) => (r.hero ? count + 1 : count), 0);
// Human-readable summary goes to stderr; the machine-readable JSON summary goes to stdout.
console.error(`\nDone: ${okHero}/${jobs.length} hero`);
const summary = { total: jobs.length, hero: okHero, outputDir: shotsDir };
console.log(JSON.stringify(summary));