playbook/antigravity-awesome-skills/plugins/antigravity-awesome-skills-.../skills/vercel-optimize/scripts/scan-codebase.mjs

314 lines
11 KiB
JavaScript
Executable File

#!/usr/bin/env node
// Walks the repo, runs every scanner in lib/scanners/, emits findings + routes
// + stack as JSON. Output is merged into signals.codebase.*. New scanners drop
// into lib/scanners/ + the barrel; this file is closed for modification.
import { readdir, readFile } from 'node:fs/promises';
import { join, relative } from 'node:path';
import { scanners } from '../lib/scanners/index.mjs';
import { detectStack } from '../lib/vercel.mjs';
import {
detectMonorepoRoot,
listWorkspacePackages,
buildResolver,
resolveWorkspaceImports,
} from '../lib/workspace-resolver.mjs';
// Version tag emitted as `schemaVersion` in the JSON report written by main().
const SCHEMA_VERSION = '1.0';
// Directory names that exclude a file from scanning: collectFiles() and
// enumerateRoutes() skip any file whose directory path contains one of these
// names as a segment.
const SKIP_DIRS = new Set(['node_modules', '.next', '.vercel', 'dist', 'build', '.git', 'coverage', '.turbo', '__tests__', 'cypress']);
// File-name patterns (tested against the bare file name in collectFiles()) for
// test/spec files and TypeScript declaration files, which are not scanned.
const SKIP_FILE_PATTERNS = [/\.test\./, /\.spec\./, /\.d\.ts$/];
/**
 * CLI entry point. Scans the directory given as the first CLI argument (or the
 * current working directory), runs every registered scanner over the collected
 * files, and writes a single JSON report to stdout. Progress and scanner
 * failures are reported on stderr so stdout stays machine-readable.
 */
async function main() {
  const rootDir = process.argv[2] || process.cwd();
  process.stderr.write(`[scan-codebase] scanning ${rootDir}\n`);

  const [stack, files, routes] = await Promise.all([
    detectStack(rootDir),
    collectFiles(rootDir),
    enumerateRoutes(rootDir),
  ]);

  // In a monorepo, route files often re-export from workspace packages. Without
  // resolving those, sub-agents abstain because the workspace path is outside
  // their read scope.
  const monorepoRoot = await detectMonorepoRoot(rootDir);
  const workspacePackages = monorepoRoot ? await listWorkspacePackages(monorepoRoot) : [];
  const resolver = monorepoRoot ? buildResolver(workspacePackages) : () => null;
  if (monorepoRoot) {
    process.stderr.write(`[scan-codebase] monorepo root: ${monorepoRoot} (${workspacePackages.length} workspace packages)\n`);
  }
  await enrichRoutesWithWorkspaceImports(routes, rootDir, resolver, monorepoRoot);

  process.stderr.write(`[scan-codebase] ${files.length} files, ${routes.length} routes, ${scanners.length} scanners\n`);

  const findings = [];
  for (const scanner of scanners) {
    try {
      const scopedFiles = filterApplicable(files, scanner.metadata);
      // Scanners may be sync or async (large-static-asset does fs.stat walks).
      const reported = (await scanner.scan({ files: scopedFiles, rootDir, routes, stack })) ?? [];
      for (const finding of reported) {
        findings.push({
          ...finding,
          route: mapFileToRoute(finding.file, routes),
        });
      }
    } catch (err) {
      // One failing scanner must not abort the whole scan; report it and move on.
      process.stderr.write(`[scan-codebase] scanner ${scanner.metadata?.id} threw: ${err.message}\n`);
    }
  }

  // Order by file, then line, then pattern.
  findings.sort((left, right) => {
    const byFile = left.file.localeCompare(right.file);
    if (byFile) return byFile;
    const byLine = (left.line ?? 0) - (right.line ?? 0);
    if (byLine) return byLine;
    return left.pattern.localeCompare(right.pattern);
  });

  const report = {
    schemaVersion: SCHEMA_VERSION,
    scannedAt: new Date().toISOString(),
    rootDir,
    monorepoRoot: monorepoRoot ?? null,
    workspacePackages: workspacePackages.map(({ name, dir }) => ({
      name,
      dir: relative(monorepoRoot ?? rootDir, dir),
    })),
    stack,
    routes,
    findings,
    scannerMetadata: scanners.map(({ metadata }) => ({
      id: metadata.id,
      title: metadata.title,
      severity: metadata.severity,
      billingDimension: metadata.billingDimension,
      trafficIndependent: metadata.trafficIndependent,
    })),
  };
  process.stdout.write(JSON.stringify(report, null, 2) + '\n');
  process.stderr.write(`[scan-codebase] ${findings.length} finding(s)\n`);
}
// Upper bound on workspace imports recorded per route. The list is capped to
// keep the brief focused; source order ≈ import order, so the primary view
// component usually leads.
const WORKSPACE_IMPORT_LIMIT_PER_ROUTE = 12;

/**
 * Annotates each route (in place) with the workspace-package files its entry
 * file imports, so the brief can allowlist them and sub-agents can investigate
 * the real source rather than abstaining on a thin re-export shell.
 *
 * Does nothing when `monorepoRoot` is falsy. Routes without a `file`, and
 * routes whose imports resolve to nothing, are left untouched.
 *
 * @param {Array<object>} routes - Route records; `workspaceImports` is added to matching entries.
 * @param {string} scanRootDir - Directory that route `file` paths are relative to.
 * @param {Function} resolver - Workspace resolver handed through to resolveWorkspaceImports.
 * @param {string|null|undefined} monorepoRoot - Monorepo root; recorded paths are relative to it.
 * @returns {Promise<void>}
 */
async function enrichRoutesWithWorkspaceImports(routes, scanRootDir, resolver, monorepoRoot) {
  if (!monorepoRoot) return;
  for (const route of routes) {
    if (!route?.file) continue;
    const routeFile = join(scanRootDir, route.file);
    const importedFiles = await resolveWorkspaceImports(routeFile, resolver, {
      pureBarrelDepth: 3,
      suffixFanoutDepth: 2,
      perSpecifierCap: 3,
    });
    if (importedFiles.length !== 0) {
      // Paths must be relative to the monorepo root so they align between signals + verifier.
      const capped = importedFiles.slice(0, WORKSPACE_IMPORT_LIMIT_PER_ROUTE);
      route.workspaceImports = capped.map((importedFile) => relative(monorepoRoot, importedFile));
    }
  }
}
/**
 * Recursively collects the scannable source files under `root`.
 *
 * A file is kept when it lives outside every SKIP_DIRS directory, its name
 * matches none of SKIP_FILE_PATTERNS, it has one of the recognised source
 * extensions, and its decoded content is at most 500,000 characters.
 *
 * @param {string} root - Directory to walk.
 * @returns {Promise<Array<{path: string, content: string}>>} Files with `path`
 *   relative to `root` and their UTF-8 content.
 */
async function collectFiles(root) {
  const entries = await readdir(root, { recursive: true, withFileTypes: true });
  const out = [];
  for (const e of entries) {
    if (!e.isFile()) continue;
    const parentDir = e.parentPath ?? e.path ?? root;
    // Only directory segments *below* `root` may trigger a skip. Checking the
    // absolute path would drop every file whenever the scan root itself sits
    // under a directory named e.g. `build` or `dist`. Both separators are
    // accepted so the check also works on Windows-style paths.
    const segments = relative(root, parentDir).split(/[\\/]/);
    if (segments.some((s) => SKIP_DIRS.has(s))) continue;
    if (SKIP_FILE_PATTERNS.some((re) => re.test(e.name))) continue;
    if (!/\.(tsx?|jsx?|mjs|cjs|html|svelte|astro|vue|json)$/.test(e.name)) continue;
    const full = join(parentDir, e.name);
    try {
      const content = await readFile(full, 'utf-8');
      // Very large files are skipped rather than handed to the scanners.
      if (content.length > 500_000) continue;
      out.push({ path: relative(root, full), content });
    } catch (err) {
      // Best effort: an unreadable file must not abort the scan, but say so.
      process.stderr.write(`[scan-codebase] skipped unreadable file ${full}: ${err?.message ?? err}\n`);
    }
  }
  return out;
}
/**
 * Narrows `files` to those a scanner declared interest in.
 *
 * @param {Array<{path: string}>} files - Collected files.
 * @param {{includeGlobs?: string[]}} meta - Scanner metadata; when
 *   `includeGlobs` is absent every file is a candidate (`**` + `/*`).
 * @returns {Array<{path: string}>} Files whose path matches at least one glob,
 *   in their original order.
 */
function filterApplicable(files, meta) {
  const patterns = meta.includeGlobs ?? ['**/*'];
  const selected = [];
  for (const file of files) {
    const wanted = patterns.some((glob) => globMatch(glob, file.path));
    if (wanted) selected.push(file);
  }
  return selected;
}
/**
 * Tiny glob → regex matcher. Supports `**`, `*`, and `{a,b}` alternation.
 *
 * - `**` + `/` matches zero or more leading directories, so `**` + `/*.ts`
 *   also matches a root-level `a.ts`.
 * - A bare `**` matches anything, including `/`.
 * - `*` matches any run of characters within a single path segment.
 * - Every other regex metacharacter (including `?`) is matched literally.
 *
 * @param {string} pattern - Glob pattern using `/` as the separator.
 * @param {string} path - Path to test, using `/` as the separator.
 * @returns {boolean} True when the whole path matches the pattern.
 */
function globMatch(pattern, path) {
  const source = pattern
    // Escape regex metacharacters that carry no glob meaning here.
    .replace(/[.+?^$()|[\]\\]/g, '\\$&')
    // {a,b} → (?:a|b); runs after escaping so the group syntax stays intact.
    .replace(/\{([^}]+)\}/g, (_, inner) => `(?:${inner.split(',').join('|')})`)
    // Translate star tokens in one pass (longest first), so no placeholder
    // string is needed and `**/` can become an optional directory prefix.
    .replace(/\*\*\/|\*\*|\*/g, (token) => {
      if (token === '**/') return '(?:.*/)?';
      if (token === '**') return '.*';
      return '[^/]*';
    });
  return new RegExp(`^${source}$`).test(path);
}
/**
 * Walks `root` and derives framework routes from file-system conventions.
 *
 * Rules are tried in this order for each file, first match wins:
 *   1. Next.js App Router (`app/` or `src/app/`: page/route/layout files)
 *   2. Astro endpoints whose file name carries a response extension
 *   3. Next.js Pages Router (`pages/` or `src/pages/`, `.ts/.tsx/.js/.jsx`)
 *   4. Nuxt pages (`pages/*.vue` or `app/pages/*.vue`)
 *   5. Nuxt server routes (`server/api`, `server/routes`)
 *   6. Remaining Astro pages/endpoints under `src/pages/`
 *   7. SvelteKit (`src/routes/**` `+page`, `+server`, `+layout` files)
 *
 * Files inside a SKIP_DIRS directory (relative to `root`) are ignored.
 *
 * @param {string} root - Directory to walk.
 * @returns {Promise<Array<{routePath: string, file: string, type: string}>>}
 *   Routes sorted by routePath, then type (page, route, layout), then file.
 *   `file` is relative to `root`; `type` is 'page', 'route' or 'layout'.
 */
async function enumerateRoutes(root) {
  const entries = await readdir(root, { recursive: true, withFileTypes: true });
  const routes = [];
  for (const e of entries) {
    if (!e.isFile()) continue;
    const parentDir = e.parentPath ?? e.path ?? root;
    // Only directory segments *below* `root` may trigger a skip. Checking the
    // absolute path would drop every route whenever the scan root itself sits
    // under a directory named e.g. `build` or `dist`. Both separators are
    // accepted so the check also works on Windows-style paths.
    const segments = relative(root, parentDir).split(/[\\/]/);
    if (segments.some((s) => SKIP_DIRS.has(s))) continue;
    const full = join(parentDir, e.name);
    const rel = relative(root, full);
    // App Router: route groups ((name)), parallel routes (@slot), private folders
    // (_name), and the top-level page.tsx (no path segment) all need explicit handling.
    let m = rel.match(/^(?:src\/)?app\/(.*)\/(page|route|layout)\.(tsx?|jsx?)$/);
    if (!m) {
      const top = rel.match(/^(?:src\/)?app\/(page|route|layout)\.(tsx?|jsx?)$/);
      if (top) {
        routes.push({
          routePath: '/',
          file: rel,
          type: routeEntryType(top[1]),
        });
        continue;
      }
    }
    if (m) {
      // Drop segments that do not contribute to the URL.
      const stripped = m[1]
        .split('/')
        .filter((seg) => !/^\([^)]+\)$/.test(seg) && !/^@/.test(seg) && !/^_/.test(seg))
        .join('/')
        .replace(/^\/+|\/+$/g, '');
      const routePath = stripped === '' ? '/' : `/${stripped}`;
      routes.push({
        routePath,
        file: rel,
        type: routeEntryType(m[2]),
      });
      continue;
    }
    // Astro endpoint filenames commonly include the response extension
    // (`feed.xml.ts`, `robots.txt.ts`). Handle these before the generic
    // `src/pages` rule, which otherwise treats them as page components.
    m = rel.match(/^src\/pages\/(.*\.(?:xml|json|txt|rss|atom|svg|png|jpg|jpeg|webp))\.(tsx?|jsx?|mjs|cjs)$/);
    if (m) {
      const name = normalizeRouteFileStem(m[1]);
      routes.push({
        routePath: name === '' ? '/' : '/' + name,
        file: rel,
        type: 'route',
      });
      continue;
    }
    // Pages Router: everything under pages/api/ is an API route, the rest are pages.
    m = rel.match(/^(?:src\/)?pages\/(.*)\.(tsx?|jsx?)$/);
    if (m) {
      const name = m[1].replace(/\/index$/, '').replace(/^index$/, '');
      const isApi = /^api\//.test(name);
      routes.push({
        routePath: name === '' ? '/' : '/' + name,
        file: rel,
        type: isApi ? 'route' : 'page',
      });
      continue;
    }
    // Nuxt 3/4 pages. Dynamic segments use the same bracket shape as metrics
    // (`[id]`, `[...slug]`), so keep them intact for route matching.
    m = rel.match(/^(?:app\/)?pages\/(.*)\.vue$/);
    if (m) {
      const name = normalizeRouteFileStem(m[1]);
      routes.push({
        routePath: name === '' ? '/' : '/' + name,
        file: rel,
        type: 'page',
      });
      continue;
    }
    // Nuxt server routes: server/api/foo.get.ts -> /api/foo,
    // server/routes/rss.xml.ts -> /rss.xml.
    m = rel.match(/^server\/(api|routes)\/(.*)\.(tsx?|jsx?|mjs|cjs)$/);
    if (m) {
      const base = m[1] === 'api' ? 'api/' : '';
      const name = normalizeRouteFileStem(`${base}${m[2]}`);
      routes.push({
        routePath: name === '' ? '/' : '/' + name,
        file: rel,
        type: 'route',
      });
      continue;
    }
    // Astro pages and endpoints. This is limited framework support, but route
    // mapping still improves reports when Vercel metrics use user-facing paths.
    m = rel.match(/^src\/pages\/(.*)\.(astro|tsx?|jsx?|mjs|cjs)$/);
    if (m) {
      const name = normalizeRouteFileStem(m[1]);
      routes.push({
        routePath: name === '' ? '/' : '/' + name,
        file: rel,
        type: m[2] === 'astro' ? 'page' : 'route',
      });
      continue;
    }
    // SvelteKit: +page.svelte = page, +page.server.{ts,js} pairs with it (treat
    // as page), +server.{ts,js} = API route, +layout.* = ancestor layout context.
    // Route groups (auth) stripped like Next; dynamic segments [slug]/[...rest]/[[opt]] preserved.
    m = rel.match(/^src\/routes\/(.*)\/\+(page\.svelte|page\.server\.(?:ts|js)|server\.(?:ts|js)|layout\.svelte|layout\.server\.(?:ts|js))$/);
    if (m || /^src\/routes\/\+(page\.svelte|page\.server\.(?:ts|js)|server\.(?:ts|js)|layout\.svelte|layout\.server\.(?:ts|js))$/.test(rel)) {
      const fileTypeMatch = rel.match(/\+(page\.svelte|page\.server\.(?:ts|js)|server\.(?:ts|js)|layout\.svelte|layout\.server\.(?:ts|js))$/);
      const fileType = fileTypeMatch?.[1] ?? '';
      const segs = (m?.[1] ?? '').split('/').filter(Boolean)
        .filter((seg) => !/^\([^)]+\)$/.test(seg));
      const routePath = segs.length === 0 ? '/' : '/' + segs.join('/');
      const type = fileType.startsWith('server') ? 'route' : fileType.startsWith('layout') ? 'layout' : 'page';
      // When +page.svelte AND +page.server.ts both exist, +page.svelte wins ownership.
      const existing = type === 'layout' ? null : routes.find((r) => r.routePath === routePath && r.type !== 'layout');
      if (existing) {
        if (fileType === 'page.svelte' && existing.type === 'page') {
          existing.file = rel;
        }
        continue;
      }
      routes.push({ routePath, file: rel, type });
      continue;
    }
  }
  return routes.sort((a, b) =>
    a.routePath.localeCompare(b.routePath)
    || routeTypeOrder(a.type) - routeTypeOrder(b.type)
    || a.file.localeCompare(b.file)
  );
}
/**
 * Maps an App Router file stem to a route type.
 *
 * @param {string} name - File stem such as 'page', 'route' or 'layout'.
 * @returns {'page'|'route'|'layout'} 'route' and 'layout' map to themselves;
 *   anything else is treated as a page.
 */
function routeEntryType(name) {
  switch (name) {
    case 'route':
      return 'route';
    case 'layout':
      return 'layout';
    default:
      return 'page';
  }
}
/**
 * Turns a route file stem (path without extension) into a URL path without
 * leading/trailing slashes.
 *
 * The HTTP-method suffix (`.get`, `.post`, …) is removed BEFORE the index
 * segment, so `api/index.get` normalises to `api` rather than `api/index`.
 *
 * @param {string|null|undefined} stem - File stem, e.g. 'api/users/index.get'.
 * @returns {string} Normalised path; '' denotes the root route.
 */
function normalizeRouteFileStem(stem) {
  return String(stem ?? '')
    // Method suffix first: it belongs to the file name, so it can hide a trailing `index`.
    .replace(/\.(?:get|post|put|patch|delete|options|head)$/, '')
    .replace(/\/index$/, '')
    .replace(/^index$/, '')
    .replace(/^\/+|\/+$/g, '');
}
/**
 * Sort rank for a route type: pages first, then routes, then layouts, with
 * any unrecognised type last.
 *
 * @param {string} type - Route type.
 * @returns {number} 0 for 'page', 1 for 'route', 2 for 'layout', otherwise 3.
 */
function routeTypeOrder(type) {
  const rank = ['page', 'route', 'layout'].indexOf(type);
  return rank === -1 ? 3 : rank;
}
/**
 * Looks up the route path owned by a file.
 *
 * @param {string} filePath - File path in the same form as the routes' `file` field.
 * @param {Array<{file: string, routePath?: string}>} routes - Enumerated routes.
 * @returns {string|null} routePath of the first route whose `file` equals
 *   `filePath`, or null when none matches (or the match has no routePath).
 */
function mapFileToRoute(filePath, routes) {
  for (const candidate of routes) {
    if (candidate.file === filePath) {
      return candidate.routePath ?? null;
    }
  }
  return null;
}
// Run the scan; any uncaught failure is reported on stderr and the process
// exits with status 1.
main().catch((failure) => {
  const { message } = failure;
  process.stderr.write(`[scan-codebase] FAILED: ${message}\n`);
  process.exit(1);
});