314 lines
11 KiB
JavaScript
Executable File
314 lines
11 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
// Walks the repo, runs every scanner in lib/scanners/, emits findings + routes
|
|
// + stack as JSON. Output is merged into signals.codebase.*. New scanners drop
|
|
// into lib/scanners/ + the barrel; this file is closed for modification.
|
|
|
|
import { readdir, readFile } from 'node:fs/promises';
|
|
import { join, relative } from 'node:path';
|
|
import { scanners } from '../lib/scanners/index.mjs';
|
|
import { detectStack } from '../lib/vercel.mjs';
|
|
import {
|
|
detectMonorepoRoot,
|
|
listWorkspacePackages,
|
|
buildResolver,
|
|
resolveWorkspaceImports,
|
|
} from '../lib/workspace-resolver.mjs';
|
|
|
|
// Version tag emitted as `schemaVersion` in the JSON output.
const SCHEMA_VERSION = '1.0';

// Directory names that are never descended into when collecting files or routes.
const SKIP_DIRS = new Set([
  'node_modules',
  '.next',
  '.vercel',
  'dist',
  'build',
  '.git',
  'coverage',
  '.turbo',
  '__tests__',
  'cypress',
]);

// File-name patterns excluded from scanning: test files, spec files, and type declarations.
const SKIP_FILE_PATTERNS = [
  /\.test\./,
  /\.spec\./,
  /\.d\.ts$/,
];
|
|
|
|
/**
 * CLI entry point.
 *
 * Scans the directory given as the first CLI argument (falling back to the
 * current working directory), runs every registered scanner over the collected
 * files, and prints a single JSON document to stdout. Progress and scanner
 * failures go to stderr so stdout stays machine-readable.
 *
 * A scanner that throws is logged and skipped; it does not abort the scan.
 */
async function main() {
  const rootDir = process.argv[2] || process.cwd();
  process.stderr.write(`[scan-codebase] scanning ${rootDir}\n`);

  // Stack detection, file collection and route enumeration are independent,
  // so they are started together and awaited as a group.
  const stackTask = detectStack(rootDir);
  const filesTask = collectFiles(rootDir);
  const routesTask = enumerateRoutes(rootDir);
  const [stack, files, routes] = await Promise.all([stackTask, filesTask, routesTask]);

  // In a monorepo, route files often re-export from workspace packages. Without
  // resolving those, sub-agents abstain because the workspace path is outside
  // their read scope.
  const monorepoRoot = await detectMonorepoRoot(rootDir);
  let workspacePackages = [];
  let resolver = () => null;
  if (monorepoRoot) {
    workspacePackages = await listWorkspacePackages(monorepoRoot);
    resolver = buildResolver(workspacePackages);
    process.stderr.write(`[scan-codebase] monorepo root: ${monorepoRoot} (${workspacePackages.length} workspace packages)\n`);
  }
  await enrichRoutesWithWorkspaceImports(routes, rootDir, resolver, monorepoRoot);

  process.stderr.write(`[scan-codebase] ${files.length} files, ${routes.length} routes, ${scanners.length} scanners\n`);

  const findings = [];
  for (const scanner of scanners) {
    try {
      const scannerFiles = filterApplicable(files, scanner.metadata);
      // Awaiting covers both sync and async scanners
      // (large-static-asset does fs.stat walks).
      const results = await scanner.scan({ files: scannerFiles, rootDir, routes, stack });
      for (const finding of results ?? []) {
        const route = mapFileToRoute(finding.file, routes);
        findings.push({ ...finding, route });
      }
    } catch (err) {
      process.stderr.write(`[scan-codebase] scanner ${scanner.metadata?.id} threw: ${err.message}\n`);
    }
  }

  // Order findings by file, then line (missing line counts as 0), then pattern.
  const compareFindings = (left, right) =>
    left.file.localeCompare(right.file)
    || (left.line ?? 0) - (right.line ?? 0)
    || left.pattern.localeCompare(right.pattern);
  findings.sort(compareFindings);

  // Workspace package dirs are reported relative to the monorepo root when
  // there is one, otherwise relative to the scan root.
  const packageBase = monorepoRoot ?? rootDir;
  const report = {
    schemaVersion: SCHEMA_VERSION,
    scannedAt: new Date().toISOString(),
    rootDir,
    monorepoRoot: monorepoRoot ?? null,
    workspacePackages: workspacePackages.map((pkg) => ({
      name: pkg.name,
      dir: relative(packageBase, pkg.dir),
    })),
    stack,
    routes,
    findings,
    scannerMetadata: scanners.map(({ metadata }) => ({
      id: metadata.id,
      title: metadata.title,
      severity: metadata.severity,
      billingDimension: metadata.billingDimension,
      trafficIndependent: metadata.trafficIndependent,
    })),
  };
  process.stdout.write(JSON.stringify(report, null, 2) + '\n');

  process.stderr.write(`[scan-codebase] ${findings.length} finding(s)\n`);
}
|
|
|
|
// Upper bound on workspace imports recorded per route. Keeps the brief focused:
// source order ≈ import order, so the primary view component usually leads.
const WORKSPACE_IMPORT_LIMIT_PER_ROUTE = 12;

/**
 * Attaches a `workspaceImports` list to each route whose file imports from
 * workspace packages, so the brief allowlists them and sub-agents can
 * investigate the real source rather than abstaining on a thin re-export shell.
 *
 * Mutates the route objects in place. Does nothing outside a monorepo. Routes
 * without a `file`, and routes with no resolved imports, are left untouched.
 *
 * @param {Array<object>} routes - Route records; `file` is relative to `scanRootDir`.
 * @param {string} scanRootDir - Directory the route files are relative to.
 * @param {Function} resolver - Workspace resolver handed to `resolveWorkspaceImports`.
 * @param {string|null|undefined} monorepoRoot - Monorepo root; falsy disables enrichment.
 * @returns {Promise<void>}
 */
async function enrichRoutesWithWorkspaceImports(routes, scanRootDir, resolver, monorepoRoot) {
  if (!monorepoRoot) {
    return;
  }

  for (const route of routes) {
    if (!route?.file) {
      continue;
    }

    const routeFile = join(scanRootDir, route.file);
    const importedFiles = await resolveWorkspaceImports(routeFile, resolver, {
      pureBarrelDepth: 3,
      suffixFanoutDepth: 2,
      perSpecifierCap: 3,
    });
    if (importedFiles.length === 0) {
      continue;
    }

    // Paths must be relative to the monorepo root so they align between signals + verifier.
    const kept = importedFiles.slice(0, WORKSPACE_IMPORT_LIMIT_PER_ROUTE);
    route.workspaceImports = kept.map((importedFile) => relative(monorepoRoot, importedFile));
  }
}
|
|
|
|
/**
 * Recursively collects scannable source files under `root`.
 *
 * A file is included when its extension is one of the scanned source types,
 * its name matches none of SKIP_FILE_PATTERNS, none of its directories *below
 * `root`* is in SKIP_DIRS, and its decoded content is at most 500,000
 * characters. Unreadable files are skipped.
 *
 * @param {string} root - Directory to scan.
 * @returns {Promise<Array<{ path: string, content: string }>>} Files with
 *   `path` relative to `root` and their UTF-8 content.
 */
async function collectFiles(root) {
  const entries = await readdir(root, { recursive: true, withFileTypes: true });
  const out = [];
  for (const e of entries) {
    if (!e.isFile()) continue;

    // `parentPath` is the current Dirent field; `path` is its older name.
    const parentDir = e.parentPath ?? e.path ?? root;

    // Only directories below `root` decide skipping. Splitting the absolute
    // path would also test root's own ancestors, so a checkout living under
    // e.g. `/tmp/build/app` would have every file dropped.
    const dirSegments = relative(root, parentDir).split(/[\\/]/);
    if (dirSegments.some((s) => SKIP_DIRS.has(s))) continue;
    if (SKIP_FILE_PATTERNS.some((re) => re.test(e.name))) continue;
    if (!/\.(tsx?|jsx?|mjs|cjs|html|svelte|astro|vue|json)$/.test(e.name)) continue;

    const full = join(parentDir, e.name);
    try {
      const content = await readFile(full, 'utf-8');
      if (content.length > 500_000) continue;
      out.push({ path: relative(root, full), content });
    } catch {
      // Best-effort: a file that cannot be read is left out of the scan
      // rather than failing the whole run.
    }
  }
  return out;
}
|
|
|
|
/**
 * Narrows the collected files to those a scanner declared interest in.
 *
 * @param {Array<{ path: string }>} files - Collected files.
 * @param {{ includeGlobs?: string[] }} meta - Scanner metadata; when
 *   `includeGlobs` is absent every file is matched against `**` + `/*`.
 * @returns {Array<{ path: string }>} Files whose path matches at least one glob.
 */
function filterApplicable(files, meta) {
  const patterns = meta.includeGlobs ?? ['**/*'];
  const matchesAnyPattern = (file) => patterns.some((glob) => globMatch(glob, file.path));
  return files.filter((file) => matchesAnyPattern(file));
}
|
|
|
|
/**
 * Tiny glob → regex matcher. Supports `**`, `*`, and `{a,b}` alternation.
 *
 * - `**` followed by `/` matches zero or more whole directories, so a pattern
 *   such as `**` + `/*.ts` also matches a root-level `a.ts`.
 * - any other `**` matches any characters, including `/`.
 * - `*` matches any run of characters except `/`.
 *
 * The whole path must match (the regex is anchored at both ends).
 *
 * @param {string} pattern - Glob pattern.
 * @param {string} path - Path to test, using `/` separators.
 * @returns {boolean} Whether `path` matches `pattern`.
 */
function globMatch(pattern, path) {
  const source = pattern
    // Escape regex metacharacters that have no glob meaning here.
    .replace(/[.+^$()|[\]\\]/g, '\\$&')
    .replace(/\{([^}]+)\}/g, (_, inner) => `(${inner.split(',').join('|')})`)
    // Translate all star tokens in one pass so the emitted regex text is never
    // re-scanned; longest token first so `**/` wins over `**` and `*`.
    .replace(/\*\*\/|\*\*|\*/g, (token) => {
      if (token === '**/') return '(?:.*/)?';
      if (token === '**') return '.*';
      return '[^/]*';
    });
  return new RegExp(`^${source}$`).test(path);
}
|
|
|
|
/**
 * Walks `root` and derives a route table from framework file conventions.
 *
 * Rules are tried in this order for each file; the first match wins:
 *   1. Next.js App Router (`app/` or `src/app/`, page/route/layout files)
 *   2. Endpoints under `src/pages` whose name carries a response extension
 *      (`feed.xml.ts`, `robots.txt.ts`)
 *   3. `pages/` or `src/pages/` with a ts/tsx/js/jsx file (`api/` → route)
 *   4. Nuxt pages (`pages/*.vue` or `app/pages/*.vue`)
 *   5. Nuxt server routes (`server/api`, `server/routes`)
 *   6. Remaining `src/pages` files (`.astro` → page, otherwise route)
 *   7. SvelteKit (`src/routes/.../+page|+server|+layout`)
 *
 * Files inside any SKIP_DIRS directory below `root` are ignored.
 *
 * @param {string} root - Directory to scan.
 * @returns {Promise<Array<{ routePath: string, file: string, type: string }>>}
 *   Routes sorted by path, then type (page, route, layout), then file. `file`
 *   is relative to `root`.
 */
async function enumerateRoutes(root) {
  const entries = await readdir(root, { recursive: true, withFileTypes: true });
  const routes = [];

  const toRoutePath = (name) => (name === '' ? '/' : `/${name}`);
  const isRouteGroup = (seg) => /^\([^)]+\)$/.test(seg);

  for (const e of entries) {
    if (!e.isFile()) continue;

    // `parentPath` is the current Dirent field; `path` is its older name.
    const parentDir = e.parentPath ?? e.path ?? root;

    // Only directories below `root` decide skipping. Splitting the absolute
    // path would also test root's own ancestors, so a project checked out
    // under e.g. `/tmp/build/app` would yield no routes at all.
    const dirSegments = relative(root, parentDir).split(/[\\/]/);
    if (dirSegments.some((s) => SKIP_DIRS.has(s))) continue;

    const rel = relative(root, join(parentDir, e.name));

    // App Router: route groups ((name)), parallel routes (@slot), private folders
    // (_name), and the top-level page.tsx (no path segment) all need explicit handling.
    let m = rel.match(/^(?:src\/)?app\/(.*)\/(page|route|layout)\.(tsx?|jsx?)$/);
    if (m) {
      const stripped = m[1]
        .split('/')
        .filter((seg) => !isRouteGroup(seg) && !/^@/.test(seg) && !/^_/.test(seg))
        .join('/')
        .replace(/^\/+|\/+$/g, '');
      routes.push({
        routePath: toRoutePath(stripped),
        file: rel,
        type: routeEntryType(m[2]),
      });
      continue;
    }
    m = rel.match(/^(?:src\/)?app\/(page|route|layout)\.(tsx?|jsx?)$/);
    if (m) {
      routes.push({
        routePath: '/',
        file: rel,
        type: routeEntryType(m[1]),
      });
      continue;
    }

    // Astro endpoint filenames commonly include the response extension
    // (`feed.xml.ts`, `robots.txt.ts`). Handle these before the generic
    // `pages` rules, which otherwise treat them as page components.
    m = rel.match(/^src\/pages\/(.*\.(?:xml|json|txt|rss|atom|svg|png|jpg|jpeg|webp))\.(tsx?|jsx?|mjs|cjs)$/);
    if (m) {
      routes.push({
        routePath: toRoutePath(normalizeRouteFileStem(m[1])),
        file: rel,
        type: 'route',
      });
      continue;
    }

    // `pages/` directory with ts/tsx/js/jsx files; anything under `api/` is a route.
    m = rel.match(/^(?:src\/)?pages\/(.*)\.(tsx?|jsx?)$/);
    if (m) {
      const name = m[1].replace(/\/index$/, '').replace(/^index$/, '');
      const isApi = /^api\//.test(name);
      routes.push({
        routePath: toRoutePath(name),
        file: rel,
        type: isApi ? 'route' : 'page',
      });
      continue;
    }

    // Nuxt 3/4 pages. Dynamic segments use the same bracket shape as metrics
    // (`[id]`, `[...slug]`), so keep them intact for route matching.
    m = rel.match(/^(?:app\/)?pages\/(.*)\.vue$/);
    if (m) {
      routes.push({
        routePath: toRoutePath(normalizeRouteFileStem(m[1])),
        file: rel,
        type: 'page',
      });
      continue;
    }

    // Nuxt server routes: server/api/foo.get.ts -> /api/foo,
    // server/routes/rss.xml.ts -> /rss.xml.
    m = rel.match(/^server\/(api|routes)\/(.*)\.(tsx?|jsx?|mjs|cjs)$/);
    if (m) {
      const base = m[1] === 'api' ? 'api/' : '';
      routes.push({
        routePath: toRoutePath(normalizeRouteFileStem(`${base}${m[2]}`)),
        file: rel,
        type: 'route',
      });
      continue;
    }

    // Astro pages and endpoints. This is limited framework support, but route
    // mapping still improves reports when Vercel metrics use user-facing paths.
    // Note: ts/tsx/js/jsx files under src/pages were already claimed by the
    // `pages/` rule above, so only .astro/.mjs/.cjs files reach this point.
    m = rel.match(/^src\/pages\/(.*)\.(astro|tsx?|jsx?|mjs|cjs)$/);
    if (m) {
      routes.push({
        routePath: toRoutePath(normalizeRouteFileStem(m[1])),
        file: rel,
        type: m[2] === 'astro' ? 'page' : 'route',
      });
      continue;
    }

    // SvelteKit: +page.svelte = page, +page.server.{ts,js} pairs with it (treat
    // as page), +server.{ts,js} = API route, +layout.* = ancestor layout context.
    // Route groups (auth) stripped like Next; dynamic segments [slug]/[...rest]/[[opt]] preserved.
    // The directory part is optional so files directly in src/routes map to '/'.
    m = rel.match(/^src\/routes\/(?:(.*)\/)?\+(page\.svelte|page\.server\.(?:ts|js)|server\.(?:ts|js)|layout\.svelte|layout\.server\.(?:ts|js))$/);
    if (m) {
      const fileType = m[2];
      const segs = (m[1] ?? '')
        .split('/')
        .filter(Boolean)
        .filter((seg) => !isRouteGroup(seg));
      const routePath = segs.length === 0 ? '/' : `/${segs.join('/')}`;
      const type = fileType.startsWith('server') ? 'route' : fileType.startsWith('layout') ? 'layout' : 'page';
      // When +page.svelte AND +page.server.ts both exist, +page.svelte wins ownership.
      const existing = type === 'layout'
        ? null
        : routes.find((r) => r.routePath === routePath && r.type !== 'layout');
      if (existing) {
        if (fileType === 'page.svelte' && existing.type === 'page') {
          existing.file = rel;
        }
        continue;
      }
      routes.push({ routePath, file: rel, type });
      continue;
    }
  }

  return routes.sort((a, b) =>
    a.routePath.localeCompare(b.routePath)
    || routeTypeOrder(a.type) - routeTypeOrder(b.type)
    || a.file.localeCompare(b.file)
  );
}
|
|
|
|
/**
 * Maps an App Router file stem to a route entry type.
 *
 * @param {string} name - File stem (`page`, `route` or `layout`).
 * @returns {'page'|'route'|'layout'} `route` and `layout` map to themselves;
 *   anything else is treated as a page.
 */
function routeEntryType(name) {
  if (name === 'route') {
    return 'route';
  }
  if (name === 'layout') {
    return 'layout';
  }
  return 'page';
}
|
|
|
|
/**
 * Turns a route file stem (path without extension) into a route path without
 * its leading slash.
 *
 * Steps, in order:
 *   1. drop a trailing HTTP-method suffix (`.get`, `.post`, ...),
 *   2. drop a trailing `/index`, or a stem that is exactly `index`,
 *   3. trim leading and trailing slashes.
 *
 * The method suffix is removed first so that `api/users/index.get` collapses
 * to `api/users`; stripping `index` first would miss it because the stem still
 * ends in `.get` at that point.
 *
 * @param {string|null|undefined} stem - File stem; nullish is treated as ''.
 * @returns {string} Normalized stem; '' means the root route.
 */
function normalizeRouteFileStem(stem) {
  return String(stem ?? '')
    .replace(/\.(?:get|post|put|patch|delete|options|head)$/, '')
    .replace(/\/index$/, '')
    .replace(/^index$/, '')
    .replace(/^\/+|\/+$/g, '');
}
|
|
|
|
/**
 * Sort rank for a route type: pages first, then routes, then layouts, then
 * anything unrecognized.
 *
 * @param {string} type - Route type.
 * @returns {number} 0 for `page`, 1 for `route`, 2 for `layout`, 3 otherwise.
 */
function routeTypeOrder(type) {
  switch (type) {
    case 'page':
      return 0;
    case 'route':
      return 1;
    case 'layout':
      return 2;
    default:
      return 3;
  }
}
|
|
|
|
/**
 * Looks up the route path owned by a file.
 *
 * @param {string} filePath - File path to look up, in the same form as the
 *   routes' `file` values.
 * @param {Array<{ file: string, routePath?: string }>} routes - Route table.
 * @returns {string|null} `routePath` of the first route whose `file` equals
 *   `filePath`, or null when there is no such route (or it has no path).
 */
function mapFileToRoute(filePath, routes) {
  for (const candidate of routes) {
    if (candidate.file === filePath) {
      // First match decides, even if it carries no routePath.
      return candidate.routePath ?? null;
    }
  }
  return null;
}
|
|
|
|
// Run the scan. Any error that escapes main() is reported on stderr and the
// process exits with status 1.
main().catch((error) => {
  const { message } = error;
  process.stderr.write(`[scan-codebase] FAILED: ${message}\n`);
  process.exit(1);
});
|