/**
 * Unit tests for the brooks-lint helper scripts: frontmatter parsing, prompt
 * assembly, history tracking, eval classification, report parsing, SARIF
 * export, the CI gate and the parser-fidelity benchmark.
 *
 * Run: node scripts/validate-repo.test.mjs
 *
 * Uses Node.js built-in assert — no test framework required.
 */

import assert from "node:assert/strict";
import { execFileSync } from "node:child_process";
import { readFileSync, writeFileSync, mkdtempSync, rmSync } from "node:fs";
import { fileURLToPath } from "node:url";
import path from "node:path";
import os from "node:os";
import { assembleSystemPrompt, VALID_MODES } from "./assemble-prompt.mjs";
import { readHistory, appendHistory, getTrend, normalizeMode, sparkline, renderHistory } from "./history.mjs";
import {
  parseFrontmatterBooks,
  countBookSections,
  countProductionRisks,
  countTestRisks,
  extractChangelogVersion,
  extractGuideStepLabels,
} from "./frontmatter.mjs";
import { extractRiskCodes, classify } from "./eval-utils.mjs";
import { parseFindings, countFindings, extractLocation } from "./report-parse.mjs";
import { reportToSarif } from "./sarif.mjs";
import { severityBreached, isRegression } from "./ci-gate.mjs";
import { summarize } from "./benchmark.mjs";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Running tallies, reported (and turned into the exit code) at the end of the file.
let passed = 0;
let failed = 0;

/**
 * Runs one synchronous test case and records the outcome.
 *
 * A test passes when `fn` returns without throwing. Any thrown value is
 * reported and counted as a failure; it is never rethrown, so one failing
 * test does not stop the rest of the run.
 *
 * @param {string} name - Human-readable test description printed with the result.
 * @param {() => void} fn - Test body; must be synchronous.
 */
function test(name, fn) {
  try {
    fn();
    console.log(` ✓ ${name}`);
    passed++;
  } catch (err) {
    // Code under test may throw something that is not an Error (a string,
    // null, ...); reading `.message` blindly would print "undefined" or crash.
    const reason = err instanceof Error ? err.message : String(err);
    console.error(` ✗ ${name}`);
    console.error(` ${reason}`);
    failed++;
  }
}

// ── parseFrontmatterBooks ──────────────────────────────────────────────────

console.log("\nparseFrontmatterBooks");

test("returns book titles from valid frontmatter", () => {
  const text = [
    "---",
    "books:",
    " - The Mythical Man-Month",
    " - Code Complete",
    "---",
    "",
    "# Content",
  ].join("\n");
  assert.deepEqual(parseFrontmatterBooks(text), ["The Mythical Man-Month", "Code Complete"]);
});

test("returns null when file has no frontmatter", () => {
  const text = "# Source Coverage Matrix\n\nSome content here.";
  assert.equal(parseFrontmatterBooks(text), null);
});

test("returns null when frontmatter has no books key", () => {
  const text = "---\nversion: 1\nauthor: hyhmrright\n---\n\n# Content";
  assert.equal(parseFrontmatterBooks(text), null);
});

test("returns null when books list is empty", () => {
  const text = "---\nbooks:\n---\n\n# Content";
  assert.equal(parseFrontmatterBooks(text), null);
});

test("handles 4-space indentation", () => {
  // The list items are indented by four spaces so the fixture actually
  // exercises the indentation depth named in the test title.
  const text = "---\nbooks:\n    - The Mythical Man-Month\n    - Code Complete\n---\n";
  assert.deepEqual(parseFrontmatterBooks(text), ["The Mythical Man-Month", "Code Complete"]);
});

test("handles CRLF line endings", () => {
  const text = "---\r\nbooks:\r\n - The Mythical Man-Month\r\n - Code Complete\r\n---\r\n";
  assert.deepEqual(parseFrontmatterBooks(text), ["The Mythical Man-Month", "Code Complete"]);
});

test("handles titles containing colons", () => {
  const text = "---\nbooks:\n - Domain-Driven Design: Tackling Complexity\n---\n";
  assert.deepEqual(parseFrontmatterBooks(text), ["Domain-Driven Design: Tackling Complexity"]);
});

test("strips surrounding whitespace from titles", () => {
  const text = "---\nbooks:\n - Padded Title \n---\n";
  assert.deepEqual(parseFrontmatterBooks(text), ["Padded Title"]);
});

test("handles single-book list", () => {
  const text = "---\nbooks:\n - The Pragmatic Programmer\n---\n";
  assert.deepEqual(parseFrontmatterBooks(text), ["The Pragmatic Programmer"]);
});

test("ignores non-books frontmatter keys before books:", () => {
  const text = "---\nname: brooks-lint\nbooks:\n - Refactoring\n---\n";
  assert.deepEqual(parseFrontmatterBooks(text), ["Refactoring"]);
});

test("ignores non-books frontmatter keys after books:", () => {
  const text = "---\nbooks:\n - Refactoring\nversion: 1\n---\n";
  assert.deepEqual(parseFrontmatterBooks(text), ["Refactoring"]);
});

// ── countBookSections ──────────────────────────────────────────────────────

console.log("\ncountBookSections");

test("counts sections matching '## Author — *Title*'", () => {
  const text = [
    "## Frederick Brooks — *The Mythical Man-Month*",
    "some content",
    "## Steve McConnell — *Code Complete*",
    "more content",
  ].join("\n");
  assert.equal(countBookSections(text), 2);
});

test("returns 0 when no book sections exist", () => {
  assert.equal(countBookSections("## No Em Dash Here\n## Also No Match\n"), 0);
});

test("does not count lines without the em-dash separator", () => {
  const text = "## Author Name *Book Title*\n## Author — *Real Book*\n";
  assert.equal(countBookSections(text), 1);
});

// ── countProductionRisks ───────────────────────────────────────────────────

console.log("\ncountProductionRisks");

test("counts '## Risk N:' headers", () => {
  const text = "## Risk 1: Cognitive Overload\n## Risk 2: Change Propagation\n## Risk 3: Knowledge Duplication\n";
  assert.equal(countProductionRisks(text), 3);
});

test("returns 0 when no production risk headers present", () => {
  assert.equal(countProductionRisks("## Risk T1: Test Obscurity\n"), 0);
});

test("does not count test risk headers (Risk T…)", () => {
  const text = "## Risk T1: Test Obscurity\n## Risk 1: Real Risk\n";
  assert.equal(countProductionRisks(text), 1);
});

// ── countTestRisks ─────────────────────────────────────────────────────────

console.log("\ncountTestRisks");

test("counts '## Risk TN:' headers", () => {
  const text = "## Risk T1: Test Obscurity\n## Risk T2: Test Brittleness\n";
  assert.equal(countTestRisks(text), 2);
});

test("returns 0 when no test risk headers present", () => {
  assert.equal(countTestRisks("## Risk 1: Cognitive Overload\n"), 0);
});

test("does not count production risk headers", () => {
  const text = "## Risk 1: Real Risk\n## Risk T1: Test Risk\n## Risk T2: Another\n";
  assert.equal(countTestRisks(text), 2);
});

// ── extractChangelogVersion ────────────────────────────────────────────────

console.log("\nextractChangelogVersion");

test("extracts version from standard changelog header", () => {
  const text = "# Changelog\n\n## [1.2.3] - 2026-04-12\n\nSome changes.";
  assert.equal(extractChangelogVersion(text), "1.2.3");
});

test("returns the first (latest) version when multiple entries exist", () => {
  const text = "## [2.0.0] - 2026-04-12\n\n## [1.9.0] - 2026-03-01\n";
  assert.equal(extractChangelogVersion(text), "2.0.0");
});

test("returns null when no version header found", () => {
  assert.equal(extractChangelogVersion("# Changelog\n\nNo versions yet."), null);
});

// ── extractGuideStepLabels ───────────────────────────────────────────────

console.log("\nextractGuideStepLabels");

test("extracts step labels from standard headings", () => {
  const text = "### Step 1: Understand scope\n### Step 2: Scan\n### Step 3: Output\n";
  assert.deepEqual(extractGuideStepLabels(text), ["1", "2", "3"]);
});

test("extracts sub-step labels (a/b suffixes)", () => {
  const text = "### Step 2a: Scan for Brittleness\n### Step 2b: Scan for Mock Abuse\n";
  assert.deepEqual(extractGuideStepLabels(text), ["2a", "2b"]);
});

test("handles 0-indexed steps", () => {
  const text = "### Step 0: Gather Context\n### Step 1: Draw Graph\n### Step 2: Scan\n";
  assert.deepEqual(extractGuideStepLabels(text), ["0", "1", "2"]);
});

test("returns empty array when no step headings exist", () => {
  assert.deepEqual(extractGuideStepLabels("## Process\n\nSome text.\n"), []);
});

test("ignores non-step headings", () => {
  const text = "### Before You Start\n### Step 1: Real Step\n### Output\n";
  assert.deepEqual(extractGuideStepLabels(text), ["1"]);
});

test("handles mixed main and sub-steps", () => {
  const text = [
    "### Step 1: First",
    "### Step 2: Second",
    "### Step 2b: Sub of second",
    "### Step 3: Third",
  ].join("\n");
  assert.deepEqual(extractGuideStepLabels(text), ["1", "2", "2b", "3"]);
});

test("handles full pr-review-guide pattern", () => {
  const text = [
    "### Step 1: Understand the scope",
    "### Step 2: Scan for Change Propagation",
    "### Step 3: Scan for Cognitive Overload",
    "### Step 4: Scan for Knowledge Duplication",
    "### Step 5: Scan for Accidental Complexity",
    "### Step 6a: Scan for Dependency Disorder",
    "### Step 6b: Scan for Domain Model Distortion",
    "### Step 7: Quick Test Check",
  ].join("\n");
  assert.deepEqual(
    extractGuideStepLabels(text),
    ["1", "2", "3", "4", "5", "6a", "6b", "7"],
  );
});

// ── assembleSystemPrompt / VALID_MODES ─────────────────────────────────────

console.log("\nassembleSystemPrompt");

test("includes sweep in VALID_MODES", () => {
  assert.ok(VALID_MODES.includes("sweep"));
});

test("assembles sweep prompt with both risk catalogs and sweep guide", () => {
  const prompt = assembleSystemPrompt("sweep", path.join(__dirname, "..", "skills"));
  assert.match(prompt, /## Risk 1: Cognitive Overload/);
  assert.match(prompt, /## Risk T1: Test Obscurity/);
  assert.match(prompt, /# Brooks-Lint .* Full Sweep Guide/);
});

// ── readHistory ────────────────────────────────────────────────────────────

console.log("\nreadHistory");

/**
 * Creates a fresh temporary directory, passes its path to `fn`, and removes
 * the directory afterwards whether or not `fn` throws.
 *
 * @param {(dir: string) => void} fn - Synchronous callback that receives the directory path.
 */
function withTempDir(fn) {
  const dir = mkdtempSync(path.join(os.tmpdir(), "brooks-lint-test-"));
  try {
    fn(dir);
  } finally {
    // `force` keeps cleanup from throwing when the directory is already gone,
    // which would otherwise replace the assertion error raised by `fn`.
    rmSync(dir, { recursive: true, force: true });
  }
}

test("returns empty array when history file does not exist", () => {
  withTempDir(dir => assert.deepEqual(readHistory(dir), []));
});

test("returns parsed array when history file exists", () => {
  withTempDir(dir => {
    const record = {
      date: "2026-04-16T00:00:00Z",
      mode: "PR Review",
      score: 85,
      findings: { critical: 0, warning: 1, suggestion: 2 },
      scope: "staged changes",
    };
    writeFileSync(path.join(dir, ".brooks-lint-history.json"), JSON.stringify([record]));
    assert.deepEqual(readHistory(dir), [record]);
  });
});

test("returns empty array when history file contains invalid JSON", () => {
  withTempDir(dir => {
    writeFileSync(path.join(dir, ".brooks-lint-history.json"), "not valid json");
    assert.deepEqual(readHistory(dir), []);
  });
});

// ── appendHistory ─────────────────────────────────────────────────────────

console.log("\nappendHistory");

test("creates history file with first record", () => {
  withTempDir(dir => {
    const record = {
      date: "2026-04-16T00:00:00Z",
      mode: "PR Review",
      score: 82,
      findings: { critical: 1, warning: 2, suggestion: 3 },
      scope: "staged changes (3 files)",
    };
    appendHistory(dir, record);
    assert.deepEqual(readHistory(dir), [record]);
  });
});

test("appends to existing history without overwriting", () => {
  withTempDir(dir => {
    const record1 = {
      date: "2026-04-15T00:00:00Z",
      mode: "PR Review",
      score: 85,
      findings: { critical: 0, warning: 1, suggestion: 2 },
      scope: "staged changes",
    };
    const record2 = {
      date: "2026-04-16T00:00:00Z",
      mode: "PR Review",
      score: 82,
      findings: { critical: 1, warning: 2, suggestion: 3 },
      scope: "staged changes (3 files)",
    };
    appendHistory(dir, record1);
    appendHistory(dir, record2);
    assert.deepEqual(readHistory(dir), [record1, record2]);
  });
});

// ── getTrend ───────────────────────────────────────────────────────────────

console.log("\ngetTrend");

test("returns null when history is empty", () => {
  assert.equal(getTrend([], "PR Review"), null);
});

test("returns null when no records for the requested mode", () => {
  const history = [{ mode: "Architecture Audit", score: 90 }];
  assert.equal(getTrend(history, "PR Review"), null);
});

test("returns lastScore and runCount for one prior record", () => {
  const history = [{ mode: "PR Review", score: 85 }];
  const trend = getTrend(history, "PR Review");
  assert.equal(trend.lastScore, 85);
  assert.equal(trend.runCount, 1);
});

test("returns most recent score when multiple records exist", () => {
  const history = [
    { mode: "PR Review", score: 90 },
    { mode: "PR Review", score: 85 },
    { mode: "PR Review", score: 82 },
  ];
  const trend = getTrend(history, "PR Review");
  assert.equal(trend.lastScore, 82);
  assert.equal(trend.runCount, 3);
});

test("ignores records for other modes", () => {
  const history = [
    { mode: "Architecture Audit", score: 90 },
    { mode: "PR Review", score: 85 },
    { mode: "PR Review", score: 82 },
  ];
  const trend = getTrend(history, "PR Review");
  assert.equal(trend.lastScore, 82);
  assert.equal(trend.runCount, 2);
});

test("matches a canonical query against display-name records", () => {
  // Regression: ci-review.mjs queries with the canonical mode ("review") while
  // records written by the model are stored as display names ("PR Review").
  const history = [{ mode: "PR Review", score: 88 }];
  const trend = getTrend(history, "review");
  assert.equal(trend.lastScore, 88);
  assert.equal(trend.runCount, 1);
});

// ── normalizeMode ────────────────────────────────────────────────────────────

console.log("\nnormalizeMode");

test("maps display names to canonical modes", () => {
  assert.equal(normalizeMode("PR Review"), "review");
  assert.equal(normalizeMode("Architecture Audit"), "audit");
  assert.equal(normalizeMode("Tech Debt Assessment"), "debt");
  assert.equal(normalizeMode("Full Sweep"), "sweep");
});

test("passes canonical names through unchanged", () => {
  assert.equal(normalizeMode("review"), "review");
  assert.equal(normalizeMode("health"), "health");
});

test("is case- and whitespace-insensitive", () => {
  assert.equal(normalizeMode(" pr review "), "review");
});

test("passes non-string input through unchanged", () => {
  assert.equal(normalizeMode(undefined), undefined);
});

// ── sparkline ────────────────────────────────────────────────────────────────

console.log("\nsparkline");

test("maps score extremes to the lowest and highest bars", () => {
  assert.equal(sparkline([0]), "▁");
  assert.equal(sparkline([100]), "█");
});

test("renders one bar per score and clamps out-of-range values", () => {
  assert.equal(sparkline([0, 50, 100]).length, 3);
  assert.equal(sparkline([150]), "█");
  assert.equal(sparkline([-10]), "▁");
});

// ── renderHistory ────────────────────────────────────────────────────────────

console.log("\nrenderHistory");

test("reports no history for an empty array", () => {
  assert.equal(renderHistory([]), "No history found.");
});

test("summarizes a single record as one run", () => {
  const out = renderHistory([{ mode: "PR Review", score: 88 }]);
  assert.match(out, /review/);
  assert.match(out, /1 run/);
});

test("collapses display-name and canonical records into one mode line", () => {
  const out = renderHistory([
    { mode: "PR Review", score: 70 },
    { mode: "review", score: 90 },
  ]);
  assert.match(out, /\+20 over 2 runs/);
});

// ── extractRiskCodes ───────────────────────────────────────────────────────

console.log("\nextractRiskCodes");

test("extracts R-codes from text", () => {
  assert.deepEqual([...extractRiskCodes("R1 and R2 are present")], ["R1", "R2"]);
});

test("extracts T-codes from text", () => {
  assert.deepEqual([...extractRiskCodes("T3 and T6 detected")], ["T3", "T6"]);
});

test("returns empty set when no risk codes present", () => {
  assert.equal(extractRiskCodes("no codes here").size, 0);
});

// ── classify ───────────────────────────────────────────────────────────────

console.log("\nclassify");

test("returns 'pass' when all expected codes found with Iron Law and Health Score", () => {
  const scenario = { expected_output: "R1" };
  const aiText = "R1 Symptom: x Source: y Consequence: z Remedy: w Health Score: 85/100";
  assert.equal(classify(scenario, aiText), "pass");
});

test("returns 'partial' when some codes found with Iron Law but Health Score absent", () => {
  const scenario = { expected_output: "R1 R2" };
  const aiText = "R1 Symptom: x Source: y Consequence: z Remedy: w";
  assert.equal(classify(scenario, aiText), "partial");
});

test("returns 'fail' when no expected codes found in output", () => {
  const scenario = { expected_output: "R1 R2" };
  const aiText = "Symptom: x Source: y Consequence: z Remedy: w Health Score: 85/100";
  assert.equal(classify(scenario, aiText), "fail");
});

test("returns 'false-positive-pass' for no_health_score when output has no score", () => {
  const scenario = { expected_output: "", no_health_score: true };
  assert.equal(classify(scenario, "output without a health score"), "false-positive-pass");
});

test("returns 'fail' for no_health_score when Health Score IS present in output", () => {
  const scenario = { expected_output: "", no_health_score: true };
  assert.equal(classify(scenario, "Health Score: 90/100"), "fail");
});
test("returns 'false-positive-pass' for no_risk_codes when expected code is absent", () => {
  const scenario = { expected_output: "R1", no_risk_codes: true };
  assert.equal(classify(scenario, "no risk codes here"), "false-positive-pass");
});

test("returns 'fail' for no_risk_codes when expected code IS present in output", () => {
  const scenario = { expected_output: "R1", no_risk_codes: true };
  assert.equal(classify(scenario, "output mentioning R1"), "fail");
});

test("returns 'false-positive-pass' for no_risk_codes when only an unrelated code appears", () => {
  const scenario = { expected_output: "R1", no_risk_codes: true };
  // AI may flag other risks; only the specific tested code failing is a false-positive
  assert.equal(classify(scenario, "R2 mentioned here"), "false-positive-pass");
});

test("returns 'fail' when codes found but Iron Law terms absent", () => {
  const scenario = { expected_output: "R1 R2" };
  const aiText = "R1 R2 Health Score: 85/100";
  assert.equal(classify(scenario, aiText), "fail");
});

// ── report-parse: parseFindings / countFindings / extractLocation ──────────

// Synthetic report with one finding per severity group, plus a bold line in
// the Summary section that must not be parsed as a finding.
const SAMPLE_REPORT = [
  "# Brooks-Lint Review",
  "",
  "**Health Score:** 62/100",
  "",
  "## Findings",
  "",
  "### 🔴 Critical",
  "",
  "**Change Propagation — Divergent change**",
  "Symptom: src/services/UserService.ts:42 handles auth, email, and billing.",
  "Source: Refactoring — Divergent Change",
  "Consequence: Every feature touches the same class.",
  "Remedy: Split into focused collaborators.",
  "",
  "### 🟡 Warning",
  "",
  "**Cognitive Overload (R1) — God method**",
  "Symptom: generate() in report_gen.py takes nine positional parameters.",
  "Source: A Philosophy of Software Design — shallow modules",
  "Consequence: Callers must understand the whole signature.",
  "Remedy: Introduce a ReportOptions object.",
  "",
  "### 🟢 Suggestion",
  "",
  "**Knowledge Duplication — Shipping rule copied**",
  "Symptom: the free-shipping threshold appears in cart.js and checkout.js.",
  "Source: The Pragmatic Programmer — DRY",
  "Consequence: A policy change must be made in two places.",
  "Remedy: Extract a single shippingPolicy module.",
  "",
  "## Summary",
  "",
  "**Bold prose, not a finding** — should be ignored.",
].join("\n");

console.log("\nparseFindings");

test("parses one finding per severity group", () => {
  assert.equal(parseFindings(SAMPLE_REPORT).length, 3);
});

test("maps risk name to code and keeps severity", () => {
  const [crit, warn, sug] = parseFindings(SAMPLE_REPORT);
  assert.deepEqual([crit.riskCode, crit.severity], ["R2", "critical"]);
  assert.deepEqual([warn.riskCode, warn.severity], ["R1", "warning"]);
  assert.deepEqual([sug.riskCode, sug.severity], ["R3", "suggestion"]);
});

test("resolves an explicit (R1) code in the title", () => {
  const warn = parseFindings(SAMPLE_REPORT)[1];
  assert.equal(warn.riskName, "Cognitive Overload");
  assert.equal(warn.title, "God method");
});

test("extracts file and line from the Symptom", () => {
  const crit = parseFindings(SAMPLE_REPORT)[0];
  assert.equal(crit.file, "src/services/UserService.ts");
  assert.equal(crit.line, 42);
});

test("ignores bold text outside any severity group", () => {
  // The Summary's bold line must not be counted as a finding.
  assert.ok(parseFindings(SAMPLE_REPORT).every((f) => f.title !== ""));
  assert.equal(parseFindings(SAMPLE_REPORT).length, 3);
});

test("empty report yields no findings", () => {
  assert.deepEqual(parseFindings(""), []);
  assert.deepEqual(parseFindings(null), []);
});

// Report using plural / qualified severity headers and a colon-separated
// finding title, with the only file path appearing in a Consequence line.
const VARIANT_REPORT = [
  "## Findings",
  "",
  "### 🔴 Critical Issues",
  "",
  "**Dependency Disorder: models import services**",
  "Symptom: a cyclic import exists.",
  "Source: Clean Architecture — the Dependency Rule",
  "Consequence: the build in app/core/wiring.ts breaks.",
  "Remedy: invert the dependency toward an interface.",
  "",
  "### 🟡 Warnings",
  "",
  "**Coverage Illusion — green but hollow**",
  "Symptom: the suite asserts nothing meaningful.",
  "Source: How Google Tests Software — coverage signal",
  "Consequence: regressions slip through unnoticed.",
  "Remedy: assert on observable outcomes.",
].join("\n");

test("tolerates plural / qualified severity headers", () => {
  // `### 🔴 Critical Issues` and `### 🟡 Warnings` must still register as groups.
  const f = parseFindings(VARIANT_REPORT);
  assert.equal(f.length, 2);
  assert.deepEqual([f[0].severity, f[1].severity], ["critical", "warning"]);
});

test("splits a colon-separated title and resolves its code", () => {
  const first = parseFindings(VARIANT_REPORT)[0];
  assert.equal(first.riskCode, "R5");
  assert.equal(first.riskName, "Dependency Disorder");
  assert.equal(first.title, "models import services");
});

test("falls back to Consequence for the location when Symptom has none", () => {
  const first = parseFindings(VARIANT_REPORT)[0];
  assert.equal(first.file, "app/core/wiring.ts");
});

console.log("\ncountFindings");

test("counts findings by severity", () => {
  assert.deepEqual(countFindings(SAMPLE_REPORT), { critical: 1, warning: 1, suggestion: 1 });
});

test("empty report counts all zero", () => {
  assert.deepEqual(countFindings(""), { critical: 0, warning: 0, suggestion: 0 });
});

console.log("\nextractLocation");

test("captures path with line number", () => {
  assert.deepEqual(extractLocation("see app/models/order.rb:128 only"), {
    file: "app/models/order.rb",
    line: 128,
  });
});

test("captures bare filename without a line", () => {
  assert.deepEqual(extractLocation("generate() in report_gen.py"), {
    file: "report_gen.py",
    line: null,
  });
});

test("does not mistake prose for a file reference", () => {
  assert.deepEqual(extractLocation("nothing here, e.g. no path"), { file: null, line: null });
  assert.deepEqual(extractLocation("see line 3 (i.e. nowhere)"), { file: null, line: null });
});

// ── sarif: reportToSarif ───────────────────────────────────────────────────

console.log("\nreportToSarif");

test("emits a SARIF 2.1.0 envelope", () => {
  const log = reportToSarif(SAMPLE_REPORT, { mode: "review", toolVersion: "1.3.0" });
  assert.equal(log.version, "2.1.0");
  assert.ok(log.$schema.includes("sarif-2.1.0"));
  assert.equal(log.runs[0].tool.driver.name, "brooks-lint");
  assert.equal(log.runs[0].tool.driver.version, "1.3.0");
});

test("declares a deduped, PascalCased rule per risk code", () => {
  const rules = reportToSarif(SAMPLE_REPORT).runs[0].tool.driver.rules;
  assert.deepEqual(rules.map((r) => r.id), ["R2", "R1", "R3"]);
  assert.equal(rules[0].name, "ChangePropagation");
});

test("maps severities to SARIF levels", () => {
  const results = reportToSarif(SAMPLE_REPORT).runs[0].results;
  assert.deepEqual(results.map((r) => r.level), ["error", "warning", "note"]);
});

test("attaches a physical location when a file is known", () => {
  const first = reportToSarif(SAMPLE_REPORT).runs[0].results[0];
  const loc = first.locations[0].physicalLocation;
  assert.equal(loc.artifactLocation.uri, "src/services/UserService.ts");
  assert.equal(loc.region.startLine, 42);
  assert.ok(first.message.text.includes("Remedy:"));
});

test("fingerprints are stable across runs", () => {
  const a = reportToSarif(SAMPLE_REPORT).runs[0].results[0].partialFingerprints.brooksLint;
  const b = reportToSarif(SAMPLE_REPORT).runs[0].results[0].partialFingerprints.brooksLint;
  assert.equal(a, b);
});

test("empty report yields no rules or results", () => {
  const log = reportToSarif("");
  assert.deepEqual(log.runs[0].tool.driver.rules, []);
  assert.deepEqual(log.runs[0].results, []);
});

test("routes T-code helpUri off the guide (no #t anchor) and R-code onto it", () => {
  const rules = reportToSarif(VARIANT_REPORT).runs[0].tool.driver.rules;
  const r5 = rules.find((r) => r.id === "R5");
  const t5 = rules.find((r) => r.id === "T5");
  assert.ok(r5.helpUri.endsWith("guide.html#r5"));
  assert.ok(t5.helpUri.includes("test-decay-risks.md"));
  assert.ok(!t5.helpUri.includes("#t5"));
});

test("declares a BL000 rule when a finding is unmapped", () => {
  const unmapped = [
    "## Findings",
    "",
    "### 🔴 Critical",
    "",
    "**Some Unknown Smell — mystery**",
    "Symptom: something odd in foo.ts.",
    "Consequence: unclear impact.",
    "Remedy: investigate.",
  ].join("\n");
  const run = reportToSarif(unmapped).runs[0];
  assert.equal(run.results[0].ruleId, "BL000");
  assert.ok(run.tool.driver.rules.some((r) => r.id === "BL000"));
});

// ── ci-gate: severityBreached / isRegression ───────────────────────────────

console.log("\nseverityBreached");

test("fail-on critical trips only on a critical finding", () => {
  assert.equal(severityBreached({ critical: 1, warning: 0, suggestion: 0 }, "critical"), true);
  assert.equal(severityBreached({ critical: 0, warning: 5, suggestion: 9 }, "critical"), false);
});

test("fail-on warning trips on critical or warning", () => {
  assert.equal(severityBreached({ critical: 0, warning: 1, suggestion: 0 }, "warning"), true);
  assert.equal(severityBreached({ critical: 2, warning: 0, suggestion: 0 }, "warning"), true);
  assert.equal(severityBreached({ critical: 0, warning: 0, suggestion: 3 }, "warning"), false);
});

test("fail-on none never trips", () => {
  assert.equal(severityBreached({ critical: 9, warning: 9, suggestion: 9 }, "none"), false);
});

test("missing or partial findings are treated as zero", () => {
  assert.equal(severityBreached(undefined, "critical"), false);
  assert.equal(severityBreached({}, "warning"), false);
});

console.log("\nisRegression");

test("only a negative numeric delta is a regression", () => {
  assert.equal(isRegression(-1), true);
  assert.equal(isRegression(0), false);
  assert.equal(isRegression(5), false);
  assert.equal(isRegression(null), false);
  assert.equal(isRegression(undefined), false);
});

// ── Parser-fidelity benchmark on the FROZEN real-report corpus ─────────────
// Deterministic regression guard: the shipped parser must reproduce the
// independently-graded finding inventory of 30 real model-generated reports.
// This is the non-circular counterpart to the synthetic SAMPLE_REPORT tests
// above — the reports here are real model output, the truth was graded by a
// separate pass and spot-checked by hand. See scripts/benchmark.mjs.

console.log("\nparser-fidelity benchmark (frozen real-report corpus)");

const CORPUS = JSON.parse(readFileSync(path.join(__dirname, "..", "evals", "benchmark-corpus.json"), "utf8"));
const BENCH = summarize(CORPUS);

test("corpus has >= 30 real reports spanning all six modes", () => {
  assert.ok(CORPUS.samples.length >= 30, `expected >=30 samples, got ${CORPUS.samples.length}`);
  const modes = new Set(CORPUS.samples.map((s) => s.mode));
  for (const m of VALID_MODES) assert.ok(modes.has(m), `corpus is missing mode ${m}`);
});

test("corpus composition matches the documented numbers (30 total, 9 false-positive)", () => {
  // These exact counts are published in the README "Reproducible benchmarks"
  // section — fail loudly if a corpus regen changes them without a docs update.
  assert.equal(CORPUS.samples.length, 30);
  assert.equal(CORPUS.samples.filter((s) => s.isFP).length, 9);
});

test("parser reproduces the graded severity counts on every report", () => {
  const bad = BENCH.rows.filter((r) => !r.countMatch).map((r) => `${r.id}: truth ${r.truth} vs parser ${r.parser}`);
  assert.equal(bad.length, 0, `count mismatches: ${bad.join("; ")}`);
});

test("every report emits valid SARIF 2.1.0", () => {
  const bad = BENCH.rows.filter((r) => !r.sarifValid).map((r) => r.id);
  assert.equal(bad.length, 0, `invalid SARIF for: ${bad.join(", ")}`);
});

test("risk-code extraction has zero false positives / negatives on the corpus", () => {
  assert.equal(BENCH.fp, 0, `${BENCH.fp} false-positive code(s)`);
  assert.equal(BENCH.fn, 0, `${BENCH.fn} false-negative code(s)`);
  assert.equal(BENCH.precision, 1);
  assert.equal(BENCH.recall, 1);
});

// ── Integration: validate-repo.mjs passes against current repo ─────────────

console.log("\nvalidate-repo integration");

test("validate-repo.mjs exits 0 against the current repository", () => {
  // Spawn the Node binary that is running this suite rather than whatever
  // "node" resolves to on PATH, which may be absent or a different version.
  execFileSync(process.execPath, [path.join(__dirname, "validate-repo.mjs")], { encoding: "utf8" });
  // execFileSync throws on non-zero exit — reaching here means exit 0
});

// ── Summary ────────────────────────────────────────────────────────────────

console.log(`\n${passed + failed} tests: ${passed} passed, ${failed} failed`);

// A non-zero exit code lets CI detect the failed run.
if (failed > 0) process.exit(1);