playbook/brooks-lint/scripts/report-parse.mjs

178 lines
6.3 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Parse a brooks-lint Markdown report into structured findings.
*
* The report format is defined by the Report Template in skills/_shared/common.md:
* findings live under severity sub-headers (### 🔴 Critical / 🟡 Warning /
* 🟢 Suggestion), each finding is a bold title line `**Risk Name — title**`
* followed by Symptom / Source / Consequence / Remedy fields.
*
* Consumed by sarif.mjs (SARIF export) and ci-review.mjs (severity gates).
* Best-effort: the report is LLM-authored, so the parser tolerates bracket
* placeholders, an explicit `(R2)` code, and inline severity emoji.
*/
/**
 * Risk catalogue keyed by canonical code: R1–R6 and T1–T6, each mapped to its
 * display name (decay-risks.md + test-decay-risks.md).
 */
export const RISK_CATALOG = {
  R1: "Cognitive Overload",
  R2: "Change Propagation",
  R3: "Knowledge Duplication",
  R4: "Accidental Complexity",
  R5: "Dependency Disorder",
  R6: "Domain Model Distortion",
  T1: "Test Obscurity",
  T2: "Test Brittleness",
  T3: "Test Duplication",
  T4: "Mock Abuse",
  T5: "Coverage Illusion",
  T6: "Architecture Mismatch",
};

/** Reverse index of the catalogue: lower-cased display name → risk code. */
const NAME_TO_CODE = {};
for (const [riskCode, displayName] of Object.entries(RISK_CATALOG)) {
  NAME_TO_CODE[displayName.toLowerCase()] = riskCode;
}
// The template prescribes a bare `### 🔴 Critical`, but LLM output drifts —
// tolerate a plural and a trailing "Issues"/"Findings"/"Items" qualifier while
// still anchoring on the line so section headers like `## Findings` never match.
// Also accepted: heading levels 2–6, an optional leading emoji, an optional
// trailing colon, and any letter case. Capture group 1 is the severity word.
const SEVERITY_HEADER_RE =
/^#{2,6}\s*(?:🔴|🟡|🟢|⚠️?|❗)?\s*(Critical|Warning|Suggestion)s?(?:\s+(?:Issues?|Findings?|Items?))?\s*:?\s*$/i;
// Any heading line: one to six `#` followed by whitespace. parseFindings checks
// this only after SEVERITY_HEADER_RE, so a hit here means a non-severity header.
const SECTION_HEADER_RE = /^#{1,6}\s/;
// A line that is nothing but `**bold text**`, optionally preceded by a severity
// emoji. Capture group 1 is the text between the asterisks.
const BOLD_TITLE_RE = /^\s*(?:🔴|🟡|🟢)?\s*\*\*(.+?)\*\*\s*$/;
// A `Symptom:` / `Source:` / `Consequence:` / `Remedy:` line (case-insensitive,
// leading whitespace allowed). Group 1 is the label, group 2 the rest of the line.
const FIELD_RE = /^\s*(Symptom|Source|Consequence|Remedy)\s*[:]\s*(.*)$/i;
// Inline severity emoji → severity name, matching the lower-cased words that
// SEVERITY_HEADER_RE captures.
const EMOJI_SEVERITY = { "🔴": "critical", "🟡": "warning", "🟢": "suggestion" };
// A path with a directory separator, or a bare filename with a known source
// extension — optionally followed by `:line`. The extension allowlist keeps
// prose like "e.g." or "i.e." from being mistaken for a file reference.
//
// The `(?!\w)` after the allowlist forces the WHOLE extension to match. Without
// it the alternation stops at the first entry that is a prefix of the real
// extension: `App.tsx:12` was reported as `App.ts` with no line (likewise
// .jsx/.cs/.hpp/.kts/.rsx/.mm), and prose such as `package.json` or
// `this.component` was reported as `package.js` / `this.c`.
//
// Capture group 1 is the file reference; group 2 is the optional line digits.
const LOCATION_RE =
  /([\w.-]*\/[\w./-]*\.\w+|[\w.-]+\.(?:ts|tsx|js|jsx|mjs|cjs|py|java|go|rb|rs|cc|cpp|cxx|c|h|hpp|cs|php|kt|kts|swift|scala|vue|sql|rsx|m|mm)(?!\w))(?::(\d+))?/;
/**
 * Split the text of a bold title line into the risk-name part and the title.
 *
 * Recognised separators: an em or en dash (surrounding spaces optional), a
 * space-delimited `--` or `-`, or a colon. The LEFTMOST separator in the string
 * is the split point, whichever kind it is.
 *
 * @param {string} bold - Text between the `**` markers.
 * @returns {{namePart: string, title: string}} Both trimmed; `title` is ""
 *   when no separator is present.
 */
function splitTitle(bold) {
  const separator = /\s*[—–]\s*|\s+--\s+|\s+-\s+|\s*:\s*/;
  const hit = bold.match(separator);
  if (hit === null) {
    return { namePart: bold.trim(), title: "" };
  }
  const head = bold.slice(0, hit.index);
  const tail = bold.slice(hit.index + hit[0].length);
  return { namePart: head.trim(), title: tail.trim() };
}
/**
 * Resolve the risk code for the name part of a finding title.
 *
 * Resolution order:
 *   1. An explicit code token (`R1`–`R6`, `T1`–`T6`) anywhere in the text, in
 *      any letter case, e.g. `Change Propagation (R2)` or `(r2)`.
 *   2. An exact, case-insensitive match of the bracket-stripped text against a
 *      catalogue display name.
 *   3. The longest catalogue display name the text starts with.
 *
 * @param {string} namePart - Text before the title separator.
 * @returns {string|null} Upper-case risk code, or null when nothing matches.
 */
function resolveCode(namePart) {
  // Case-insensitive so a lower-case `(r2)` resolves too; the result is
  // normalised to upper case either way.
  const explicit = namePart.match(/\b([RT][1-6])\b/i);
  if (explicit) return explicit[1].toUpperCase();
  // No code token remains past this point, so only `[placeholder]` brackets
  // need stripping before the name lookup.
  const cleaned = namePart.replace(/[[\]]/g, "").trim().toLowerCase();
  // Own-property check: a plain `NAME_TO_CODE[cleaned]` would return inherited
  // members (e.g. the `constructor` function) for names that are not risks.
  if (Object.prototype.hasOwnProperty.call(NAME_TO_CODE, cleaned)) {
    return NAME_TO_CODE[cleaned];
  }
  // Fallback: a missed separator can leave trailing words on the name, so match
  // the longest known risk name the cleaned string starts with.
  const prefix = Object.keys(NAME_TO_CODE)
    .filter((name) => cleaned.startsWith(name))
    .sort((a, b) => b.length - a.length)[0];
  return prefix ? NAME_TO_CODE[prefix] : null;
}
/**
 * Find the first file reference in a piece of text.
 *
 * @param {string|null|undefined} text - Text to search; nullish is treated as "".
 * @returns {{file: string|null, line: number|null}} The matched path and its
 *   `:line` suffix as a base-10 integer; `line` is null when there is no
 *   suffix, and both are null when nothing matches.
 */
export function extractLocation(text) {
  const haystack = text ?? "";
  const hit = haystack.match(LOCATION_RE);
  if (hit) {
    // The extension allowlist is non-capturing, so the captures are just the
    // path (1) and the optional line digits (2).
    const [, file, lineDigits] = hit;
    const line = lineDigits ? Number.parseInt(lineDigits, 10) : null;
    return { file, line };
  }
  return { file: null, line: null };
}
/**
 * Parse a report into an array of findings.
 *
 * Line-oriented state machine:
 *   - a severity header opens a severity group;
 *   - any other header closes the group (bold lines outside a group are ignored);
 *   - inside a group, a bold title line starts a new finding;
 *   - `Symptom:` / `Source:` / `Consequence:` / `Remedy:` lines set a field;
 *   - any other non-blank line is appended to the field in progress.
 *
 * Lines inside a fenced code block (``` or ~~~) are only ever continuation
 * text: a `# comment` in a snippet must not be read as a section header, which
 * would end the group and silently drop every later finding in it.
 *
 * A finding is kept only when it has a resolved risk code or a symptom. Its
 * location is taken from the Symptom, falling back to Source + Consequence.
 *
 * @param {string|null|undefined} report - Markdown report; nullish is treated as "".
 * @returns {Array<{severity, riskCode, riskName, title, symptom, source,
 * consequence, remedy, file, line}>}
 */
export function parseFindings(report) {
  const lines = (report ?? "").split(/\r?\n/);
  const findings = [];
  // Opening/closing fence line. A backtick fence line may not contain further
  // backticks, so an inline ```code``` span is not mistaken for a fence.
  const fenceRe = /^\s*(`{3,}(?=[^`]*$)|~{3,})/;
  let severity = null;
  let current = null;
  let field = null;
  // Marker character ("`" or "~") of the fence we are inside, or null.
  let fence = null;

  const commit = () => {
    if (!current) return;
    // Keep only blocks that look like real findings (a known risk or a symptom).
    if (current.riskCode || current.symptom) findings.push(current);
    current = null;
    field = null;
  };

  // Continuation text for the field in progress; a no-op outside a field.
  const appendToField = (line) => {
    const text = line.trim();
    if (current && field && text) {
      current[field] = `${current[field]} ${text}`.trim();
    }
  };

  for (const line of lines) {
    // Severity headers are honoured even inside a fence, and close it: the
    // pattern is specific enough not to occur in snippets, and this bounds the
    // damage of a fence the report never terminated.
    const sevHeader = line.match(SEVERITY_HEADER_RE);
    if (sevHeader) {
      commit();
      severity = sevHeader[1].toLowerCase();
      fence = null;
      continue;
    }

    const fenceMatch = line.match(fenceRe);
    const fenceChar = fenceMatch ? fenceMatch[1][0] : null;
    if (fenceChar !== null && (fence === null || fence === fenceChar)) {
      // Opens a fence, or closes the one opened with the same marker.
      fence = fence === null ? fenceChar : null;
      appendToField(line);
      continue;
    }
    if (fence !== null) {
      appendToField(line);
      continue;
    }

    if (SECTION_HEADER_RE.test(line)) {
      // A non-severity header ends the current Findings group (e.g. ## Summary).
      commit();
      severity = null;
      continue;
    }

    const bold = severity && line.match(BOLD_TITLE_RE);
    if (bold) {
      commit();
      // An inline emoji on the title line overrides the group's severity.
      const emoji = line.match(/^\s*(🔴|🟡|🟢)/);
      const { namePart, title } = splitTitle(bold[1]);
      const riskCode = resolveCode(namePart);
      current = {
        severity: emoji ? EMOJI_SEVERITY[emoji[1]] : severity,
        riskCode,
        riskName: riskCode ? RISK_CATALOG[riskCode] : namePart.replace(/[[\]]/g, "").trim(),
        title,
        symptom: "",
        source: "",
        consequence: "",
        remedy: "",
        file: null,
        line: null,
      };
      field = null;
      continue;
    }

    if (!current) continue;

    const fieldMatch = line.match(FIELD_RE);
    if (fieldMatch) {
      field = fieldMatch[1].toLowerCase();
      current[field] = fieldMatch[2].trim();
      continue;
    }

    // Continuation line for the field in progress.
    appendToField(line);
  }
  commit();

  for (const f of findings) {
    // Location belongs in the Symptom, but fall back to Source/Consequence when
    // it's absent. Remedy is excluded — it often names a destination, not the
    // site of the finding.
    const fromSymptom = extractLocation(f.symptom);
    const loc = fromSymptom.file ? fromSymptom : extractLocation(`${f.source} ${f.consequence}`);
    f.file = loc.file;
    f.line = loc.line;
  }
  return findings;
}
/**
 * Tally the findings of a report per severity.
 *
 * @param {string|null|undefined} report - Report text, passed to parseFindings unchanged.
 * @returns {{critical: number, warning: number, suggestion: number}} Counts per
 *   severity; findings whose severity is not one of the three keys are skipped.
 */
export function countFindings(report) {
  const tally = { critical: 0, warning: 0, suggestion: 0 };
  parseFindings(report).forEach(({ severity }) => {
    if (tally[severity] === undefined) return;
    tally[severity] += 1;
  });
  return tally;
}