/**
 * Parse a brooks-lint Markdown report into structured findings.
 *
 * The report format is defined by the Report Template in skills/_shared/common.md:
 * findings live under severity sub-headers (### 🔴 Critical / 🟡 Warning /
 * 🟢 Suggestion), each finding is a bold title line `**Risk Name — title**`
 * followed by Symptom / Source / Consequence / Remedy fields.
 *
 * Consumed by sarif.mjs (SARIF export) and ci-review.mjs (severity gates).
 * Best-effort: the report is LLM-authored, so the parser tolerates bracket
 * placeholders, an explicit `(R2)` code, and inline severity emoji.
 */

/** Canonical risk code → display name (decay-risks.md + test-decay-risks.md). */
|
||
export const RISK_CATALOG = {
|
||
R1: "Cognitive Overload",
|
||
R2: "Change Propagation",
|
||
R3: "Knowledge Duplication",
|
||
R4: "Accidental Complexity",
|
||
R5: "Dependency Disorder",
|
||
R6: "Domain Model Distortion",
|
||
T1: "Test Obscurity",
|
||
T2: "Test Brittleness",
|
||
T3: "Test Duplication",
|
||
T4: "Mock Abuse",
|
||
T5: "Coverage Illusion",
|
||
T6: "Architecture Mismatch",
|
||
};
|
||
|
||
const NAME_TO_CODE = Object.fromEntries(
|
||
Object.entries(RISK_CATALOG).map(([code, name]) => [name.toLowerCase(), code]),
|
||
);
|
||
|
||
// The template prescribes a bare `### 🔴 Critical`, but LLM output drifts —
|
||
// tolerate a plural and a trailing "Issues"/"Findings"/"Items" qualifier while
|
||
// still anchoring on the line so section headers like `## Findings` never match.
|
||
const SEVERITY_HEADER_RE =
|
||
/^#{2,6}\s*(?:🔴|🟡|🟢|⚠️?|❗)?\s*(Critical|Warning|Suggestion)s?(?:\s+(?:Issues?|Findings?|Items?))?\s*:?\s*$/i;
|
||
const SECTION_HEADER_RE = /^#{1,6}\s/;
|
||
const BOLD_TITLE_RE = /^\s*(?:🔴|🟡|🟢)?\s*\*\*(.+?)\*\*\s*$/;
|
||
const FIELD_RE = /^\s*(Symptom|Source|Consequence|Remedy)\s*[::]\s*(.*)$/i;
|
||
const EMOJI_SEVERITY = { "🔴": "critical", "🟡": "warning", "🟢": "suggestion" };
|
||
|
||
// A path with a directory separator, or a bare filename with a known source
|
||
// extension — optionally followed by `:line`. The extension allowlist keeps
|
||
// prose like "e.g." or "i.e." from being mistaken for a file reference.
|
||
const LOCATION_RE =
|
||
/([\w.-]*\/[\w./-]*\.\w+|[\w.-]+\.(?:ts|tsx|js|jsx|mjs|cjs|py|java|go|rb|rs|cc|cpp|cxx|c|h|hpp|cs|php|kt|kts|swift|scala|vue|sql|rsx|m|mm))(?::(\d+))?/;
|
||
|
||
function splitTitle(bold) {
|
||
// Dash is the template separator; a colon is a common LLM variant. `.match`
|
||
// returns the leftmost hit, so a dash still wins when both are present.
|
||
const sep = bold.match(/\s*[—–]\s*|\s+--\s+|\s+-\s+|\s*:\s*/);
|
||
if (!sep) return { namePart: bold.trim(), title: "" };
|
||
return {
|
||
namePart: bold.slice(0, sep.index).trim(),
|
||
title: bold.slice(sep.index + sep[0].length).trim(),
|
||
};
|
||
}
|
||
|
||
function resolveCode(namePart) {
|
||
const explicit = namePart.match(/\b([RT][1-6])\b/);
|
||
if (explicit) return explicit[1].toUpperCase();
|
||
const cleaned = namePart
|
||
.replace(/\(([RT][1-6])\)/i, "")
|
||
.replace(/[[\]]/g, "")
|
||
.trim()
|
||
.toLowerCase();
|
||
if (NAME_TO_CODE[cleaned]) return NAME_TO_CODE[cleaned];
|
||
// Fallback: a missed separator can leave trailing words on the name, so match
|
||
// the longest known risk name the cleaned string starts with.
|
||
const prefix = Object.keys(NAME_TO_CODE)
|
||
.filter((name) => cleaned.startsWith(name))
|
||
.sort((a, b) => b.length - a.length)[0];
|
||
return prefix ? NAME_TO_CODE[prefix] : null;
|
||
}
|
||
|
||
/** Extract `{ file, line }` from text, or `{ file: null, line: null }`. */
|
||
export function extractLocation(text) {
|
||
const m = (text ?? "").match(LOCATION_RE);
|
||
if (!m) return { file: null, line: null };
|
||
// Group 1 is the path; group 2 is the optional `:line` (the extension list is
|
||
// a non-capturing group, so the line digits are m[2], not m[3]).
|
||
return { file: m[1], line: m[2] ? parseInt(m[2], 10) : null };
|
||
}
|
||
|
||
/**
|
||
* Parse a report into an array of findings.
|
||
* @returns {Array<{severity, riskCode, riskName, title, symptom, source,
|
||
* consequence, remedy, file, line}>}
|
||
*/
|
||
export function parseFindings(report) {
|
||
const lines = (report ?? "").split(/\r?\n/);
|
||
const findings = [];
|
||
let severity = null;
|
||
let current = null;
|
||
let field = null;
|
||
|
||
const commit = () => {
|
||
if (!current) return;
|
||
// Keep only blocks that look like real findings (a known risk or a symptom).
|
||
if (current.riskCode || current.symptom) findings.push(current);
|
||
current = null;
|
||
field = null;
|
||
};
|
||
|
||
for (const line of lines) {
|
||
const sevHeader = line.match(SEVERITY_HEADER_RE);
|
||
if (sevHeader) {
|
||
commit();
|
||
severity = sevHeader[1].toLowerCase();
|
||
continue;
|
||
}
|
||
if (SECTION_HEADER_RE.test(line)) {
|
||
// A non-severity header ends the current Findings group (e.g. ## Summary).
|
||
commit();
|
||
severity = null;
|
||
continue;
|
||
}
|
||
|
||
const bold = severity && line.match(BOLD_TITLE_RE);
|
||
if (bold) {
|
||
commit();
|
||
const emoji = line.match(/^\s*(🔴|🟡|🟢)/);
|
||
const { namePart, title } = splitTitle(bold[1]);
|
||
const riskCode = resolveCode(namePart);
|
||
current = {
|
||
severity: emoji ? EMOJI_SEVERITY[emoji[1]] : severity,
|
||
riskCode,
|
||
riskName: riskCode ? RISK_CATALOG[riskCode] : namePart.replace(/[[\]]/g, "").trim(),
|
||
title,
|
||
symptom: "",
|
||
source: "",
|
||
consequence: "",
|
||
remedy: "",
|
||
file: null,
|
||
line: null,
|
||
};
|
||
field = null;
|
||
continue;
|
||
}
|
||
|
||
if (!current) continue;
|
||
|
||
const fieldMatch = line.match(FIELD_RE);
|
||
if (fieldMatch) {
|
||
field = fieldMatch[1].toLowerCase();
|
||
current[field] = fieldMatch[2].trim();
|
||
continue;
|
||
}
|
||
// Continuation line for the field in progress.
|
||
if (field && line.trim()) {
|
||
current[field] = `${current[field]} ${line.trim()}`.trim();
|
||
}
|
||
}
|
||
commit();
|
||
|
||
for (const f of findings) {
|
||
// Location belongs in the Symptom, but fall back to Source/Consequence when
|
||
// it's absent. Remedy is excluded — it often names a destination, not the
|
||
// site of the finding.
|
||
const fromSymptom = extractLocation(f.symptom);
|
||
const loc = fromSymptom.file ? fromSymptom : extractLocation(`${f.source} ${f.consequence}`);
|
||
f.file = loc.file;
|
||
f.line = loc.line;
|
||
}
|
||
return findings;
|
||
}
|
||
|
||
/** Count findings by severity. @returns {{critical, warning, suggestion}} */
|
||
export function countFindings(report) {
|
||
const counts = { critical: 0, warning: 0, suggestion: 0 };
|
||
for (const f of parseFindings(report)) {
|
||
if (counts[f.severity] !== undefined) counts[f.severity] += 1;
|
||
}
|
||
return counts;
|
||
}
|