156 lines
6.7 KiB
JavaScript
156 lines
6.7 KiB
JavaScript
// 4-axis rubric (specificity, actionability, grounding, evidence) → bucket. See references/recommendations.md.
|
|
// Account-scope (platform_*) recs use a separate grounding/evidence pair — they structurally cannot produce file:line.
|
|
|
|
// Hedging vocabulary. scoreActionability subtracts a penalty per occurrence found in `rec.fix`.
const HEDGE_WORDS = /\b(consider|might|may|could|perhaps|maybe|likely|probably)\b/gi;

// An imperative verb at the start of any line (`m` flag), optionally preceded by a list marker
// ("- ", "1. ") or emphasis characters. Not global, so `.test()` keeps no state between calls.
// NOTE(review): there is no trailing word boundary, so longer words sharing a prefix also match
// ("address" → "add", "important" → "import") — confirm whether that is intended.
const VERB_OPENERS = /^\s*(?:[-*]\s+|\d+[.)]\s+|[*_]+)?(?:add|set|enable|disable|replace|remove|move|wrap|cache|defer|parallelize|introduce|configure|update|change|switch|opt[-\s]?in|opt[-\s]?out|export|import|install|run|delete|rename)/im;

// Nouns naming countable things (errors, requests, cold starts, ...).
const COUNT_WORDS_RE = /\b(errors?|queries|invocations|requests|reads|writes|bytes|fetch(?:es)?|calls?|hits?|misses?|seconds?|images?|deployments?|cold[- ]?starts?|users?)\b/gi;

// A number followed by a unit. The trailing `\b` is applied to the alphabetic units only:
// `%` is a non-word character, so `%\b` can match only when a word character follows directly,
// which would leave ordinary percentages ("50% of", "75%)", "12.5%.") uncounted.
const UNIT_RE = /\b\d[\d.,]*\s*(?:%|(?:ms|s|sec|seconds?|min|minutes?|h|hours?|GB|MB|KB|K|M|B|rps|qps|req\/s|reqs?\/min)\b)/gi;

// A triple-backtick fenced block, matched lazily so adjacent fences are separate matches.
const CODE_FENCE_RE = /```[\s\S]*?```/g;

// A single-line backtick span holding at least 10 characters.
const INLINE_CODE_RE = /`[^`\n]{10,}`/g;

// A path-like token ending in `.<ext>:<digits>`, e.g. `src/app/page.tsx:42`.
const FILE_LINE_RE = /[\w/.\-()\[\]]+\.\w+:\d+/g;

// Grounding + evidence are lie-detectors — weighted higher than specificity/actionability, which LLMs can game with fluff.
// The four weights sum to 1, so `overall` stays within [0, 1] when every axis does.
const W = { grounding: 0.35, evidence: 0.30, specificity: 0.20, actionability: 0.15 };
|
|
|
|
/**
 * Grade one recommendation on the four rubric axes and fold them into a weighted overall score.
 *
 * Account-scope recommendations (as decided by `isAccountScope`) use the account variants of the
 * grounding and evidence scorers; all others use the route variants.
 *
 * @param {object} rec - Recommendation to grade. A nullish value is graded as an empty recommendation.
 * @param {object} [ctx] - Grading context, forwarded to `scoreGrounding` for route-scope recs.
 *   A nullish value is treated as `{}`.
 * @returns {{specificity: number, actionability: number, grounding: number, evidence: number,
 *   overall: number, grade: string, scope: ('account'|'route')}} Per-axis scores, the weighted
 *   overall score rounded to 4 decimals, its grade label, and the scope that was applied.
 */
export function gradeRecommendation(rec, ctx = {}) {
  // The default parameter only covers `undefined`. An explicit `null` would otherwise throw
  // inside the scorers, even though isAccountScope already tolerates a nullish rec.
  const safeRec = rec ?? {};
  const safeCtx = ctx ?? {};

  const accountScope = isAccountScope(safeRec);
  const specificity = scoreSpecificity(safeRec);
  const actionability = scoreActionability(safeRec);
  const grounding = accountScope ? scoreGroundingAccount(safeRec) : scoreGrounding(safeRec, safeCtx);
  const evidence = accountScope ? scoreEvidenceAccount(safeRec) : scoreEvidence(safeRec);

  const overall = roundTo(
    grounding * W.grounding + evidence * W.evidence + specificity * W.specificity + actionability * W.actionability,
    4,
  );

  return {
    specificity,
    actionability,
    grounding,
    evidence,
    overall,
    grade: grade(overall),
    scope: accountScope ? 'account' : 'route',
  };
}
|
|
|
|
/**
 * Decide whether a recommendation is account-scoped.
 *
 * True when `rec.scope` is exactly 'account', or when `rec.candidateRef` is a string that starts
 * with 'platform_'. A nullish `rec` yields false.
 *
 * @param {object} [rec] - Recommendation to inspect.
 * @returns {boolean}
 */
function isAccountScope(rec) {
  const { scope, candidateRef } = rec ?? {};
  const declaredAccount = scope === 'account';
  const platformCandidate = typeof candidateRef === 'string' && candidateRef.startsWith('platform_');
  return declaredAccount || platformCandidate;
}
|
|
|
|
/**
 * Map an overall score to its grade label.
 *
 * Thresholds are inclusive lower bounds: 0.85 → 'Excellent', 0.70 → 'Good', 0.55 → 'Fair'.
 * Anything else, including NaN and non-numbers that fail every comparison, is 'Poor'.
 *
 * @param {number} overall - Weighted overall score.
 * @returns {string} One of 'Excellent', 'Good', 'Fair', 'Poor'.
 */
function grade(overall) {
  // Ordered from highest threshold down, so the first band that fits wins.
  const bands = [
    [0.85, 'Excellent'],
    [0.70, 'Good'],
    [0.55, 'Fair'],
  ];
  const band = bands.find(([minimum]) => overall >= minimum);
  return band ? band[1] : 'Poor';
}
|
|
|
|
/**
 * Specificity axis: rewards concrete code and concrete locations.
 *
 * Points, capped at 1:
 *   +0.5  a fenced code block in fix / currentBehavior / desiredBehavior
 *   +0.2  an inline code span (10+ characters) in the same fields
 *   +0.2  a non-empty `affectedFiles` array
 *   +0.3  any `findingRefs` entry containing `:<digits>`
 *
 * @param {object} rec - Recommendation to score.
 * @returns {number} Score in [0, 1], rounded to 4 decimals.
 */
function scoreSpecificity(rec) {
  const snippets = [];
  for (const field of [rec.fix, rec.currentBehavior, rec.desiredBehavior]) {
    if (typeof field === 'string') snippets.push(field);
  }
  const haystack = snippets.join('\n');

  // Both patterns are global, so `.test()` advances `lastIndex`; rewind after each probe
  // so the next caller starts from the beginning of its own text.
  const fenced = CODE_FENCE_RE.test(haystack);
  CODE_FENCE_RE.lastIndex = 0;
  const inlined = INLINE_CODE_RE.test(haystack);
  INLINE_CODE_RE.lastIndex = 0;

  const { affectedFiles, findingRefs } = rec;
  const namesFiles = Array.isArray(affectedFiles) && affectedFiles.length > 0;
  const hasLineRef = Array.isArray(findingRefs) && findingRefs.some((ref) => /:\d+/.test(ref));

  const total = [
    [fenced, 0.5],
    [inlined, 0.2],
    [namesFiles, 0.2],
    [hasLineRef, 0.3],
  ].reduce((sum, [hit, weight]) => (hit ? sum + weight : sum), 0);

  return Math.min(1, roundTo(total, 4));
}
|
|
|
|
/**
 * Actionability axis: scores how directly `rec.fix` tells the reader what to do.
 *
 * A missing, non-string or empty fix scores 0. Otherwise, clamped to [0, 1]:
 *   +0.30  baseline, so a verb-only one-liner still scores
 *   +0.35  some line opens with an imperative verb
 *   +0.35  two or more list steps, or +0.15 for exactly one
 *   -0.10  per hedge word, up to -0.30
 *
 * @param {object} rec - Recommendation to score.
 * @returns {number} Score in [0, 1], rounded to 4 decimals.
 */
function scoreActionability(rec) {
  const { fix } = rec;
  if (typeof fix !== 'string' || fix === '') return 0;

  const verbBonus = VERB_OPENERS.test(fix) ? 0.35 : 0;

  // Count lines that begin with a numbered ("1." / "1)") or bulleted ("-" / "*") marker.
  const steps = fix.match(/(?:^|\n)\s*(?:\d+[.)]\s+|[-*]\s+)/g)?.length ?? 0;
  let stepBonus = 0;
  if (steps >= 2) {
    stepBonus = 0.35;
  } else if (steps === 1) {
    stepBonus = 0.15;
  }

  // String#match with a global pattern leaves lastIndex at 0, so no manual rewind is needed.
  const hedgeCount = fix.match(HEDGE_WORDS)?.length ?? 0;
  const hedgePenalty = Math.min(0.3, hedgeCount * 0.1);

  const raw = verbBonus + stepBonus - hedgePenalty + 0.3;
  return Math.max(0, Math.min(1, roundTo(raw, 4)));
}
|
|
|
|
/**
 * Grounding axis (route scope): scores how firmly the recommendation is tied to known findings
 * and concrete code.
 *
 * Points, capped at 1:
 *   +0.50  at least one `findingRefs` entry equals a known finding's `file:line` key,
 *          or +0.25 when refs exist but none match
 *   +0.25  a non-empty `affectedFiles` array
 *   +0.25  a fenced code block in currentBehavior / desiredBehavior
 *   +0.10  a non-empty string `candidateRef`
 *
 * @param {object} rec - Recommendation to score.
 * @param {object} [ctx] - Grading context; only `ctx.knownFindings` (array of `{file, line}`) is read.
 *   A nullish ctx, a non-array `knownFindings`, and nullish entries are all tolerated.
 * @returns {number} Score in [0, 1], rounded to 4 decimals.
 */
function scoreGrounding(rec, ctx) {
  // `ctx` may arrive as null (a default parameter upstream only covers undefined).
  const knownFindings = Array.isArray(ctx?.knownFindings) ? ctx.knownFindings : [];
  // Skip nullish entries rather than throwing on `f.file`.
  const findingKeys = new Set(
    knownFindings.filter((f) => f != null).map((f) => `${f.file}:${f.line}`),
  );
  const refs = Array.isArray(rec.findingRefs) ? rec.findingRefs : [];

  let s = 0;
  // Only existence matters, so stop at the first matching ref.
  if (refs.some((r) => findingKeys.has(r))) s += 0.5;
  else if (refs.length > 0) s += 0.25;

  if (Array.isArray(rec.affectedFiles) && rec.affectedFiles.length > 0) s += 0.25;

  const fenceText = [rec.currentBehavior, rec.desiredBehavior].filter((x) => typeof x === 'string').join('\n');
  // Global regex: `.test()` advances lastIndex, so rewind it for the next caller.
  if (CODE_FENCE_RE.test(fenceText)) s += 0.25;
  CODE_FENCE_RE.lastIndex = 0;

  if (typeof rec.candidateRef === 'string' && rec.candidateRef.length > 0) s += 0.1;

  return Math.min(1, roundTo(s, 4));
}
|
|
|
|
/**
 * Evidence axis (route scope): scores how much measurable evidence the prose quotes.
 *
 * Scans what / why / fix / verify (string fields only). Empty text scores 0. Otherwise:
 *   file:line citations  0.20 each, up to 0.5 (the gold standard)
 *   number+unit values   0.075 each, up to 0.3
 *   count nouns          0.05 each, up to 0.2
 *
 * @param {object} rec - Recommendation to score.
 * @returns {number} Score in [0, 1], rounded to 4 decimals.
 */
function scoreEvidence(rec) {
  const prose = [rec.what, rec.why, rec.fix, rec.verify]
    .filter((part) => typeof part === 'string')
    .join('\n');
  if (prose === '') return 0;

  // String#match with a global pattern leaves lastIndex at 0, so no manual rewind is needed.
  const tally = (pattern) => prose.match(pattern)?.length ?? 0;
  const countHits = tally(COUNT_WORDS_RE);
  const unitHits = tally(UNIT_RE);
  const fileLineHits = tally(FILE_LINE_RE);

  const fileLinePart = Math.min(0.5, fileLineHits * 0.2);
  const unitPart = Math.min(0.3, unitHits * 0.075);
  const countPart = Math.min(0.2, countHits * 0.05);

  return Math.min(1, roundTo(fileLinePart + unitPart + countPart, 4));
}
|
|
|
|
// No findingRefs/code fences possible — grade structural tie to gate + signal-quoting.
|
|
/**
 * Grounding axis (account scope).
 *
 * Points, capped at 1:
 *   +0.4  `candidateRef` starts with 'platform_', or +0.2 for any other non-empty string ref
 *   +0.4  three or more number+unit values in why / fix / verify, or +0.2 for at least one
 *   +0.2  two or more `citations`, or +0.1 for exactly one
 *
 * @param {object} rec - Recommendation to score.
 * @returns {number} Score in [0, 1], rounded to 4 decimals.
 */
function scoreGroundingAccount(rec) {
  const { candidateRef, citations } = rec;

  let refPoints = 0;
  if (typeof candidateRef === 'string') {
    if (candidateRef.startsWith('platform_')) {
      refPoints = 0.4;
    } else if (candidateRef.length > 0) {
      refPoints = 0.2;
    }
  }

  // Quoted magnitudes in why/fix/verify stand in for the file:line citations of route scope.
  const narrative = [rec.why, rec.fix, rec.verify]
    .filter((part) => typeof part === 'string')
    .join('\n');
  // String#match with a global pattern leaves lastIndex at 0, so no manual rewind is needed.
  const unitHits = narrative.match(UNIT_RE)?.length ?? 0;
  let unitPoints = 0;
  if (unitHits >= 3) {
    unitPoints = 0.4;
  } else if (unitHits >= 1) {
    unitPoints = 0.2;
  }

  const citationCount = Array.isArray(citations) ? citations.length : 0;
  let citationPoints = 0;
  if (citationCount >= 2) {
    citationPoints = 0.2;
  } else if (citationCount >= 1) {
    citationPoints = 0.1;
  }

  return Math.min(1, roundTo(refPoints + unitPoints + citationPoints, 4));
}
|
|
|
|
// Heavily weighted toward magnitude quoting — vague platform recs should score low.
|
|
/**
 * Evidence axis (account scope).
 *
 * Scans what / why / fix / verify (string fields only). Empty text scores 0. Otherwise:
 *   number+unit values   0.15 each, up to 0.55
 *   count nouns          0.08 each, up to 0.35
 *   +0.1                 when `o11ySignal` is a non-empty string
 * Per-hit weights are higher than in the route variant because file:line citations
 * are not available to account-scope recommendations.
 *
 * @param {object} rec - Recommendation to score.
 * @returns {number} Score in [0, 1], rounded to 4 decimals.
 */
function scoreEvidenceAccount(rec) {
  const prose = [rec.what, rec.why, rec.fix, rec.verify]
    .filter((part) => typeof part === 'string')
    .join('\n');
  if (prose === '') return 0;

  // String#match with a global pattern leaves lastIndex at 0, so no manual rewind is needed.
  const tally = (pattern) => prose.match(pattern)?.length ?? 0;
  const countHits = tally(COUNT_WORDS_RE);
  const unitHits = tally(UNIT_RE);

  const magnitude = Math.min(0.55, unitHits * 0.15) + Math.min(0.35, countHits * 0.08);
  const { o11ySignal } = rec;
  const signalBonus = typeof o11ySignal === 'string' && o11ySignal.length > 0 ? 0.1 : 0;

  return Math.min(1, roundTo(magnitude + signalBonus, 4));
}
|
|
|
|
/**
 * Round a number to a fixed count of decimal places.
 *
 * Scales by 10^d, applies Math.round, then scales back.
 *
 * @param {number} n - Value to round.
 * @param {number} d - Number of decimal places to keep.
 * @returns {number} The rounded value.
 */
function roundTo(n, d) {
  const scale = Math.pow(10, d);
  const scaled = Math.round(n * scale);
  return scaled / scale;
}
|
|
|
|
// 0.55 = Poor/Fair boundary. Recommending Poor-graded items erodes trust faster than the marginal recall benefit.
|
|
/**
 * Split recommendations into those at or above a quality floor and those below it.
 *
 * A recommendation's score is read from `rec.quality.overall`. A missing score, NaN, or a value
 * that does not convert to a number is treated as 0, so ungraded or mis-graded items are dropped
 * rather than kept.
 *
 * @param {Iterable<object>} recs - Recommendations to filter; a nullish value is treated as empty.
 * @param {number} [floor=0.55] - Minimum `quality.overall` (inclusive) required to keep an item.
 * @returns {{kept: object[], dropped: Array<{rec: object, reason: string}>}} Kept recommendations
 *   in input order, and dropped ones paired with a human-readable reason.
 */
export function applyQualityFloor(recs, floor = 0.55) {
  const kept = [];
  const dropped = [];
  for (const rec of recs ?? []) {
    const raw = rec?.quality?.overall ?? 0;
    // NaN compares false against everything, so `NaN < floor` would silently keep the item.
    const overall = Number.isNaN(Number(raw)) ? 0 : raw;
    if (overall < floor) {
      dropped.push({ rec, reason: `quality.overall=${overall} < floor=${floor}` });
    } else {
      kept.push(rec);
    }
  }
  return { kept, dropped };
}
|