playbook/antigravity-awesome-skills/plugins/antigravity-awesome-skills-.../skills/vercel-optimize/lib/grade-recommendation.mjs

156 lines
6.7 KiB
JavaScript

// 4-axis rubric (specificity, actionability, grounding, evidence) → bucket. See references/recommendations.md.
// Account-scope (platform_*) recs use a separate grounding/evidence pair — they structurally cannot produce file:line.
// Hedging vocabulary. scoreActionability subtracts 0.1 per occurrence in `fix` (capped at 0.3).
// Global flag: used with String.prototype.match to count occurrences.
const HEDGE_WORDS = /\b(consider|might|may|could|perhaps|maybe|likely|probably)\b/gi;
// An action verb at the start of any line (`m` flag), optionally preceded by a list marker
// ("- ", "* ", "1. ", "1) ") or emphasis characters ("*", "_"). Case-insensitive, not global.
// NOTE(review): there is no word boundary after the verb, so line openers such as "Settings"
// or "Important" also match — confirm whether that leniency is intended.
const VERB_OPENERS = /^\s*(?:[-*]\s+|\d+[.)]\s+|[*_]+)?(?:add|set|enable|disable|replace|remove|move|wrap|cache|defer|parallelize|introduce|configure|update|change|switch|opt[-\s]?in|opt[-\s]?out|export|import|install|run|delete|rename)/im;
// Countable nouns; each occurrence counts as a (weak) evidence signal.
const COUNT_WORDS_RE = /\b(errors?|queries|invocations|requests|reads|writes|bytes|fetch(?:es)?|calls?|hits?|misses?|seconds?|images?|deployments?|cold[- ]?starts?|users?)\b/gi;
// A number followed by a unit ("820 ms", "1.5GB", "62%").
// The trailing \b applies only to the alphabetic units: `%` is a non-word character, so a \b
// after it would require a word character to follow and "50% faster" / "(50%)" would never match.
const UNIT_RE = /\b\d[\d.,]*\s*(?:%|(?:ms|s|sec|seconds?|min|minutes?|h|hours?|GB|MB|KB|K|M|B|rps|qps|req\/s|reqs?\/min)\b)/gi;
// Fenced code block (triple backticks), non-greedy so adjacent fences are matched separately.
// Global flag: callers that use .test() must reset lastIndex afterwards.
const CODE_FENCE_RE = /```[\s\S]*?```/g;
// Single-line inline code span with at least 10 characters between the backticks.
// Global flag: callers that use .test() must reset lastIndex afterwards.
const INLINE_CODE_RE = /`[^`\n]{10,}`/g;
// A `path.ext:123` style reference (path characters, a dot-extension, a colon, digits).
const FILE_LINE_RE = /[\w/.\-()\[\]]+\.\w+:\d+/g;
// Grounding + evidence are lie-detectors — weighted higher than specificity/actionability, which LLMs can game with fluff.
// The four weights sum to 1.0, so `overall` stays in [0, 1] when every axis is in [0, 1].
const W = { grounding: 0.35, evidence: 0.30, specificity: 0.20, actionability: 0.15 };
/**
 * Grade one recommendation on the four rubric axes and combine them into a weighted overall score.
 *
 * Account-scope recommendations (as decided by isAccountScope) are scored with the account
 * variants of the grounding and evidence scorers; everything else uses the route variants.
 *
 * A nullish `rec` or `ctx` is treated as an empty object, so the call yields zero scores
 * instead of throwing while reading properties.
 *
 * @param {object} rec - Recommendation to grade.
 * @param {object} [ctx={}] - Grading context; `knownFindings` is read by scoreGrounding.
 * @returns {{specificity: number, actionability: number, grounding: number, evidence: number,
 *   overall: number, grade: string, scope: ('account'|'route')}} Per-axis scores, the weighted
 *   overall score rounded to 4 decimals, its grade label, and the scope that was applied.
 */
export function gradeRecommendation(rec, ctx = {}) {
  // Default parameters only replace `undefined`; normalise `null` as well so the scorers
  // below can read properties safely (isAccountScope already tolerates a nullish rec).
  const safeRec = rec ?? {};
  const safeCtx = ctx ?? {};

  const accountScope = isAccountScope(safeRec);
  const specificity = scoreSpecificity(safeRec);
  const actionability = scoreActionability(safeRec);
  const grounding = accountScope ? scoreGroundingAccount(safeRec) : scoreGrounding(safeRec, safeCtx);
  const evidence = accountScope ? scoreEvidenceAccount(safeRec) : scoreEvidence(safeRec);

  const overall = roundTo(
    grounding * W.grounding + evidence * W.evidence + specificity * W.specificity + actionability * W.actionability,
    4,
  );

  return {
    specificity,
    actionability,
    grounding,
    evidence,
    overall,
    grade: grade(overall),
    scope: accountScope ? 'account' : 'route',
  };
}
/**
 * Decide whether a recommendation is account-scoped.
 *
 * True when `rec.scope` is exactly 'account', or when `rec.candidateRef` is a string
 * starting with 'platform_'. A nullish `rec` yields false.
 *
 * @param {object|null|undefined} rec
 * @returns {boolean}
 */
function isAccountScope(rec) {
  if (rec?.scope !== 'account') {
    const candidate = rec?.candidateRef;
    return typeof candidate === 'string' && candidate.startsWith('platform_');
  }
  return true;
}
/**
 * Map an overall score to its grade label.
 *
 * Bands are checked from highest to lowest, lower bound inclusive:
 * >= 0.85 'Excellent', >= 0.70 'Good', >= 0.55 'Fair', anything else 'Poor'.
 *
 * @param {number} overall
 * @returns {string}
 */
function grade(overall) {
  const bands = [
    [0.85, 'Excellent'],
    [0.70, 'Good'],
    [0.55, 'Fair'],
  ];
  for (const [minimum, label] of bands) {
    if (overall >= minimum) return label;
  }
  return 'Poor';
}
/**
 * Specificity axis: how concrete the recommendation is.
 *
 * Looks at the string-valued `fix`, `currentBehavior` and `desiredBehavior` fields and awards
 *   +0.5 for a fenced code block, +0.2 for an inline code span,
 *   +0.2 for a non-empty `affectedFiles` array,
 *   +0.3 when any `findingRefs` entry contains `:<digits>`.
 * The sum is rounded to 4 decimals and capped at 1.
 *
 * @param {object} rec
 * @returns {number} Score in [0, 1].
 */
function scoreSpecificity(rec) {
  const snippetSource = [rec.fix, rec.currentBehavior, rec.desiredBehavior]
    .filter((part) => typeof part === 'string')
    .join('\n');

  // Both patterns are global, so .test() leaves lastIndex behind; rewind after each probe.
  const hasFencedBlock = CODE_FENCE_RE.test(snippetSource);
  CODE_FENCE_RE.lastIndex = 0;
  const hasInlineSpan = INLINE_CODE_RE.test(snippetSource);
  INLINE_CODE_RE.lastIndex = 0;

  const namesFiles = Array.isArray(rec.affectedFiles) && rec.affectedFiles.length > 0;
  const citesLine = Array.isArray(rec.findingRefs)
    && rec.findingRefs.some((ref) => /:\d+/.test(ref));

  let total = 0;
  if (hasFencedBlock) total += 0.5;
  if (hasInlineSpan) total += 0.2;
  if (namesFiles) total += 0.2;
  if (citesLine) total += 0.3;

  return Math.min(1, roundTo(total, 4));
}
/**
 * Actionability axis: how directly `rec.fix` can be executed.
 *
 * Returns 0 when `fix` is missing, not a string, or empty. Otherwise:
 *   +0.35 when a line opens with an action verb,
 *   +0.35 for two or more list/step markers, +0.15 for exactly one,
 *   -0.1 per hedge word (penalty capped at 0.3),
 *   +0.3 baseline so a verb-only one-liner still scores.
 * The result is rounded to 4 decimals and clamped to [0, 1].
 *
 * @param {object} rec
 * @returns {number} Score in [0, 1].
 */
function scoreActionability(rec) {
  const fixText = typeof rec.fix === 'string' ? rec.fix : '';
  if (fixText === '') return 0;

  const opensWithVerb = VERB_OPENERS.test(fixText);
  const stepMarkers = fixText.match(/(?:^|\n)\s*(?:\d+[.)]\s+|[-*]\s+)/g) ?? [];
  const hedgeHits = fixText.match(HEDGE_WORDS) ?? [];
  HEDGE_WORDS.lastIndex = 0;

  let stepBonus = 0;
  if (stepMarkers.length >= 2) {
    stepBonus = 0.35;
  } else if (stepMarkers.length === 1) {
    stepBonus = 0.15;
  }

  let total = 0;
  if (opensWithVerb) total += 0.35;
  total += stepBonus;
  total -= Math.min(0.3, hedgeHits.length * 0.1);
  // Baseline so a verb-only one-liner still scores.
  total += 0.3;

  const rounded = roundTo(total, 4);
  return Math.max(0, Math.min(1, rounded));
}
/**
 * Grounding axis for route-scope recommendations.
 *
 *   +0.5 when at least one `findingRefs` entry equals a known finding's `file:line` key,
 *        otherwise +0.25 when any `findingRefs` are present at all,
 *   +0.25 for a non-empty `affectedFiles` array,
 *   +0.25 for a fenced code block in `currentBehavior` / `desiredBehavior`,
 *   +0.1 for a non-empty string `candidateRef`.
 * The sum is rounded to 4 decimals and capped at 1.
 *
 * @param {object} rec
 * @param {object} ctx - `ctx.knownFindings` (array of objects with `file` and `line`) is used when present.
 * @returns {number} Score in [0, 1].
 */
function scoreGrounding(rec, ctx) {
  const knownKeys = new Set();
  if (Array.isArray(ctx.knownFindings)) {
    for (const finding of ctx.knownFindings) {
      knownKeys.add(`${finding.file}:${finding.line}`);
    }
  }

  const citedRefs = Array.isArray(rec.findingRefs) ? rec.findingRefs : [];
  const hasVerifiedRef = citedRefs.some((ref) => knownKeys.has(ref));

  let total = 0;
  if (hasVerifiedRef) {
    total += 0.5;
  } else if (citedRefs.length > 0) {
    // Refs that cannot be verified against known findings earn half credit.
    total += 0.25;
  }

  if (Array.isArray(rec.affectedFiles) && rec.affectedFiles.length > 0) total += 0.25;

  const behaviorText = [rec.currentBehavior, rec.desiredBehavior]
    .filter((part) => typeof part === 'string')
    .join('\n');
  const quotesCode = CODE_FENCE_RE.test(behaviorText);
  // Global regex: rewind so the next .test() starts from the beginning.
  CODE_FENCE_RE.lastIndex = 0;
  if (quotesCode) total += 0.25;

  if (typeof rec.candidateRef === 'string' && rec.candidateRef.length > 0) total += 0.1;

  return Math.min(1, roundTo(total, 4));
}
/**
 * Evidence axis for route-scope recommendations.
 *
 * Counts three kinds of signal in the string-valued `what`, `why`, `fix` and `verify` fields:
 *   file:line references — 0.2 each, capped at 0.5 (the gold standard),
 *   number+unit quotes   — 0.075 each, capped at 0.3,
 *   countable nouns      — 0.05 each, capped at 0.2.
 * Returns 0 when the combined text is empty; otherwise the sum rounded to 4 decimals, capped at 1.
 *
 * @param {object} rec
 * @returns {number} Score in [0, 1].
 */
function scoreEvidence(rec) {
  const narrative = [rec.what, rec.why, rec.fix, rec.verify]
    .filter((part) => typeof part === 'string')
    .join('\n');
  if (narrative.length === 0) return 0;

  // Count matches of a global pattern, leaving its lastIndex rewound.
  const tally = (pattern) => {
    const hits = narrative.match(pattern) ?? [];
    pattern.lastIndex = 0;
    return hits.length;
  };

  const countHits = tally(COUNT_WORDS_RE);
  const unitHits = tally(UNIT_RE);
  const fileLineHits = tally(FILE_LINE_RE);

  const fileLinePart = Math.min(0.5, fileLineHits * 0.2);
  const unitPart = Math.min(0.3, unitHits * 0.075);
  const countPart = Math.min(0.2, countHits * 0.05);

  return Math.min(1, roundTo(fileLinePart + unitPart + countPart, 4));
}
/**
 * Grounding axis for account-scope recommendations.
 *
 * findingRefs and code fences are not available at this scope, so the score grades the
 * structural tie to the gate plus signal-quoting instead:
 *   +0.4 when `candidateRef` starts with 'platform_', else +0.2 for any other non-empty string,
 *   +0.4 for three or more number+unit quotes in `why` / `fix` / `verify`, +0.2 for one or two
 *        (quoting deep-dive data is the account-scope equivalent of citing file:line),
 *   +0.2 for two or more `citations`, +0.1 for exactly one.
 * The sum is rounded to 4 decimals and capped at 1.
 *
 * @param {object} rec
 * @returns {number} Score in [0, 1].
 */
function scoreGroundingAccount(rec) {
  let total = 0;

  const ref = rec.candidateRef;
  if (typeof ref === 'string') {
    if (ref.startsWith('platform_')) {
      total += 0.4;
    } else if (ref.length > 0) {
      total += 0.2;
    }
  }

  const quotedText = [rec.why, rec.fix, rec.verify]
    .filter((part) => typeof part === 'string')
    .join('\n');
  const unitHits = (quotedText.match(UNIT_RE) ?? []).length;
  UNIT_RE.lastIndex = 0;
  if (unitHits >= 3) {
    total += 0.4;
  } else if (unitHits >= 1) {
    total += 0.2;
  }

  const citationCount = Array.isArray(rec.citations) ? rec.citations.length : 0;
  if (citationCount >= 2) {
    total += 0.2;
  } else if (citationCount >= 1) {
    total += 0.1;
  }

  return Math.min(1, roundTo(total, 4));
}
/**
 * Evidence axis for account-scope recommendations.
 *
 * Heavily weighted toward magnitude quoting so that vague platform recommendations score low.
 * Per-hit weights are higher than in the route-scope variant because file:line references
 * are not available here:
 *   number+unit quotes — 0.15 each, capped at 0.55,
 *   countable nouns    — 0.08 each, capped at 0.35,
 *   +0.1 when `o11ySignal` is a non-empty string.
 * Returns 0 when the combined `what` / `why` / `fix` / `verify` text is empty; otherwise the
 * sum rounded to 4 decimals, capped at 1.
 *
 * @param {object} rec
 * @returns {number} Score in [0, 1].
 */
function scoreEvidenceAccount(rec) {
  const narrative = [rec.what, rec.why, rec.fix, rec.verify]
    .filter((part) => typeof part === 'string')
    .join('\n');
  if (narrative.length === 0) return 0;

  const countHits = (narrative.match(COUNT_WORDS_RE) ?? []).length;
  COUNT_WORDS_RE.lastIndex = 0;
  const unitHits = (narrative.match(UNIT_RE) ?? []).length;
  UNIT_RE.lastIndex = 0;

  const unitPart = Math.min(0.55, unitHits * 0.15);
  const countPart = Math.min(0.35, countHits * 0.08);
  const hasSignal = typeof rec.o11ySignal === 'string' && rec.o11ySignal.length > 0;

  let total = unitPart + countPart;
  if (hasSignal) total += 0.1;

  return Math.min(1, roundTo(total, 4));
}
/**
 * Round `n` to `d` decimal places by scaling, applying Math.round, and scaling back.
 *
 * @param {number} n - Value to round.
 * @param {number} d - Number of decimal places.
 * @returns {number}
 */
function roundTo(n, d) {
  const scale = 10 ** d;
  const scaled = Math.round(n * scale);
  return scaled / scale;
}
/**
 * Split recommendations into those at or above a quality floor and those below it.
 *
 * The default floor of 0.55 is the Poor/Fair boundary: recommending Poor-graded items erodes
 * trust faster than the marginal recall benefit. A recommendation whose `quality.overall`
 * is missing (or which is itself nullish) is treated as scoring 0.
 *
 * @param {Iterable<object>} recs - Recommendations, each optionally carrying `quality.overall`.
 * @param {number} [floor=0.55] - Scores strictly below this are dropped.
 * @returns {{kept: object[], dropped: {rec: object, reason: string}[]}} Input order is preserved
 *   in both lists; each dropped entry records the score and floor that caused the drop.
 */
export function applyQualityFloor(recs, floor = 0.55) {
  const kept = [];
  const dropped = [];

  for (const candidate of recs) {
    const score = candidate?.quality?.overall ?? 0;
    const belowFloor = score < floor;
    if (!belowFloor) {
      kept.push(candidate);
      continue;
    }
    dropped.push({ rec: candidate, reason: `quality.overall=${score} < floor=${floor}` });
  }

  return { kept, dropped };
}