playbook/antigravity-awesome-skills/tools/scripts/score_skills.py

460 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Skill Quality Scorer — Antigravity Awesome Skills
Computes a quality score for each skill across three dimensions:
- Metadata completeness (30%)
- Documentation structure (40%)
- Security posture (30%)
Scores are informational only — never blocking in CI.
Usage:
node tools/scripts/run-python.js tools/scripts/score_skills.py
node tools/scripts/run-python.js tools/scripts/score_skills.py --json
node tools/scripts/run-python.js tools/scripts/score_skills.py --output data/scores.json
node tools/scripts/run-python.js tools/scripts/score_skills.py --threshold 60
"""
from __future__ import annotations
import argparse
import json
import os
import re
import sys
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from _project_paths import find_repo_root
from validate_skills import (
configure_utf8_output,
parse_frontmatter,
has_when_to_use_section,
)
from security_scanner import scan_content, ScanResult
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Risk values recognised when awarding the explicit-risk security bonus.
VALID_RISKS = {"none", "safe", "critical", "offensive", "unknown"}

# Optional frontmatter fields; each one that is filled in earns a metadata bonus.
OPTIONAL_BONUS_FIELDS = ("category", "tags", "author", "tools", "license")

# Level-2 markdown headings that count toward documentation section coverage.
# Each pattern is anchored to the start of a line and is case-insensitive.
DOCUMENTATION_SECTIONS = [
    re.compile(heading, re.MULTILINE | re.IGNORECASE)
    for heading in (
        r"^##\s+Overview\b",
        r"^##\s+How\s+It\s+Works\b",
        r"^##\s+Example(s)?\b",
        r"^##\s+Usage\b",
        r"^##\s+Best\s+Practices\b",
        r"^##\s+Limitation(s)?\b",
        r"^##\s+When\s+to\s+Use",
    )
]

# A code fence at the start of any line.
FENCED_CODE_BLOCK = re.compile(r"^```", flags=re.MULTILINE)

# Score weights (must sum to 1.0)
_W_METADATA = 0.30
_W_DOCS = 0.40
_W_SECURITY = 0.30

# Score thresholds for display labels
LABEL_EXCELLENT = 85
LABEL_GOOD = 65
LABEL_NEEDS_IMPROVEMENT = 45
# ---------------------------------------------------------------------------
# Data models
# ---------------------------------------------------------------------------
@dataclass
class ScoreDimensions:
    """Container for the three dimension scores plus a total."""

    metadata: float
    documentation: float
    security: float
    total: float
@dataclass
class SkillScore:
    """Scores, display label and scanner flags recorded for a single skill."""

    skill_id: str
    risk: str
    metadata_score: float
    documentation_score: float
    security_score: float
    total_score: float
    label: str
    flags: list[dict] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view with every score rounded to one decimal."""
        rounded_scores = {
            "metadata": round(self.metadata_score, 1),
            "documentation": round(self.documentation_score, 1),
            "security": round(self.security_score, 1),
            "total": round(self.total_score, 1),
        }
        as_dict: dict[str, Any] = {"skill_id": self.skill_id, "risk": self.risk}
        as_dict["scores"] = rounded_scores
        as_dict["label"] = self.label
        as_dict["flags"] = self.flags
        return as_dict
# ---------------------------------------------------------------------------
# Scoring functions
# ---------------------------------------------------------------------------
def _label_for(score: float) -> str:
    """
    Map a numeric score onto its display label.

    The thresholds are checked from highest to lowest; a score below
    LABEL_NEEDS_IMPROVEMENT is labelled "critical".
    """
    tiers = (
        (LABEL_EXCELLENT, "excellent"),
        (LABEL_GOOD, "good"),
        (LABEL_NEEDS_IMPROVEMENT, "needs_improvement"),
    )
    for floor, label in tiers:
        if score >= floor:
            return label
    return "critical"
def score_metadata(metadata: dict, folder_name: str) -> float:
    """
    Score metadata completeness on a 0-100 scale.

    The score starts at 100, penalties are subtracted, bonuses are added and
    the result is clamped to [0, 100]. Because of the upper clamp, bonuses
    can only offset penalties; they never lift a score above 100.

    Penalties:
        -25 name missing or different from the folder name
        -20 description missing
        -10 description shorter than 20 characters
        -15 risk missing
        -10 risk is 'unknown' (unclassified)
        -15 source missing
        -10 date_added missing
    Bonuses:
        +5 per optional field filled (category, tags, author, tools, license)

    Validation errors are not an input to this function and therefore carry
    no penalty here.

    Args:
        metadata: Parsed frontmatter mapping for the skill.
        folder_name: Name of the skill's directory; the 'name' field must
            match it exactly.

    Returns:
        The clamped metadata score as a float.
    """
    score = 100.0

    # A missing name and a name that disagrees with the folder cost the same.
    name = metadata.get("name", "")
    if not name or name != folder_name:
        score -= 25

    desc = metadata.get("description", "")
    if not desc:
        score -= 20
    elif len(str(desc)) < 20:
        score -= 10

    risk = metadata.get("risk", "")
    if not risk:
        score -= 15
    elif risk == "unknown":
        score -= 10

    if not metadata.get("source"):
        score -= 15
    if not metadata.get("date_added"):
        score -= 10

    # Any truthy value counts as "filled"; empty lists and strings are falsy.
    for bonus_field in OPTIONAL_BONUS_FIELDS:
        if metadata.get(bonus_field):
            score += 5

    return max(0.0, min(100.0, score))
def score_documentation(content: str, body: str) -> float:
    """
    Score documentation quality on a 0-100 scale.

    Section coverage (up to 60 pts):
        The fraction of DOCUMENTATION_SECTIONS patterns found in `content`,
        scaled to 60 points.
    Content depth (up to 40 pts), 10 pts for each of:
        - `content` has a "When to Use" section
        - `body` contains a fenced code block
        - `body` is at least 500 characters long
        - `body` is at least 1000 characters long
    """
    matched = [pattern for pattern in DOCUMENTATION_SECTIONS if pattern.search(content)]
    coverage_points = len(matched) / len(DOCUMENTATION_SECTIONS) * 60.0

    length = len(body)
    depth_checks = (
        bool(has_when_to_use_section(content)),
        bool(FENCED_CODE_BLOCK.search(body)),
        length >= 500,
        length >= 1000,
    )
    depth_points = 10.0 * sum(depth_checks)

    combined = coverage_points + depth_points
    return max(0.0, min(100.0, combined))
def score_security(scan_result: ScanResult, metadata: dict) -> float:
    """
    Score security posture on a 0-100 scale.

    Penalties:
        -20 per flag with severity "error"
        -10 per flag with severity "warning"
        -3  per flag with any other severity (e.g. info)
    Bonus:
        +5 when risk is a recognised value other than 'unknown'
           (the bonus never lifts the score above 100)

    Args:
        scan_result: Scanner output; only its `flags` (each exposing a
            `severity` attribute) are read.
        metadata: Parsed frontmatter mapping for the skill.

    Returns:
        The score, never below 0.
    """
    score = 100.0
    for flag in scan_result.flags:
        if flag.severity == "error":
            score -= 20.0
        elif flag.severity == "warning":
            score -= 10.0
        else:
            score -= 3.0

    risk = metadata.get("risk", "unknown")
    # Check the type first: a non-string risk (e.g. a list from malformed
    # frontmatter) is unhashable and would make the set lookup raise TypeError.
    if isinstance(risk, str) and risk in VALID_RISKS and risk != "unknown":
        score = min(100.0, score + 5.0)

    return max(0.0, score)
def score_skill(skill_path: Path, skill_id: str | None = None) -> SkillScore | None:
    """
    Read a skill directory and compute its quality score.

    Returns None if SKILL.md is missing, cannot be read, or is not valid
    UTF-8, so one unreadable skill never aborts a whole scoring run.

    Args:
        skill_path: Path to the skill directory containing SKILL.md.
        skill_id: Override for the skill identifier (e.g. a relative path).
            Defaults to the directory name.

    Returns:
        A SkillScore whose label is derived from the same rounded total that
        is stored in `total_score`, or None as described above.
    """
    skill_file = skill_path / "SKILL.md"
    try:
        content = skill_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # OSError covers a missing or unreadable file; UnicodeDecodeError
        # covers a file that is not valid UTF-8.
        return None

    metadata, _ = parse_frontmatter(content)
    if metadata is None:
        metadata = {}

    # Strip frontmatter to get body for documentation scoring
    body = re.sub(r"^---\s*\n.*?\n---\s*\n?", "", content, count=1, flags=re.DOTALL)

    effective_id = skill_id if skill_id is not None else skill_path.name

    raw_risk = metadata.get("risk")
    # SkillScore.risk is used as a dict key in the summary, so keep it a string.
    risk_label = "unknown" if raw_risk is None else str(raw_risk)
    is_offensive = str(metadata.get("risk", "")).lower() == "offensive"

    scan_result = scan_content(
        skill_id=effective_id,
        content=body,
        is_offensive=is_offensive,
    )

    # Metadata name comparison always uses the immediate directory name
    meta_score = score_metadata(metadata, skill_path.name)
    doc_score = score_documentation(content, body)
    sec_score = score_security(scan_result, metadata)

    weighted = (meta_score * _W_METADATA) + (doc_score * _W_DOCS) + (sec_score * _W_SECURITY)
    # Label the rounded value so the reported total and its label always agree.
    total = round(weighted, 1)

    return SkillScore(
        skill_id=effective_id,
        risk=risk_label,
        metadata_score=round(meta_score, 1),
        documentation_score=round(doc_score, 1),
        security_score=round(sec_score, 1),
        total_score=total,
        label=_label_for(total),
        flags=[f.to_dict() for f in scan_result.flags],
    )
def score_all_skills(skills_dir: Path) -> list[SkillScore]:
    """
    Score every skill directory found under skills_dir (recursively).

    A skill is any directory containing a SKILL.md file. Directories that sit
    below a dot-prefixed directory *inside* skills_dir are skipped. Only path
    components relative to skills_dir are inspected, so a repository checked
    out under a hidden directory (for example ~/.cache/...) is still scored.

    Args:
        skills_dir: Root directory to search.

    Returns:
        Scores in sorted SKILL.md path order; skills that score_skill cannot
        read are omitted.
    """
    scores: list[SkillScore] = []
    for skill_file in sorted(skills_dir.rglob("SKILL.md")):
        skill_path = skill_file.parent
        relative_dir = skill_path.relative_to(skills_dir)
        if any(part.startswith(".") for part in relative_dir.parts):
            continue
        # Use path relative to skills_dir as ID to avoid collisions in nested layouts
        result = score_skill(skill_path, skill_id=relative_dir.as_posix())
        if result is not None:
            scores.append(result)
    return scores
# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
def build_summary(scores: list[SkillScore]) -> dict[str, Any]:
    """
    Aggregate per-skill scores into summary statistics.

    Returns an empty dict when there are no scores. Otherwise the result
    holds the skill count, the average/min/max total (one decimal), the
    number of skills per label, the number of skills per risk value, and
    the counts of "error" and "warning" flags across all skills.
    """
    if not scores:
        return {}

    totals = [entry.total_score for entry in scores]
    distribution: dict[str, int] = dict.fromkeys(
        ("excellent", "good", "needs_improvement", "critical"), 0
    )
    risk_breakdown: dict[str, int] = {}
    error_count = 0
    warning_count = 0

    for entry in scores:
        distribution[entry.label] += 1
        risk_breakdown[entry.risk] = risk_breakdown.get(entry.risk, 0) + 1
        for flag in entry.flags:
            severity = flag["severity"]
            if severity == "error":
                error_count += 1
            elif severity == "warning":
                warning_count += 1

    mean_total = sum(totals) / len(totals)
    return {
        "total_skills": len(scores),
        "average_score": round(mean_total, 1),
        "min_score": round(min(totals), 1),
        "max_score": round(max(totals), 1),
        "score_distribution": distribution,
        "risk_breakdown": risk_breakdown,
        "flag_errors": error_count,
        "flag_warnings": warning_count,
    }
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def _print_table(scores: list[SkillScore], threshold: float | None = None) -> None:
    """
    Print one table row per skill to stdout.

    Args:
        scores: Skills to display, in the order they should appear.
        threshold: When given, only skills whose total score is strictly
            below this value are printed; when None every skill is printed.
    """
    configure_utf8_output()
    # The excellent/critical icons mirror the ones used in the summary report.
    label_icon = {
        "excellent": "✅",
        "good": "🟢",
        "needs_improvement": "⚠️ ",
        "critical": "❌",
    }

    if threshold is None:
        display = scores
    else:
        display = [s for s in scores if s.total_score < threshold]

    header = f"{'Skill':<50} {'Total':>6} {'Meta':>6} {'Docs':>6} {'Sec':>6} Label"
    # Horizontal rule as wide as the header (an empty fill string printed nothing).
    rule = "─" * len(header)
    print(f"\n{rule}")
    print(header)
    print(rule)
    for s in display:
        icon = label_icon.get(s.label, " ")
        print(
            f"{s.skill_id:<50} {s.total_score:>6.1f} "
            f"{s.metadata_score:>6.1f} {s.documentation_score:>6.1f} "
            f"{s.security_score:>6.1f} {icon} {s.label}"
        )
def _print_summary(summary: dict) -> None:
    """
    Print the aggregate quality report to stdout.

    Args:
        summary: Mapping with the keys read below (total_skills,
            average_score, min_score, max_score, score_distribution,
            flag_errors, flag_warnings). Missing keys are shown as 0, so an
            empty mapping is accepted.
    """
    dist = summary.get("score_distribution", {})
    # 60-column horizontal rule (an empty fill string printed nothing).
    rule = "─" * 60
    print(f"\n{rule}")
    print("📊 SKILL QUALITY REPORT")
    print(rule)
    print(f" Skills scored : {summary.get('total_skills', 0)}")
    print(f" Average score : {summary.get('average_score', 0):.1f}")
    print(f" Min / Max : {summary.get('min_score', 0):.1f} / {summary.get('max_score', 0):.1f}")
    print(f" ✅ Excellent : {dist.get('excellent', 0)}")
    print(f" 🟢 Good : {dist.get('good', 0)}")
    print(f" ⚠️ Needs work : {dist.get('needs_improvement', 0)}")
    print(f" ❌ Critical : {dist.get('critical', 0)}")
    print(f" Security flags: {summary.get('flag_errors', 0)} errors, {summary.get('flag_warnings', 0)} warnings")
    print(f"{rule}\n")
def main(argv: list[str] | None = None) -> int:
    """
    Command-line entry point: score every skill and report the results.

    Output modes:
        * default: a per-skill table followed by a summary report. With
          --threshold only skills scoring below N are listed; with --top only
          the N lowest-scoring skills are listed. When both are given the
          threshold filter is applied first and --top limits what remains.
          `--top 0` means "no limit".
        * --json: the full payload is printed to stdout as JSON.
        * --output FILE: the full payload is written to FILE (resolved
          against the repository root). When combined with --json the
          "Saved to" notice goes to stderr so stdout stays valid JSON.

    Args:
        argv: Argument list to parse; None lets argparse use sys.argv.

    Returns:
        Always 0; scores are informational and never fail the run.

    Raises:
        SystemExit: Raised by argparse for invalid arguments, including a
            negative --top value.
    """
    configure_utf8_output()
    parser = argparse.ArgumentParser(
        description="Score Antigravity skill quality (metadata, documentation, security)."
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Print full results as JSON instead of table.",
    )
    parser.add_argument(
        "--output",
        metavar="FILE",
        help="Write JSON results to FILE (e.g. data/scores.json).",
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=None,
        metavar="N",
        help="Only display skills with total score below N.",
    )
    parser.add_argument(
        "--top",
        type=int,
        default=None,
        metavar="N",
        help="Only display the top N lowest-scoring skills.",
    )
    args = parser.parse_args(argv)
    # A negative N would slice from the wrong end of the list; reject it
    # before doing any scoring work.
    if args.top is not None and args.top < 0:
        parser.error("--top must be a non-negative integer")

    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"
    if not args.json:
        print(f"📐 Scoring skills in: {skills_dir}")
    scores = score_all_skills(skills_dir)
    summary = build_summary(scores)

    if args.json or args.output:
        payload = {
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "summary": summary,
            "skills": [s.to_dict() for s in scores],
        }
        serialized = json.dumps(payload, indent=2, ensure_ascii=False)
        if args.json:
            print(serialized)
        if args.output:
            output_path = repo_root / args.output
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_text(serialized, encoding="utf-8")
            # Keep stdout machine-readable when JSON is being printed there.
            notice_stream = sys.stderr if args.json else sys.stdout
            print(f"\n💾 Saved to: {output_path}", file=notice_stream)
        return 0

    display = scores
    if args.threshold is not None:
        display = [s for s in display if s.total_score < args.threshold]
    if args.top:
        display = sorted(display, key=lambda s: s.total_score)[: args.top]
    _print_table(display)
    _print_summary(summary)
    return 0
# Run the CLI when executed as a script; main()'s return value is the exit status.
if __name__ == "__main__":
    raise SystemExit(main())