#!/usr/bin/env python3
"""
Skill Quality Scorer — Antigravity Awesome Skills

Computes a quality score for each skill across three dimensions:
  - Metadata completeness (30%)
  - Documentation structure (40%)
  - Security posture (30%)

Scores are informational only — never blocking in CI.

Usage:
  node tools/scripts/run-python.js tools/scripts/score_skills.py
  node tools/scripts/run-python.js tools/scripts/score_skills.py --json
  node tools/scripts/run-python.js tools/scripts/score_skills.py --output data/scores.json
  node tools/scripts/run-python.js tools/scripts/score_skills.py --threshold 60
"""
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import os
|
||
import re
|
||
import sys
|
||
from dataclasses import dataclass, field, asdict
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
from _project_paths import find_repo_root
|
||
from validate_skills import (
|
||
configure_utf8_output,
|
||
parse_frontmatter,
|
||
has_when_to_use_section,
|
||
)
|
||
from security_scanner import scan_content, ScanResult
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Constants
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Risk classifications recognised by the security scoring step.
VALID_RISKS = {"none", "safe", "critical", "offensive", "unknown"}

# Optional frontmatter fields; each one that is filled earns a metadata bonus.
OPTIONAL_BONUS_FIELDS = ("category", "tags", "author", "tools", "license")

# Level-2 markdown headings that count toward documentation section coverage.
# Each pattern is anchored to the start of a line and matched case-insensitively.
DOCUMENTATION_SECTIONS = [
    re.compile(heading, re.MULTILINE | re.IGNORECASE)
    for heading in (
        r"^##\s+Overview\b",
        r"^##\s+How\s+It\s+Works\b",
        r"^##\s+Example(s)?\b",
        r"^##\s+Usage\b",
        r"^##\s+Best\s+Practices\b",
        r"^##\s+Limitation(s)?\b",
        r"^##\s+When\s+to\s+Use",
    )
]

# Matches the opening (or closing) fence of a markdown code block.
FENCED_CODE_BLOCK = re.compile(r"^```", re.MULTILINE)

# Score weights (must sum to 1.0)
_W_METADATA = 0.30
_W_DOCS = 0.40
_W_SECURITY = 0.30

# Score thresholds for display labels
LABEL_EXCELLENT = 85
LABEL_GOOD = 65
LABEL_NEEDS_IMPROVEMENT = 45
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Data models
|
||
# ---------------------------------------------------------------------------
|
||
|
||
@dataclass
class ScoreDimensions:
    """Per-dimension scores together with the combined total."""

    # Metadata completeness score.
    metadata: float
    # Documentation structure score.
    documentation: float
    # Security posture score.
    security: float
    # Combined score across the three dimensions.
    total: float
|
||
|
||
|
||
@dataclass
class SkillScore:
    """Quality score of a single skill: per-dimension values, label and scanner flags."""

    skill_id: str
    risk: str
    metadata_score: float
    documentation_score: float
    security_score: float
    total_score: float
    label: str
    # Fresh list per instance; holds one dict per security flag.
    flags: list[dict] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view with every score rounded to one decimal place."""
        rounded_scores = {
            dimension: round(value, 1)
            for dimension, value in (
                ("metadata", self.metadata_score),
                ("documentation", self.documentation_score),
                ("security", self.security_score),
                ("total", self.total_score),
            )
        }
        return {
            "skill_id": self.skill_id,
            "risk": self.risk,
            "scores": rounded_scores,
            "label": self.label,
            "flags": self.flags,
        }
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Scoring functions
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _label_for(score: float) -> str:
    """Map a numeric score to its display label.

    Tiers are checked from highest to lowest; a score below every threshold
    is labelled "critical".
    """
    tiers = (
        (LABEL_EXCELLENT, "excellent"),
        (LABEL_GOOD, "good"),
        (LABEL_NEEDS_IMPROVEMENT, "needs_improvement"),
    )
    for floor, tier_name in tiers:
        if score >= floor:
            return tier_name
    return "critical"
|
||
|
||
|
||
def score_metadata(metadata: dict, folder_name: str) -> float:
    """
    Score metadata completeness on a 0–100 scale.

    The score starts at 100, penalties are subtracted, bonuses are added and
    the result is clamped to [0, 100], so bonuses can only offset penalties.

    Penalties:
        -25  name missing or mismatch with folder
        -20  description missing
        -10  description too short (<20 chars)
        -15  risk missing
        -10  risk is 'unknown' (unclassified)
        -15  source missing
        -10  date_added missing

    Bonuses:
        +5 per optional field filled (category, tags, author, tools, license)

    Args:
        metadata: Frontmatter mapping of the skill.
        folder_name: Directory name the ``name`` field is compared against.

    Returns:
        The clamped metadata score.
    """
    score = 100.0

    # A missing name and a name that differs from the folder cost the same.
    name = metadata.get("name", "")
    if not name or name != folder_name:
        score -= 25

    desc = metadata.get("description", "")
    if not desc:
        score -= 20
    elif len(str(desc)) < 20:
        score -= 10

    risk = metadata.get("risk", "")
    if not risk:
        score -= 15
    elif risk == "unknown":
        score -= 10

    if not metadata.get("source"):
        score -= 15

    if not metadata.get("date_added"):
        score -= 10

    # Bonuses for optional fields: any truthy value counts, which already
    # excludes empty strings and empty lists.
    for bonus_field in OPTIONAL_BONUS_FIELDS:
        if metadata.get(bonus_field):
            score += 5

    return max(0.0, min(100.0, score))
|
||
|
||
|
||
def score_documentation(content: str, body: str) -> float:
    """
    Score documentation quality on a 0–100 scale.

    Section coverage (up to 60 pts):
        The share of DOCUMENTATION_SECTIONS patterns found in ``content``,
        scaled to 60 points; every recognized section weighs the same.

    Content depth (up to 40 pts), 10 pts for each check that holds:
        - ``content`` has a When to Use section
        - ``body`` contains a fenced code block
        - ``body`` is at least 500 characters long
        - ``body`` is at least 1000 characters long

    Args:
        content: Text searched for section headings and the When to Use section.
        body: Text checked for code fences and length.

    Returns:
        Coverage plus depth points, clamped to [0, 100].
    """
    matched_sections = [
        pattern for pattern in DOCUMENTATION_SECTIONS if pattern.search(content)
    ]
    coverage_points = len(matched_sections) / len(DOCUMENTATION_SECTIONS) * 60.0

    body_chars = len(body)
    depth_checks = (
        bool(has_when_to_use_section(content)),
        FENCED_CODE_BLOCK.search(body) is not None,
        body_chars >= 500,
        body_chars >= 1000,
    )
    depth_points = 10.0 * sum(1 for passed in depth_checks if passed)

    return max(0.0, min(100.0, coverage_points + depth_points))
|
||
|
||
|
||
def score_security(scan_result: ScanResult, metadata: dict) -> float:
    """
    Score security posture on a 0–100 scale.

    Penalties (applied per flag in ``scan_result.flags``):
        -20  severity "error"
        -10  severity "warning"
        -3   any other severity (e.g. info)

    Bonus:
        +5   risk is one of VALID_RISKS and not 'unknown'; the bonus cannot
             push the score above 100

    Returns:
        The resulting score, never below 0.
    """
    severity_penalty = {"error": 20.0, "warning": 10.0}
    fallback_penalty = 3.0

    remaining = 100.0
    for flag in scan_result.flags:
        remaining -= severity_penalty.get(flag.severity, fallback_penalty)

    declared_risk = metadata.get("risk", "unknown")
    risk_is_classified = declared_risk in VALID_RISKS and declared_risk != "unknown"
    if risk_is_classified:
        remaining = min(100.0, remaining + 5.0)

    return max(0.0, remaining)
|
||
|
||
|
||
def score_skill(skill_path: Path, skill_id: str | None = None) -> SkillScore | None:
    """
    Read a skill directory and compute its quality score.

    Returns None if SKILL.md is missing, cannot be read, or is not valid
    UTF-8, so one unreadable file never aborts a whole scoring run.

    Args:
        skill_path: Path to the skill directory containing SKILL.md.
        skill_id: Override for the skill identifier (e.g. a relative path).
            Defaults to the directory name.

    Returns:
        The SkillScore for the skill, or None when SKILL.md is unusable.
    """
    skill_file = skill_path / "SKILL.md"

    try:
        content = skill_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # OSError covers a missing or unreadable file. UnicodeDecodeError is a
        # ValueError, not an OSError, so it has to be listed explicitly.
        return None

    metadata, _ = parse_frontmatter(content)
    if metadata is None:
        metadata = {}

    # Strip frontmatter to get body for documentation scoring
    body = re.sub(r"^---\s*\n.*?\n---\s*\n?", "", content, count=1, flags=re.DOTALL)

    effective_id = skill_id if skill_id is not None else skill_path.name
    is_offensive = str(metadata.get("risk", "")).lower() == "offensive"
    scan_result = scan_content(
        skill_id=effective_id,
        content=body,
        is_offensive=is_offensive,
    )

    # Metadata name comparison always uses the immediate directory name
    meta_score = score_metadata(metadata, skill_path.name)
    doc_score = score_documentation(content, body)
    sec_score = score_security(scan_result, metadata)

    total = (meta_score * _W_METADATA) + (doc_score * _W_DOCS) + (sec_score * _W_SECURITY)
    # Label the rounded total so the label always agrees with the reported
    # number (e.g. 84.96 is reported as 85.0 and must be "excellent").
    total_rounded = round(total, 1)

    return SkillScore(
        skill_id=effective_id,
        risk=metadata.get("risk", "unknown"),
        metadata_score=round(meta_score, 1),
        documentation_score=round(doc_score, 1),
        security_score=round(sec_score, 1),
        total_score=total_rounded,
        label=_label_for(total_rounded),
        flags=[f.to_dict() for f in scan_result.flags],
    )
|
||
|
||
|
||
def score_all_skills(skills_dir: Path) -> list[SkillScore]:
    """Score every skill directory found under skills_dir (recursively).

    Skill directories located inside a hidden (dot-prefixed) directory below
    ``skills_dir`` are skipped. Only path components below ``skills_dir`` are
    inspected, so a repository that itself lives under a hidden directory
    (for example ``~/.cache/...``) is still scored.

    Args:
        skills_dir: Root directory searched for SKILL.md files.

    Returns:
        Scores in sorted SKILL.md path order; skills that cannot be scored
        are omitted.
    """
    scores: list[SkillScore] = []
    for skill_file in sorted(skills_dir.rglob("SKILL.md")):
        skill_path = skill_file.parent
        relative_dir = skill_path.relative_to(skills_dir)
        if any(part.startswith(".") for part in relative_dir.parts):
            continue
        # Use path relative to skills_dir as ID to avoid collisions in nested layouts
        result = score_skill(skill_path, skill_id=relative_dir.as_posix())
        if result is not None:
            scores.append(result)
    return scores
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Summary
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def build_summary(scores: list[SkillScore]) -> dict[str, Any]:
    """Aggregate per-skill scores into report-level statistics.

    Returns an empty dict when ``scores`` is empty. Otherwise the result holds
    the skill count, average/min/max total score (rounded to one decimal),
    the number of skills per label, the number of skills per risk value, and
    the number of "error" and "warning" security flags across all skills.
    """
    if not scores:
        return {}

    totals = [entry.total_score for entry in scores]

    distribution: dict[str, int] = dict.fromkeys(
        ("excellent", "good", "needs_improvement", "critical"), 0
    )
    risk_breakdown: dict[str, int] = {}
    flag_errors = 0
    flag_warnings = 0

    for entry in scores:
        distribution[entry.label] += 1
        risk_breakdown[entry.risk] = risk_breakdown.get(entry.risk, 0) + 1
        for flag in entry.flags:
            severity = flag["severity"]
            if severity == "error":
                flag_errors += 1
            elif severity == "warning":
                flag_warnings += 1

    return {
        "total_skills": len(scores),
        "average_score": round(sum(totals) / len(totals), 1),
        "min_score": round(min(totals), 1),
        "max_score": round(max(totals), 1),
        "score_distribution": distribution,
        "risk_breakdown": risk_breakdown,
        "flag_errors": flag_errors,
        "flag_warnings": flag_warnings,
    }
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# CLI
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _print_table(scores: list[SkillScore], threshold: float | None = None) -> None:
    """Print one table row per skill: id, total and per-dimension scores, label.

    When ``threshold`` is given, only skills whose total score is below it
    are printed; otherwise every entry of ``scores`` is printed.
    """
    configure_utf8_output()
    icons_by_label = {
        "excellent": "✅",
        "good": "🟢",
        "needs_improvement": "⚠️ ",
        "critical": "❌",
    }

    if threshold is None:
        rows = scores
    else:
        rows = [entry for entry in scores if entry.total_score < threshold]

    header = f"{'Skill':<50} {'Total':>6} {'Meta':>6} {'Docs':>6} {'Sec':>6} Label"
    rule = "─" * len(header)
    print(f"\n{rule}")
    print(header)
    print(rule)

    for row in rows:
        icon = icons_by_label.get(row.label, " ")
        print(
            f"{row.skill_id:<50} {row.total_score:>6.1f} "
            f"{row.metadata_score:>6.1f} {row.documentation_score:>6.1f} "
            f"{row.security_score:>6.1f} {icon} {row.label}"
        )
|
||
|
||
|
||
def _print_summary(summary: dict) -> None:
    """Print the aggregate quality report; keys missing from ``summary`` show as 0."""
    dist = summary.get("score_distribution", {})
    heavy_rule = "═" * 60
    light_rule = "─" * 60

    skill_count = summary.get("total_skills", 0)
    average = summary.get("average_score", 0)
    lowest = summary.get("min_score", 0)
    highest = summary.get("max_score", 0)
    error_count = summary.get("flag_errors", 0)
    warning_count = summary.get("flag_warnings", 0)

    print(f"\n{heavy_rule}")
    print("📊 SKILL QUALITY REPORT")
    print(light_rule)
    print(f" Skills scored : {skill_count}")
    print(f" Average score : {average:.1f}")
    print(f" Min / Max : {lowest:.1f} / {highest:.1f}")
    print(f" ✅ Excellent : {dist.get('excellent', 0)}")
    print(f" 🟢 Good : {dist.get('good', 0)}")
    print(f" ⚠️ Needs work : {dist.get('needs_improvement', 0)}")
    print(f" ❌ Critical : {dist.get('critical', 0)}")
    print(f" Security flags: {error_count} errors, {warning_count} warnings")
    print(f"{heavy_rule}\n")
|
||
|
||
|
||
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: score all skills and report the results.

    With ``--json`` and/or ``--output`` the results are emitted as JSON
    (to stdout and/or to a file under the repository root). Otherwise a table
    and a summary are printed; ``--threshold`` and ``--top`` narrow the table
    and may be combined (filter by threshold first, then keep the N lowest).

    Args:
        argv: Argument list to parse; None lets argparse read ``sys.argv``.

    Returns:
        Always 0 — scores are informational and never fail the caller.
    """
    configure_utf8_output()
    parser = argparse.ArgumentParser(
        description="Score Antigravity skill quality (metadata, documentation, security)."
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Print full results as JSON instead of table.",
    )
    parser.add_argument(
        "--output",
        metavar="FILE",
        help="Write JSON results to FILE (e.g. data/scores.json).",
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=None,
        metavar="N",
        help="Only display skills with total score below N.",
    )
    parser.add_argument(
        "--top",
        type=int,
        default=None,
        metavar="N",
        help="Only display the top N lowest-scoring skills.",
    )
    args = parser.parse_args(argv)

    repo_root = find_repo_root(__file__)
    skills_dir = repo_root / "skills"

    # Keep stdout clean when it is going to carry JSON.
    if not args.json:
        print(f"📐 Scoring skills in: {skills_dir}")
    scores = score_all_skills(skills_dir)
    summary = build_summary(scores)

    if args.json or args.output:
        payload = {
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "summary": summary,
            "skills": [s.to_dict() for s in scores],
        }
        # Serialize once; the same text goes to stdout and/or the file.
        serialized = json.dumps(payload, indent=2, ensure_ascii=False)
        if args.json:
            print(serialized)
        if args.output:
            output_path = repo_root / args.output
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_text(serialized, encoding="utf-8")
            # With --json, stdout must stay parseable JSON, so the
            # confirmation goes to stderr instead.
            notice_stream = sys.stderr if args.json else sys.stdout
            print(f"\n💾 Saved to: {output_path}", file=notice_stream)
    else:
        display = scores
        # Apply the threshold first so --top picks from the filtered set
        # rather than silently discarding --threshold.
        if args.threshold is not None:
            display = [s for s in display if s.total_score < args.threshold]
        if args.top:
            display = sorted(display, key=lambda s: s.total_score)[: args.top]
        _print_table(display)
        _print_summary(summary)

    return 0
|
||
|
||
|
||
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|