#!/usr/bin/env python3 """ Skill Quality Scorer — Antigravity Awesome Skills Computes a quality score for each skill across three dimensions: - Metadata completeness (30%) - Documentation structure (40%) - Security posture (30%) Scores are informational only — never blocking in CI. Usage: node tools/scripts/run-python.js tools/scripts/score_skills.py node tools/scripts/run-python.js tools/scripts/score_skills.py --json node tools/scripts/run-python.js tools/scripts/score_skills.py --output data/scores.json node tools/scripts/run-python.js tools/scripts/score_skills.py --threshold 60 """ from __future__ import annotations import argparse import json import os import re import sys from dataclasses import dataclass, field, asdict from datetime import datetime, timezone from pathlib import Path from typing import Any from _project_paths import find_repo_root from validate_skills import ( configure_utf8_output, parse_frontmatter, has_when_to_use_section, ) from security_scanner import scan_content, ScanResult # --------------------------------------------------------------------------- # Constants # --------------------------------------------------------------------------- VALID_RISKS = {"none", "safe", "critical", "offensive", "unknown"} OPTIONAL_BONUS_FIELDS = ("category", "tags", "author", "tools", "license") DOCUMENTATION_SECTIONS = [ re.compile(r"^##\s+Overview\b", re.MULTILINE | re.IGNORECASE), re.compile(r"^##\s+How\s+It\s+Works\b", re.MULTILINE | re.IGNORECASE), re.compile(r"^##\s+Example(s)?\b", re.MULTILINE | re.IGNORECASE), re.compile(r"^##\s+Usage\b", re.MULTILINE | re.IGNORECASE), re.compile(r"^##\s+Best\s+Practices\b", re.MULTILINE | re.IGNORECASE), re.compile(r"^##\s+Limitation(s)?\b", re.MULTILINE | re.IGNORECASE), re.compile(r"^##\s+When\s+to\s+Use", re.MULTILINE | re.IGNORECASE), ] FENCED_CODE_BLOCK = re.compile(r"^```", re.MULTILINE) # Score weights (must sum to 1.0) _W_METADATA = 0.30 _W_DOCS = 0.40 _W_SECURITY = 0.30 # Score thresholds for display labels LABEL_EXCELLENT = 85 LABEL_GOOD = 65 LABEL_NEEDS_IMPROVEMENT = 45 # --------------------------------------------------------------------------- # Data models # --------------------------------------------------------------------------- @dataclass class ScoreDimensions: metadata: float documentation: float security: float total: float @dataclass class SkillScore: skill_id: str risk: str metadata_score: float documentation_score: float security_score: float total_score: float label: str flags: list[dict] = field(default_factory=list) def to_dict(self) -> dict[str, Any]: return { "skill_id": self.skill_id, "risk": self.risk, "scores": { "metadata": round(self.metadata_score, 1), "documentation": round(self.documentation_score, 1), "security": round(self.security_score, 1), "total": round(self.total_score, 1), }, "label": self.label, "flags": self.flags, } # --------------------------------------------------------------------------- # Scoring functions # --------------------------------------------------------------------------- def _label_for(score: float) -> str: if score >= LABEL_EXCELLENT: return "excellent" if score >= LABEL_GOOD: return "good" if score >= LABEL_NEEDS_IMPROVEMENT: return "needs_improvement" return "critical" def score_metadata(metadata: dict, folder_name: str) -> float: """ Score metadata completeness on a 0–100 scale. Penalties: -25 name missing or mismatch with folder -20 description missing -10 description too short (<20 chars) -15 risk missing -10 risk is 'unknown' (unclassified) -15 source missing -10 date_added missing -10 per validation error (capped at 30) Bonuses: +5 per optional field filled (category, tags, author, tools, license) """ score = 100.0 name = metadata.get("name", "") if not name: score -= 25 elif name != folder_name: score -= 25 desc = metadata.get("description", "") if not desc: score -= 20 elif len(str(desc)) < 20: score -= 10 risk = metadata.get("risk", "") if not risk: score -= 15 elif risk == "unknown": score -= 10 if not metadata.get("source"): score -= 15 if not metadata.get("date_added"): score -= 10 # Bonuses for optional fields for bonus_field in OPTIONAL_BONUS_FIELDS: val = metadata.get(bonus_field) if val and (not isinstance(val, list) or len(val) > 0): score += 5 return max(0.0, min(100.0, score)) def score_documentation(content: str, body: str) -> float: """ Score documentation quality on a 0–100 scale. Section coverage (up to 60 pts): Each recognized section contributes equally to section coverage. Content depth (up to 40 pts): - Has When to Use: 10 pts - Has code examples: 10 pts - Body length >= 500 chars: 10 pts - Body length >= 1000 chars: 10 additional pts """ section_hits = sum( 1 for pattern in DOCUMENTATION_SECTIONS if pattern.search(content) ) section_ratio = section_hits / len(DOCUMENTATION_SECTIONS) section_score = section_ratio * 60.0 depth_score = 0.0 if has_when_to_use_section(content): depth_score += 10.0 if FENCED_CODE_BLOCK.search(body): depth_score += 10.0 body_len = len(body) if body_len >= 500: depth_score += 10.0 if body_len >= 1000: depth_score += 10.0 return max(0.0, min(100.0, section_score + depth_score)) def score_security(scan_result: ScanResult, metadata: dict) -> float: """ Score security posture on a 0–100 scale. Penalties: -20 per error flag -10 per warning flag -3 per info flag Bonus: +5 risk is explicit and not 'unknown' """ score = 100.0 for flag in scan_result.flags: if flag.severity == "error": score -= 20.0 elif flag.severity == "warning": score -= 10.0 else: score -= 3.0 risk = metadata.get("risk", "unknown") if risk in VALID_RISKS and risk != "unknown": score = min(100.0, score + 5.0) return max(0.0, score) def score_skill(skill_path: Path, skill_id: str | None = None) -> SkillScore | None: """ Read a skill directory and compute its quality score. Returns None if the skill cannot be read or parsed. Args: skill_path: Path to the skill directory containing SKILL.md. skill_id: Override for the skill identifier (e.g. a relative path). Defaults to the directory name. """ skill_file = skill_path / "SKILL.md" if not skill_file.exists(): return None try: content = skill_file.read_text(encoding="utf-8") except OSError: return None metadata, _ = parse_frontmatter(content) if metadata is None: metadata = {} # Strip frontmatter to get body for documentation scoring body = re.sub(r"^---\s*\n.*?\n---\s*\n?", "", content, count=1, flags=re.DOTALL) effective_id = skill_id if skill_id is not None else skill_path.name is_offensive = str(metadata.get("risk", "")).lower() == "offensive" scan_result = scan_content( skill_id=effective_id, content=body, is_offensive=is_offensive, ) # Metadata name comparison always uses the immediate directory name meta_score = score_metadata(metadata, skill_path.name) doc_score = score_documentation(content, body) sec_score = score_security(scan_result, metadata) total = (meta_score * _W_METADATA) + (doc_score * _W_DOCS) + (sec_score * _W_SECURITY) return SkillScore( skill_id=effective_id, risk=metadata.get("risk", "unknown"), metadata_score=round(meta_score, 1), documentation_score=round(doc_score, 1), security_score=round(sec_score, 1), total_score=round(total, 1), label=_label_for(total), flags=[f.to_dict() for f in scan_result.flags], ) def score_all_skills(skills_dir: Path) -> list[SkillScore]: """Score every skill directory found under skills_dir (recursively).""" scores: list[SkillScore] = [] for skill_file in sorted(skills_dir.rglob("SKILL.md")): skill_path = skill_file.parent if any(part.startswith(".") for part in skill_path.parts): continue # Use path relative to skills_dir as ID to avoid collisions in nested layouts rel_id = skill_path.relative_to(skills_dir).as_posix() result = score_skill(skill_path, skill_id=rel_id) if result is not None: scores.append(result) return scores # --------------------------------------------------------------------------- # Summary # --------------------------------------------------------------------------- def build_summary(scores: list[SkillScore]) -> dict[str, Any]: if not scores: return {} totals = [s.total_score for s in scores] avg = sum(totals) / len(totals) distribution: dict[str, int] = { "excellent": 0, "good": 0, "needs_improvement": 0, "critical": 0, } for s in scores: distribution[s.label] += 1 risk_breakdown: dict[str, int] = {} for s in scores: risk_breakdown[s.risk] = risk_breakdown.get(s.risk, 0) + 1 flag_errors = sum( 1 for s in scores for f in s.flags if f["severity"] == "error" ) flag_warnings = sum( 1 for s in scores for f in s.flags if f["severity"] == "warning" ) return { "total_skills": len(scores), "average_score": round(avg, 1), "min_score": round(min(totals), 1), "max_score": round(max(totals), 1), "score_distribution": distribution, "risk_breakdown": risk_breakdown, "flag_errors": flag_errors, "flag_warnings": flag_warnings, } # --------------------------------------------------------------------------- # CLI # --------------------------------------------------------------------------- def _print_table(scores: list[SkillScore], threshold: float | None = None) -> None: configure_utf8_output() label_icon = { "excellent": "✅", "good": "🟢", "needs_improvement": "⚠️ ", "critical": "❌", } flagged = [s for s in scores if threshold is not None and s.total_score < threshold] display = flagged if threshold is not None else scores header = f"{'Skill':<50} {'Total':>6} {'Meta':>6} {'Docs':>6} {'Sec':>6} Label" print(f"\n{'─' * len(header)}") print(header) print(f"{'─' * len(header)}") for s in display: icon = label_icon.get(s.label, " ") print( f"{s.skill_id:<50} {s.total_score:>6.1f} " f"{s.metadata_score:>6.1f} {s.documentation_score:>6.1f} " f"{s.security_score:>6.1f} {icon} {s.label}" ) def _print_summary(summary: dict) -> None: dist = summary.get("score_distribution", {}) print(f"\n{'═' * 60}") print("📊 SKILL QUALITY REPORT") print(f"{'─' * 60}") print(f" Skills scored : {summary.get('total_skills', 0)}") print(f" Average score : {summary.get('average_score', 0):.1f}") print(f" Min / Max : {summary.get('min_score', 0):.1f} / {summary.get('max_score', 0):.1f}") print(f" ✅ Excellent : {dist.get('excellent', 0)}") print(f" 🟢 Good : {dist.get('good', 0)}") print(f" ⚠️ Needs work : {dist.get('needs_improvement', 0)}") print(f" ❌ Critical : {dist.get('critical', 0)}") print(f" Security flags: {summary.get('flag_errors', 0)} errors, {summary.get('flag_warnings', 0)} warnings") print(f"{'═' * 60}\n") def main(argv: list[str] | None = None) -> int: configure_utf8_output() parser = argparse.ArgumentParser( description="Score Antigravity skill quality (metadata, documentation, security)." ) parser.add_argument( "--json", action="store_true", help="Print full results as JSON instead of table.", ) parser.add_argument( "--output", metavar="FILE", help="Write JSON results to FILE (e.g. data/scores.json).", ) parser.add_argument( "--threshold", type=float, default=None, metavar="N", help="Only display skills with total score below N.", ) parser.add_argument( "--top", type=int, default=None, metavar="N", help="Only display the top N lowest-scoring skills.", ) args = parser.parse_args(argv) repo_root = find_repo_root(__file__) skills_dir = repo_root / "skills" if not args.json: print(f"📐 Scoring skills in: {skills_dir}") scores = score_all_skills(skills_dir) summary = build_summary(scores) if args.json or args.output: payload = { "generated_at": datetime.now(timezone.utc).isoformat(), "summary": summary, "skills": [s.to_dict() for s in scores], } if args.json: print(json.dumps(payload, indent=2, ensure_ascii=False)) if args.output: output_path = repo_root / args.output output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text( json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8", ) print(f"\n💾 Saved to: {output_path}") else: display = scores if args.top: display = sorted(scores, key=lambda s: s.total_score)[: args.top] elif args.threshold is not None: display = [s for s in scores if s.total_score < args.threshold] _print_table(display) _print_summary(summary) return 0 if __name__ == "__main__": sys.exit(main())