playbook/antigravity-awesome-skills/tools/scripts/fix_truncated_descriptions.py

258 lines
8.1 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import re
import sys
from pathlib import Path
from _safe_files import is_safe_regular_file
from _project_paths import find_repo_root
from validate_skills import configure_utf8_output, parse_frontmatter
ELLIPSIS_PATTERN = re.compile(r"(?:\.\.\.|…)\s*$")
MAX_DESCRIPTION_LENGTH = 300
MIN_PARAGRAPH_LENGTH = 40
TOP_LEVEL_KEY_PATTERN = re.compile(r"^[A-Za-z0-9_-]+:\s*")
FRONTMATTER_PATTERN = re.compile(r"^---\s*\n(.*?)\n---", re.DOTALL)
MARKDOWN_DECORATION_PATTERN = re.compile(r"[*_`]+")
HTML_TAG_PATTERN = re.compile(r"<[^>]+>")
MULTISPACE_PATTERN = re.compile(r"\s+")
def strip_frontmatter(content: str) -> str:
match = FRONTMATTER_PATTERN.search(content)
if not match:
return content
return content[match.end():].lstrip()
def normalize_text(text: str) -> str:
text = text.strip()
text = re.sub(r"^\s*>+\s?", "", text)
text = MARKDOWN_DECORATION_PATTERN.sub("", text)
text = HTML_TAG_PATTERN.sub("", text)
text = MULTISPACE_PATTERN.sub(" ", text)
return text.strip()
def split_candidate_paragraphs(body: str) -> list[str]:
paragraphs: list[str] = []
current: list[str] = []
in_code_block = False
for raw_line in body.splitlines():
line = raw_line.rstrip()
stripped = line.strip()
if stripped.startswith("```"):
in_code_block = not in_code_block
if current:
paragraphs.append(" ".join(current))
current = []
continue
if in_code_block:
continue
if not stripped:
if current:
paragraphs.append(" ".join(current))
current = []
continue
if stripped.startswith("#"):
if current:
paragraphs.append(" ".join(current))
current = []
continue
if stripped.startswith(("- ", "* ", "|", "1. ", "2. ", "3. ", "4. ", "5. ")):
if current:
paragraphs.append(" ".join(current))
current = []
continue
current.append(stripped)
if current:
paragraphs.append(" ".join(current))
return [normalize_text(paragraph) for paragraph in paragraphs if normalize_text(paragraph)]
def is_usable_paragraph(paragraph: str) -> bool:
lower = paragraph.lower()
if len(paragraph) < MIN_PARAGRAPH_LENGTH:
return False
if lower.startswith(("role:", "works well with:", "capabilities:", "patterns:", "anti-patterns:")):
return False
if lower.startswith("this skill is applicable to execute the workflow"):
return False
return True
def normalize_for_match(text: str) -> str:
return re.sub(r"[^a-z0-9]+", "", text.lower())
def pick_candidate(description: str, body: str) -> str | None:
paragraphs = [paragraph for paragraph in split_candidate_paragraphs(body) if is_usable_paragraph(paragraph)]
if not paragraphs:
return None
desc_prefix = ELLIPSIS_PATTERN.sub("", description).strip()
normalized_prefix = normalize_for_match(desc_prefix)
if normalized_prefix:
for paragraph in paragraphs:
normalized_paragraph = normalize_for_match(paragraph)
if normalized_paragraph.startswith(normalized_prefix) or normalized_prefix in normalized_paragraph:
return paragraph
return paragraphs[0]
def clamp_description(text: str, max_length: int = MAX_DESCRIPTION_LENGTH) -> str:
text = normalize_text(text)
if len(text) <= max_length:
return text
sentence_candidates = [". ", "! ", "? "]
best_split = -1
for marker in sentence_candidates:
split = text.rfind(marker, 0, max_length + 1)
if split > best_split:
best_split = split
if best_split != -1:
return text[: best_split + 1].strip()
split = text.rfind(" ", 0, max_length + 1)
if split == -1:
return text[:max_length].strip()
return text[:split].strip()
def escape_yaml_string(text: str) -> str:
return text.replace("\\", "\\\\").replace('"', '\\"')
def replace_description(frontmatter_text: str, new_description: str) -> str:
lines = frontmatter_text.splitlines()
replacement = f'description: "{escape_yaml_string(new_description)}"'
for index, line in enumerate(lines):
if not re.match(r"^\s*description:\s*", line):
continue
current_indent = len(line) - len(line.lstrip(" "))
end_index = index + 1
while end_index < len(lines):
candidate = lines[end_index]
stripped = candidate.strip()
candidate_indent = len(candidate) - len(candidate.lstrip(" "))
if not stripped:
end_index += 1
continue
if candidate_indent <= current_indent and TOP_LEVEL_KEY_PATTERN.match(stripped):
break
end_index += 1
updated = lines[:index] + [replacement] + lines[end_index:]
return "\n".join(updated)
raise ValueError("Description field not found in frontmatter.")
def update_skill_file(skill_path: Path) -> tuple[bool, str | None]:
if not is_safe_regular_file(skill_path):
return False, None
content = skill_path.read_text(encoding="utf-8")
match = FRONTMATTER_PATTERN.search(content)
if not match:
return False, None
metadata, _ = parse_frontmatter(content, skill_path.as_posix())
if not metadata:
return False, None
description = metadata.get("description")
if not isinstance(description, str) or not ELLIPSIS_PATTERN.search(description.strip()):
return False, None
candidate = pick_candidate(description, strip_frontmatter(content))
if not candidate:
return False, None
new_description = clamp_description(candidate)
if not new_description or new_description == normalize_text(description):
return False, None
updated_frontmatter = replace_description(match.group(1), new_description)
updated_content = f"---\n{updated_frontmatter}\n---{content[match.end():]}"
if updated_content == content:
return False, None
skill_path.write_text(updated_content, encoding="utf-8")
return True, new_description
def main() -> int:
configure_utf8_output()
parser = argparse.ArgumentParser(description="Repair truncated SKILL.md frontmatter descriptions.")
parser.add_argument("--dry-run", action="store_true", help="Report planned fixes without writing files.")
args = parser.parse_args()
repo_root = find_repo_root(__file__)
skills_dir = repo_root / "skills"
fixed = 0
skipped = 0
for root, dirs, files in os.walk(skills_dir):
dirs[:] = [directory for directory in dirs if not directory.startswith(".")]
if "SKILL.md" not in files:
continue
skill_path = Path(root) / "SKILL.md"
if not is_safe_regular_file(skill_path):
print(f"SKIP {skill_path.relative_to(repo_root)} [symlinked_or_unreadable]")
continue
content = skill_path.read_text(encoding="utf-8")
metadata, _ = parse_frontmatter(content, skill_path.as_posix())
description = metadata.get("description") if metadata else None
if not isinstance(description, str) or not ELLIPSIS_PATTERN.search(description.strip()):
continue
candidate = pick_candidate(description, strip_frontmatter(content))
if not candidate:
skipped += 1
print(f"SKIP {skill_path.relative_to(repo_root)}")
continue
new_description = clamp_description(candidate)
if args.dry_run:
fixed += 1
print(f"FIX {skill_path.relative_to(repo_root)} -> {new_description}")
continue
changed, _ = update_skill_file(skill_path)
if changed:
fixed += 1
print(f"FIX {skill_path.relative_to(repo_root)}")
else:
skipped += 1
print(f"SKIP {skill_path.relative_to(repo_root)}")
print(f"\nFixed: {fixed}")
print(f"Skipped: {skipped}")
return 0
if __name__ == "__main__":
sys.exit(main())