playbook/antigravity-awesome-skills/scripts/validate-links.sh

143 lines
4.0 KiB
Bash
Executable File

#!/bin/bash
# Path-aware, deterministic link validation for repository documentation.
#
# This wrapper only establishes the working directory; the validation itself
# is done by the Python program passed to python3 on stdin below.

# Abort on any failing command, on use of an unset variable, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# The project root is the parent of the script directory.
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Location of the generated report. The quoted heredoc below does not expand
# shell variables, so the Python program derives the same path on its own.
OUTPUT_FILE="$PROJECT_ROOT/docs_zh-CN/link-validation-report.txt"

# The Python program uses the current working directory as the project root.
cd "$PROJECT_ROOT"
python3 - <<'PY'
from __future__ import annotations
from pathlib import Path
from urllib.parse import unquote
import re
import sys
# The caller changes into the project root before starting the interpreter,
# so the working directory is used as the root for all path arithmetic.
PROJECT_ROOT = Path.cwd()

# Plain-text report written at the end of main().
OUTPUT_FILE = PROJECT_ROOT.joinpath("docs_zh-CN", "link-validation-report.txt")

# Files/directories (relative to the working directory) that are scanned.
SCAN_ROOTS = [Path(name) for name in ("README.md", "docs", "docs_zh-CN")]

# Inline Markdown link "[text](target)"; the negative lookbehind rejects
# image syntax "![alt](src)". Group 1 is the raw text between the parentheses.
LINK_RE = re.compile(r"(?<!!)\[[^\]]+\]\(([^)]+)\)")
def iter_markdown_files() -> list[Path]:
    """Return every Markdown file to scan, in sorted order.

    Each entry of SCAN_ROOTS that is a regular file is taken as-is; each entry
    that is a directory contributes all ``*.md`` files found recursively
    beneath it. Entries that are neither (e.g. missing) are skipped.
    """
    collected: list[Path] = []
    for entry in SCAN_ROOTS:
        if entry.is_file():
            collected.append(entry)
            continue
        if entry.is_dir():
            collected += sorted(entry.rglob("*.md"))
    # A final global sort keeps the scan order stable across runs.
    collected.sort()
    return collected
def strip_code_fences(text: str) -> str:
    """Blank out fenced code blocks while preserving the line count.

    Fence lines and every line inside a fenced block are replaced by empty
    strings so that links appearing in code samples are not validated. Lines
    outside fences are returned unchanged.

    Fences are matched the way CommonMark defines them:

    * a fence is a run of at least three backticks or three tildes;
    * a backtick fence's info string may not contain a backtick, so a
      one-line span such as "```code``` text" is not a fence opener;
    * a block is closed only by a run of the *same* character that is at
      least as long as the opener and carries no trailing text. This lets a
      four-backtick fence wrap an example that itself contains
      three-backtick fences;
    * an unclosed fence extends to the end of the text.

    Args:
        text: Markdown source.

    Returns:
        The text with fenced regions blanked, lines joined by "\\n".
    """
    kept: list[str] = []
    fence_char = ""  # "" while outside a fence, else "`" or "~"
    fence_len = 0
    for line in text.splitlines():
        stripped = line.lstrip()
        marker = stripped[:1]
        if marker in ("`", "~"):
            run = len(stripped) - len(stripped.lstrip(marker))
        else:
            run = 0
        rest = stripped[run:]

        if not fence_char:
            opens = run >= 3 and not (marker == "`" and "`" in rest)
            if opens:
                fence_char, fence_len = marker, run
                kept.append("")
            else:
                kept.append(line)
            continue

        # Inside a fence: everything is blanked, including the closing line.
        if marker == fence_char and run >= fence_len and not rest.strip():
            fence_char, fence_len = "", 0
        kept.append("")
    return "\n".join(kept)
def normalize_target(raw_target: str) -> str:
    """Reduce a raw link target to the filesystem path it refers to.

    Steps, in order:

    1. Trim surrounding whitespace.
    2. Drop an optional trailing link title, i.e. a quoted string separated
       from the destination by whitespace (``path "Title"`` or
       ``path 'Title'``). Without this the title would be treated as part of
       the path and a valid link would be reported as broken.
    3. Remove the ``<...>`` wrapper used for destinations containing spaces.
    4. Drop the ``#fragment`` part.
    5. Percent-decode the remainder.

    Args:
        raw_target: Text captured between the parentheses of a Markdown link.

    Returns:
        The decoded path portion; an empty string when nothing remains
        (for example an anchor-only target).
    """
    target = raw_target.strip()

    closing = target[-1:]
    if closing in ('"', "'"):
        # Look for the opening quote of the title: the first matching quote
        # that is preceded by whitespace and is not the closing quote itself.
        for index in range(1, len(target) - 1):
            if target[index] == closing and target[index - 1].isspace():
                target = target[:index].rstrip()
                break

    if target.startswith("<") and target.endswith(">"):
        target = target[1:-1].strip()
    return unquote(target.split("#", 1)[0].strip())
def is_external_or_anchor(raw_target: str) -> bool:
    """Tell whether a link target should be skipped by the local file check.

    A target is skipped when it is empty, is a same-document anchor
    (``#...``), is protocol-relative (``//host/...``), or begins with a URI
    scheme such as ``http:``, ``https:``, ``mailto:``, ``tel:`` or ``ftp:``.
    A ``<...>`` wrapper around the destination is ignored for this test.

    Scheme detection follows the RFC 3986 grammar (a letter followed by
    letters, digits, ``+``, ``.`` or ``-``, then ``:``) but requires at least
    two characters so that a Windows drive letter (``C:/...``) is still
    treated as a local path.

    Args:
        raw_target: Text captured between the parentheses of a Markdown link.

    Returns:
        True if the target is not a local file reference, False otherwise.
    """
    target = raw_target.strip().lower()
    if target.startswith("<"):
        end = target.find(">")
        if end != -1:
            target = target[1:end].strip()

    if not target or target.startswith(("#", "//")):
        return True

    scheme, colon, _ = target.partition(":")
    if not colon or len(scheme) < 2:
        return False
    return (
        scheme.isascii()
        and scheme[0].isalpha()
        and all(ch.isalnum() or ch in "+.-" for ch in scheme)
    )
def resolve_link(source_file: Path, target: str) -> Path:
    """Turn a normalized link target into an absolute, resolved path.

    A target beginning with "/" is interpreted relative to PROJECT_ROOT (its
    leading slashes are dropped first); any other target is interpreted
    relative to the directory containing ``source_file``.
    """
    rooted = target.startswith("/")
    base = PROJECT_ROOT if rooted else source_file.parent
    tail = target.lstrip("/") if rooted else target
    return (base / tail).resolve()
def relative_to_root(path: Path) -> str:
    """Format ``path`` for display in the report.

    Returns the POSIX-style path relative to PROJECT_ROOT when ``path`` lies
    beneath it; otherwise falls back to ``str(path)`` unchanged.
    """
    try:
        inside = path.relative_to(PROJECT_ROOT)
    except ValueError:
        # relative_to() raises ValueError when path is not under the root.
        return str(path)
    else:
        return inside.as_posix()
def main() -> int:
    """Validate internal Markdown links and write the validation report.

    For every file returned by iter_markdown_files(), fenced code blocks are
    blanked and each inline link is examined:

    * external and anchor-only targets are skipped (http/https URLs are
      remembered so a sample can be listed in the report);
    * every other target is normalized, resolved to an absolute path and
      checked for existence on disk.

    The report is written to OUTPUT_FILE and a short summary is printed.

    Returns:
        1 if at least one internal link is broken, otherwise 0. The value is
        intended to be used as the process exit status.
    """
    checked = 0
    # (source file, raw target as written, resolved path shown in the report)
    broken: list[tuple[str, str, str]] = []
    external: set[str] = set()

    for source in iter_markdown_files():
        # errors="replace" keeps a badly encoded file from aborting the run.
        text = strip_code_fences(source.read_text(encoding="utf-8", errors="replace"))
        for match in LINK_RE.finditer(text):
            raw_target = match.group(1).strip()
            if is_external_or_anchor(raw_target):
                if raw_target.lower().startswith(("http://", "https://")):
                    external.add(raw_target)
                continue
            target = normalize_target(raw_target)
            if not target:
                continue
            checked += 1
            resolved = resolve_link(source, target)
            if not resolved.exists():
                broken.append((source.as_posix(), raw_target, relative_to_root(resolved)))

    report_lines = [
        "Link Validation Report",
        "======================",
        # No timestamp on purpose: identical input yields an identical report.
        "Generated: deterministic",
        "",
        "Scanned roots:",
    ]
    # Derived from SCAN_ROOTS so the report cannot drift from what was scanned.
    report_lines.extend(f"- {root.as_posix()}" for root in SCAN_ROOTS)
    report_lines.extend(
        [
            "",
            "Internal links:",
            f"- Checked: {checked}",
            f"- Broken: {len(broken)}",
        ]
    )
    if broken:
        report_lines.extend(["", "Broken internal links:"])
        for source_name, raw_target, resolved_name in broken:
            report_lines.append(f"- {source_name}: {raw_target} -> {resolved_name}")
    report_lines.extend(
        [
            "",
            "External links:",
            "- Sample only; not fetched by this local validator.",
        ]
    )
    # Only the first 20 URLs (sorted) are listed to keep the report short.
    report_lines.extend(f"- {url}" for url in sorted(external)[:20])

    # The scan tolerates a missing docs_zh-CN directory, so the write must
    # too: create the report's parent directory instead of failing at the end.
    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    OUTPUT_FILE.write_text("\n".join(report_lines) + "\n", encoding="utf-8")

    print(f"Link validation complete. Report saved to: {relative_to_root(OUTPUT_FILE)}")
    print(f"Internal links checked: {checked}")
    print(f"Broken internal links: {len(broken)}")
    return 1 if broken else 0
if __name__ == "__main__":
    # Use main()'s return value (0 or 1) as the process exit status.
    raise SystemExit(main())
PY