#!/usr/bin/env sh
# Documentation link validity checker.
#
# Scans every Markdown file below the playbook root (two directories above
# this script), extracts inline links `[text](target)` and reference-style
# definitions `[label]: target`, and verifies that every local target exists
# on disk. Anchor-only links, http/https links and mailto links are counted
# as skipped rather than checked.
#
# Outputs:
#   /tmp/broken_links.txt      one record per broken link
#   /tmp/doc_links_report.txt  summary report (left in place for CI)
#
# Exit status: 0 when no broken link was found, 1 otherwise.

set -eu

echo "========================================"
echo "🔗 文档链接有效性检查"
echo "========================================"

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PLAYBOOK_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Counters updated by check_file_link.
TOTAL_LINKS=0
VALID_LINKS=0
BROKEN_LINKS=0
SKIPPED_LINKS=0

BROKEN_LINKS_FILE="/tmp/broken_links.txt"
REPORT_FILE="/tmp/doc_links_report.txt"

# Start from empty result files on every run.
: > "$BROKEN_LINKS_FILE"
: > "$REPORT_FILE"

# Scratch files: the list of Markdown files and the links of the file that is
# currently being processed. Both are removed on any exit path.
MD_LIST_FILE=""
LINKS_FILE=""

cleanup() {
  [ -z "$MD_LIST_FILE" ] || rm -f -- "$MD_LIST_FILE"
  [ -z "$LINKS_FILE" ] || rm -f -- "$LINKS_FILE"
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

MD_LIST_FILE="$(mktemp)"
LINKS_FILE="$(mktemp)"

# A literal tab, used as the field separator between line number and link.
TAB="$(printf '\t')"

echo "📁 Playbook 根目录: $PLAYBOOK_ROOT"
echo ""

# ============================================
# Helper functions
# ============================================

#######################################
# Check a single link found in a Markdown file and update the counters.
# Globals:
#   PLAYBOOK_ROOT (read), BROKEN_LINKS_FILE (appended to),
#   TOTAL_LINKS, VALID_LINKS, BROKEN_LINKS, SKIPPED_LINKS (written)
# Arguments:
#   $1 - path of the Markdown file that contains the link
#   $2 - link target exactly as written in the document
#   $3 - line number of the link inside $1
# Returns:
#   0 when the link is valid or skipped, 1 when it is broken.
#######################################
check_file_link() {
  local source_file="$1"
  local link_path="$2"
  local link_line="$3"
  local source_dir target_path candidate_path resolved_dir absolute_path

  TOTAL_LINKS=$((TOTAL_LINKS + 1))

  # Drop the "#anchor" part; only the file component is verified.
  target_path="${link_path%%#*}"

  # Empty or anchor-only link: nothing to check on disk.
  if [ -z "$target_path" ]; then
    SKIPPED_LINKS=$((SKIPPED_LINKS + 1))
    return 0
  fi

  # External (http/https) and mailto links are not checked.
  case "$target_path" in
    http://* | https://* | mailto:*)
      SKIPPED_LINKS=$((SKIPPED_LINKS + 1))
      return 0
      ;;
  esac

  # A leading "/" means "relative to the repository root"; anything else is
  # relative to the directory of the file that contains the link.
  source_dir="$(dirname "$source_file")"
  case "$target_path" in
    /*) candidate_path="$PLAYBOOK_ROOT$target_path" ;;
    *) candidate_path="$source_dir/$target_path" ;;
  esac

  # Normalise by resolving the parent directory. The cd is tested on its own:
  # if the directory does not exist the link is broken, and we must not fall
  # back to "/<basename>", which could accidentally exist at the filesystem
  # root.
  absolute_path=""
  if resolved_dir="$(cd "$(dirname "$candidate_path")" 2>/dev/null && pwd)"; then
    absolute_path="${resolved_dir%/}/$(basename "$candidate_path")"
  fi

  if [ -n "$absolute_path" ] && [ -e "$absolute_path" ]; then
    VALID_LINKS=$((VALID_LINKS + 1))
    return 0
  fi

  BROKEN_LINKS=$((BROKEN_LINKS + 1))
  {
    echo "❌ 断链: $source_file:$link_line"
    echo " 链接: $link_path"
    # Show the unresolved path when the parent directory is missing.
    echo " 目标: ${absolute_path:-$candidate_path}"
    echo ""
  } >> "$BROKEN_LINKS_FILE"
  return 1
}

#######################################
# Print every link target found in a Markdown file.
# Lines inside fenced code blocks (fence at column 0) and text inside inline
# code spans are ignored. A double-quoted link title is stripped from inline
# links, and footnote definitions ("[^id]: ...") are not treated as links.
# Arguments:
#   $1 - path of the Markdown file
# Outputs:
#   One "<line number><TAB><target>" record per link on stdout.
#######################################
extract_links() {
  awk '
    BEGIN { in_code = 0 }
    {
      line = $0

      # Toggle on fenced code block delimiters and skip fenced content.
      if (line ~ /^```/) { in_code = !in_code; next }
      if (in_code) next

      # Remove inline code spans so links inside them are ignored.
      gsub(/`[^`]*`/, "", line)

      # Inline links: [text](target) or [text](target "title").
      while (match(line, /\[[^]]+\]\([^)]*\)/)) {
        link = substr(line, RSTART, RLENGTH)
        sub(/^\[[^]]+\]\(/, "", link)
        sub(/\)$/, "", link)
        sub(/[[:space:]]+"[^"]*"[[:space:]]*$/, "", link)
        print NR "\t" link
        line = substr(line, RSTART + RLENGTH)
      }

      # Reference-style definitions: [label]: target (footnotes excluded).
      if (line !~ /^\[\^/ && match(line, /^\[[^]]+\]:[[:space:]]*[^[:space:]]+/)) {
        link = substr(line, RSTART, RLENGTH)
        sub(/^\[[^]]+\]:[[:space:]]*/, "", link)
        sub(/[[:space:]].*$/, "", link)
        print NR "\t" link
      }
    }
  ' "$1"
}

# ============================================
# Find and check all Markdown files
# ============================================

echo "🔍 扫描 Markdown 文件..."

cd "$PLAYBOOK_ROOT"

# The file list goes to a scratch file, one path per line, so that paths
# containing spaces or glob characters survive. Directories that happen to be
# named *.md are excluded. find errors are deliberately ignored (best effort,
# as before).
find . -name "*.md" \
  ! -type d \
  ! -name "*.template.md" \
  ! -path "*/node_modules/*" \
  ! -path "*/.git/*" \
  ! -path "*/build/*" \
  ! -path "*/dist/*" \
  > "$MD_LIST_FILE" 2>/dev/null || true

# grep -c exits non-zero on an empty list but still prints 0.
FILE_COUNT="$(grep -c '' "$MD_LIST_FILE" || true)"
echo "📄 找到 $FILE_COUNT 个 Markdown 文件"
echo ""

CURRENT_FILE_NUM=0
# The list is read on fd 3 so that stdin stays free inside the loop.
while IFS= read -r md_file <&3; do
  CURRENT_FILE_NUM=$((CURRENT_FILE_NUM + 1))

  # Progress: first file, every tenth file, and the last file.
  if [ "$CURRENT_FILE_NUM" -eq 1 ] ||
    [ $((CURRENT_FILE_NUM % 10)) -eq 0 ] ||
    [ "$CURRENT_FILE_NUM" -eq "$FILE_COUNT" ]; then
    echo "📖 处理中... [$CURRENT_FILE_NUM/$FILE_COUNT] $md_file"
  fi

  # A file (not a pipe) feeds the inner loop so the counters updated by
  # check_file_link are not lost in a subshell.
  extract_links "$md_file" > "$LINKS_FILE"
  while IFS="$TAB" read -r line_num link; do
    check_file_link "$md_file" "$link" "$line_num" || true
  done < "$LINKS_FILE"
done 3< "$MD_LIST_FILE"

echo ""
echo "✅ 扫描完成"
echo ""

# ============================================
# Generate the check report
# ============================================

echo "========================================"
echo "📊 链接检查结果统计"
echo "========================================"
echo "🔗 总链接数: $TOTAL_LINKS"
echo "✅ 有效链接: $VALID_LINKS"
echo "⏭️ 跳过链接: $SKIPPED_LINKS (外部/mailto)"
echo "❌ 断开链接: $BROKEN_LINKS"

if [ "$TOTAL_LINKS" -gt 0 ]; then
  CHECKED_LINKS=$((TOTAL_LINKS - SKIPPED_LINKS))
  if [ "$CHECKED_LINKS" -gt 0 ]; then
    # Success rate over the links that were actually checked.
    SUCCESS_RATE="$(awk -v valid="$VALID_LINKS" -v checked="$CHECKED_LINKS" \
      'BEGIN { printf "%.1f", (valid * 100.0) / checked }')"
    echo "📈 有效率: $SUCCESS_RATE%"
  fi
fi

echo ""

# Write the report file.
{
  echo "文档链接有效性检查报告"
  echo "========================"
  echo ""
  echo "检查时间: $(date '+%Y-%m-%d %H:%M:%S')"
  echo "检查目录: $PLAYBOOK_ROOT"
  echo ""
  echo "统计结果:"
  echo " 总链接数: $TOTAL_LINKS"
  echo " 有效链接: $VALID_LINKS"
  echo " 跳过链接: $SKIPPED_LINKS"
  echo " 断开链接: $BROKEN_LINKS"
  echo ""

  if [ "$BROKEN_LINKS" -gt 0 ]; then
    echo "断开链接详情:"
    echo "=============="
    cat "$BROKEN_LINKS_FILE"
  fi
} > "$REPORT_FILE"

if [ "$BROKEN_LINKS" -gt 0 ]; then
  echo "❌ 发现 $BROKEN_LINKS 个断开的链接"
  echo ""
  echo "断开链接详情:"
  cat "$BROKEN_LINKS_FILE"
  echo ""
  echo "📄 详细报告: $REPORT_FILE"
fi

echo "========================================"

# The broken-link file and the report are intentionally kept for CI.
# rm -f "$BROKEN_LINKS_FILE"

# Exit status reflects the result.
if [ "$BROKEN_LINKS" -eq 0 ]; then
  echo "✅ 所有文档链接检查通过"
  exit 0
else
  echo "❌ 文档链接检查失败"
  exit 1
fi