playbook/tests/integration/check_doc_links.sh

227 lines
5.9 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env sh
# Documentation link validity checker.
#
# Prints a banner, resolves the playbook root (two directories above this
# script), resets the link counters and truncates the two output files.
#
# Optional environment overrides (defaults keep the historical paths):
#   DOC_LINKS_BROKEN_FILE  - where broken-link details are collected
#   DOC_LINKS_REPORT_FILE  - where the final text report is written
set -eu

echo "========================================"
echo "🔗 文档链接有效性检查"
echo "========================================"

# CDPATH is cleared for these lookups: when it is set, `cd` may print the
# directory it resolved, which would end up inside the captured value.
SCRIPT_DIR="$(CDPATH= cd -- "$(dirname "$0")" && pwd)"
PLAYBOOK_ROOT="$(CDPATH= cd -- "$SCRIPT_DIR/../.." && pwd)"

# Counters updated by check_file_link.
TOTAL_LINKS=0
VALID_LINKS=0
BROKEN_LINKS=0
SKIPPED_LINKS=0

BROKEN_LINKS_FILE="${DOC_LINKS_BROKEN_FILE:-/tmp/broken_links.txt}"
REPORT_FILE="${DOC_LINKS_REPORT_FILE:-/tmp/doc_links_report.txt}"

# Start both output files empty so results from a previous run never leak in.
: > "$BROKEN_LINKS_FILE"
: > "$REPORT_FILE"

echo "📁 Playbook 根目录: $PLAYBOOK_ROOT"
echo ""
# ============================================
# Helper functions
# ============================================

#######################################
# Check that one link found in a Markdown file points at an existing path.
# Globals:
#   PLAYBOOK_ROOT      (read)    base directory for links that start with "/"
#   BROKEN_LINKS_FILE  (read)    file that broken-link details are appended to
#   TOTAL_LINKS, VALID_LINKS, BROKEN_LINKS, SKIPPED_LINKS (written) counters
# Arguments:
#   $1 - path of the Markdown file containing the link
#   $2 - link target exactly as written in the document
#   $3 - line number of the link (used only in the broken-link record)
# Returns:
#   0 when the link is valid or skipped, 1 when it is broken.
#######################################
check_file_link() {
  local source_file="$1"
  local link_path="$2"
  local link_line="$3"
  local source_dir
  local target_path
  local target_dir
  local absolute_path

  TOTAL_LINKS=$((TOTAL_LINKS + 1))

  # Drop the "#anchor" part; only the file portion is verified.
  target_path="${link_path%%#*}"

  # Empty targets (pure anchors), http/https URLs and mailto links are
  # counted as skipped rather than checked.
  case "$target_path" in
    '' | http://* | https://* | mailto:*)
      SKIPPED_LINKS=$((SKIPPED_LINKS + 1))
      return 0
      ;;
  esac

  # A leading "/" is resolved against the playbook root; anything else is
  # resolved against the directory of the file that contains the link.
  source_dir="$(dirname "$source_file")"
  case "$target_path" in
    /*) absolute_path="$PLAYBOOK_ROOT$target_path" ;;
    *) absolute_path="$source_dir/$target_path" ;;
  esac

  # Normalise the parent directory when it exists. If it does not, keep the
  # un-normalised path: it cannot exist, and it must not collapse to
  # "/<basename>", which could match an unrelated top-level entry.
  if target_dir="$(CDPATH= cd -- "$(dirname "$absolute_path")" 2>/dev/null && pwd)"; then
    absolute_path="${target_dir%/}/$(basename "$absolute_path")"
  fi

  if [ -e "$absolute_path" ]; then
    VALID_LINKS=$((VALID_LINKS + 1))
    return 0
  fi

  BROKEN_LINKS=$((BROKEN_LINKS + 1))
  {
    printf '%s\n' "❌ 断链: $source_file:$link_line"
    printf '%s\n' " 链接: $link_path"
    printf '%s\n' " 目标: $absolute_path"
    printf '\n'
  } >> "$BROKEN_LINKS_FILE"
  return 1
}
#######################################
# List the link destinations found in a Markdown file.
# Lines between ``` fences and inline `code` spans are ignored. Inline links
# and images ([text](dest)) and reference definitions ([label]: dest) are
# reported. An optional link title and surrounding <angle brackets> are
# removed so only the destination itself is emitted.
# Arguments:
#   $1 - Markdown file to scan
# Outputs:
#   One "<line number><TAB><destination>" record per link on stdout.
#######################################
extract_links() {
  awk -v sq="'" '
    # Reduce the raw text between ( and ) to the bare link destination.
    function clean_target(dest) {
      sub(/^[[:space:]]+/, "", dest)
      sub(/[[:space:]]+$/, "", dest)
      # <dest> form: the destination is whatever sits inside the brackets.
      if (dest ~ /^<[^>]*>/) {
        sub(/^</, "", dest)
        sub(/>.*$/, "", dest)
        return dest
      }
      # dest "title" form: drop the trailing quoted title.
      sub(title_re, "", dest)
      return dest
    }

    BEGIN {
      in_code = 0
      # Matches a trailing title in double or single quotes.
      title_re = "[[:space:]]+(\"[^\"]*\"|" sq "[^" sq "]*" sq ")$"
    }

    {
      line = $0

      # A line starting with a fence toggles the code-block state.
      if (line ~ /^```/) { in_code = !in_code; next }
      if (in_code) next

      # Remove inline code spans so links inside them are not reported.
      gsub(/`[^`]*`/, "", line)

      # Inline links: consume the line one match at a time.
      while (match(line, /\[[^]]+\]\([^)]*\)/)) {
        link = substr(line, RSTART, RLENGTH)
        sub(/^\[[^]]+\]\(/, "", link)
        sub(/\)$/, "", link)
        print NR "\t" clean_target(link)
        line = substr(line, RSTART + RLENGTH)
      }

      # Reference definition at the start of what is left of the line.
      if (match(line, /^\[[^]]+\]:[[:space:]]*[^[:space:]]+/)) {
        link = substr(line, RSTART, RLENGTH)
        sub(/^\[[^]]+\]:[[:space:]]*/, "", link)
        sub(/[[:space:]].*$/, "", link)
        print NR "\t" clean_target(link)
      }
    }
  ' "$1"
}
# ============================================
# Find and check every Markdown file
# ============================================
echo "🔍 扫描 Markdown 文件..."
cd "$PLAYBOOK_ROOT"

# One path per line. Dependency, VCS and build-output directories are
# excluded; errors from find are deliberately ignored (best effort).
MD_FILES=$(find . -name "*.md" \
  ! -path "*/node_modules/*" \
  ! -path "*/.git/*" \
  ! -path "*/build/*" \
  ! -path "*/dist/*" \
  2>/dev/null || true)

# An empty result must count as 0 files, not as one empty line.
if [ -n "$MD_FILES" ]; then
  FILE_COUNT=$(printf '%s\n' "$MD_FILES" | grep -c '')
else
  FILE_COUNT=0
fi
echo "📄 找到 $FILE_COUNT 个 Markdown 文件"
echo ""

CURRENT_FILE_NUM=0
links_file="$(mktemp)"
tab_char="$(printf '\t')"

# Read the list line by line so paths containing spaces or glob characters
# stay intact (an unquoted for-loop would split and expand them).
while IFS= read -r md_file; do
  [ -n "$md_file" ] || continue
  CURRENT_FILE_NUM=$((CURRENT_FILE_NUM + 1))

  # Progress is shown for the first file, every tenth file and the last one.
  if [ "$CURRENT_FILE_NUM" -eq 1 ] || [ $((CURRENT_FILE_NUM % 10)) -eq 0 ] || [ "$CURRENT_FILE_NUM" -eq "$FILE_COUNT" ]; then
    echo "📖 处理中... [$CURRENT_FILE_NUM/$FILE_COUNT] $md_file"
  fi

  # The links go through a file rather than a pipe so the counters updated
  # by check_file_link are not lost in a subshell.
  extract_links "$md_file" > "$links_file"
  while IFS="$tab_char" read -r line_num link; do
    # A broken link is recorded by check_file_link; keep scanning.
    check_file_link "$md_file" "$link" "$line_num" || true
  done < "$links_file"
done <<EOF
$MD_FILES
EOF

rm -f "$links_file"
echo ""
echo "✅ 扫描完成"
echo ""
# ============================================
# Produce the check report
# ============================================

# Summary of the counters on stdout.
cat <<EOF
========================================
📊 链接检查结果统计
========================================
🔗 总链接数: $TOTAL_LINKS
✅ 有效链接: $VALID_LINKS
⏭️ 跳过链接: $SKIPPED_LINKS (外部/mailto)
❌ 断开链接: $BROKEN_LINKS
EOF

# The success rate is computed over the links that were actually checked,
# and only shown when there is at least one of them.
if [ "$TOTAL_LINKS" -gt 0 ]; then
  CHECKED_LINKS=$((TOTAL_LINKS - SKIPPED_LINKS))
  if [ "$CHECKED_LINKS" -gt 0 ]; then
    SUCCESS_RATE=$(awk -v valid="$VALID_LINKS" -v checked="$CHECKED_LINKS" \
      'BEGIN { printf "%.1f", (valid * 100.0) / checked }')
    printf '%s\n' "📈 有效率: $SUCCESS_RATE%"
  fi
fi
printf '\n'

# Write the report file: header and statistics first, then the broken-link
# details when there are any.
cat > "$REPORT_FILE" <<EOF
文档链接有效性检查报告
========================

检查时间: $(date '+%Y-%m-%d %H:%M:%S')
检查目录: $PLAYBOOK_ROOT

统计结果:
 总链接数: $TOTAL_LINKS
 有效链接: $VALID_LINKS
 跳过链接: $SKIPPED_LINKS
 断开链接: $BROKEN_LINKS

EOF

if [ "$BROKEN_LINKS" -gt 0 ]; then
  {
    printf '%s\n' "断开链接详情:"
    printf '%s\n' "=============="
    cat "$BROKEN_LINKS_FILE"
  } >> "$REPORT_FILE"

  # Repeat the details on stdout and point at the report file.
  printf '%s\n' "❌ 发现 $BROKEN_LINKS 个断开的链接"
  printf '\n'
  printf '%s\n' "断开链接详情:"
  cat "$BROKEN_LINKS_FILE"
  printf '\n'
  printf '%s\n' "📄 详细报告: $REPORT_FILE"
fi
printf '%s\n' "========================================"

# Temporary-file cleanup is intentionally disabled: the report files are kept
# for CI.
# rm -f "$BROKEN_LINKS_FILE"

# Exit status: 0 when no broken link was found, 1 otherwise.
if [ "$BROKEN_LINKS" -ne 0 ]; then
  printf '%s\n' "❌ 文档链接检查失败"
  exit 1
fi
printf '%s\n' "✅ 所有文档链接检查通过"
exit 0