#!/usr/bin/env sh
#
# Documentation link validity check.
#
# Scans the Markdown files under the playbook root and verifies that the
# local (non-URL) targets of their links exist on disk.
#
# Optional environment overrides:
#   BROKEN_LINKS_FILE  scratch file collecting broken-link details
#                      (default: /tmp/broken_links.txt)
#   REPORT_FILE        plain-text report path
#                      (default: /tmp/doc_links_report.txt)
# The defaults are the fixed paths the script has always used, so existing
# consumers of those files keep working; override them when several runs
# share a machine.

set -eu

echo "========================================"
echo "🔗 文档链接有效性检查"
echo "========================================"

# CDPATH is cleared for the cd calls so that cd never echoes a directory
# into the command substitution.
SCRIPT_DIR="$(CDPATH= cd "$(dirname "$0")" && pwd)"
# The playbook root is two directory levels above this script.
PLAYBOOK_ROOT="$(CDPATH= cd "$SCRIPT_DIR/../.." && pwd)"

# Global counters updated while links are checked.
TOTAL_LINKS=0
VALID_LINKS=0
BROKEN_LINKS=0
SKIPPED_LINKS=0

BROKEN_LINKS_FILE="${BROKEN_LINKS_FILE:-/tmp/broken_links.txt}"
REPORT_FILE="${REPORT_FILE:-/tmp/doc_links_report.txt}"

# Start every run with empty output files.
: > "$BROKEN_LINKS_FILE"
: > "$REPORT_FILE"

echo "📁 Playbook 根目录: $PLAYBOOK_ROOT"
echo ""
# ============================================
# Helper functions
# ============================================

# Validate one link found in a Markdown file and update the global counters.
#
# Globals:
#   PLAYBOOK_ROOT      (read)    prefix for link targets that start with "/"
#   BROKEN_LINKS_FILE  (append)  receives the details of every broken link
#   TOTAL_LINKS, VALID_LINKS, BROKEN_LINKS, SKIPPED_LINKS (incremented)
# Arguments:
#   $1 - path of the Markdown file that contains the link
#   $2 - link target exactly as written in the document
#   $3 - line number of the link in $1 (only used in the broken-link record)
# Returns:
#   0 when the link is skipped or its target exists, 1 when it is broken.
check_file_link() {
  local source_file="$1"
  local link_path="$2"
  local link_line="$3"
  local target_path raw_path resolved_dir absolute_path

  TOTAL_LINKS=$((TOTAL_LINKS + 1))

  # Only the file part is checked; drop the #fragment.
  target_path="${link_path%%#*}"

  # Empty / anchor-only links and targets with a URL scheme are not files
  # on disk, so they are counted as skipped instead of being checked.
  case "$target_path" in
    '' | http://* | https://* | ftp://* | ftps://* | mailto:* | tel:*)
      SKIPPED_LINKS=$((SKIPPED_LINKS + 1))
      return 0
      ;;
  esac

  # A leading "/" means "relative to the repository root"; anything else is
  # relative to the directory of the file that contains the link.
  case "$target_path" in
    /*) raw_path="$PLAYBOOK_ROOT$target_path" ;;
    *) raw_path="$(dirname "$source_file")/$target_path" ;;
  esac

  # Normalize through the parent directory. The cd result is tested on its
  # own: if the directory is missing the link is broken. Folding cd and
  # basename into one assignment would hide the cd failure and leave
  # "/<basename>" behind, which may exist (e.g. /tmp) and look valid.
  if resolved_dir="$(CDPATH= cd "$(dirname "$raw_path")" 2>/dev/null && pwd)"; then
    absolute_path="$resolved_dir/$(basename "$raw_path")"
  else
    absolute_path=""
  fi

  if [ -n "$absolute_path" ] && [ -e "$absolute_path" ]; then
    VALID_LINKS=$((VALID_LINKS + 1))
    return 0
  fi

  BROKEN_LINKS=$((BROKEN_LINKS + 1))
  # When the parent directory could not be resolved, record the
  # un-normalized path so the entry still shows what was looked up.
  {
    printf '%s\n' "❌ 断链: $source_file:$link_line"
    printf '%s\n' " 链接: $link_path"
    printf '%s\n' " 目标: ${absolute_path:-$raw_path}"
    printf '\n'
  } >> "$BROKEN_LINKS_FILE"
  return 1
}
# Print every link target found in a Markdown file.
#
# Arguments:
#   $1 - path of the Markdown file to scan
# Outputs:
#   One line per link on stdout: "<line number><TAB><target>".
#   Inline links "[text](target)" and reference definitions
#   "[label]: target" are reported. Fenced code blocks, inline code spans
#   and footnote definitions "[^id]: text" are ignored. A double-quoted
#   title, a <...> wrapper and surrounding whitespace are removed from the
#   target.
#
# The awk program sticks to POSIX features (two-argument match with
# RSTART/RLENGTH, no character classes) so it behaves the same under gawk,
# mawk, BusyBox awk and BSD awk.
extract_links() {
  awk '
    # Reduce a raw destination to the bare path or URL.
    function link_target(s) {
      sub(/^[ \t]+/, "", s)
      sub(/[ \t\r]+$/, "", s)
      if (s ~ /^</) {
        # <...> form: everything up to the closing bracket is the target.
        sub(/^</, "", s)
        sub(/>.*$/, "", s)
        return s
      }
      # Optional trailing "title".
      sub(/[ \t]+"[^"]*"$/, "", s)
      return s
    }

    BEGIN { in_code = 0 }

    {
      line = $0

      # A fence line toggles code-block state and is never scanned itself.
      if (line ~ /^```/) { in_code = !in_code; next }
      if (in_code) next

      # Remove inline code spans so example links inside backticks are ignored.
      gsub(/`[^`]*`/, "", line)

      # Inline links: consume the line match by match.
      rest = line
      found = 0
      while (match(rest, /\[[^]]+\]\([^)]+\)/)) {
        hit = substr(rest, RSTART, RLENGTH)
        rest = substr(rest, RSTART + RLENGTH)
        found = 1
        # The link text holds no "]", so the first "](" splits text and target.
        sep = index(hit, "](")
        target = link_target(substr(hit, sep + 2, length(hit) - sep - 2))
        if (target != "") print NR "\t" target
      }

      # Reference definition. It is tested against the whole line and only
      # when no inline link was found, so the tail left after an inline
      # link is never mistaken for a definition.
      if (!found && line !~ /^\[\^/ && match(line, /^\[[^]]+\]:[ \t]*/)) {
        target = link_target(substr(line, RLENGTH + 1))
        if (target != "") print NR "\t" target
      }
    }
  ' "$1"
}
# ============================================
# Find and check every Markdown file
# ============================================

echo "🔍 扫描 Markdown 文件..."

cd "$PLAYBOOK_ROOT"

# The file list and the per-file link list live in temp files that are
# removed on any exit path, including an abort under set -e.
MD_LIST_FILE="$(mktemp)"
LINKS_FILE="$(mktemp)"
trap 'rm -f "$MD_LIST_FILE" "$LINKS_FILE"' EXIT

# One path per line. Reading the list line by line (instead of expanding it
# unquoted in a for loop) keeps paths with spaces or glob characters intact.
# Paths containing a newline are not supported.
# find errors (e.g. unreadable directories) are deliberately ignored.
find . -name "*.md" \
  ! -path "*/node_modules/*" \
  ! -path "*/.git/*" \
  ! -path "*/build/*" \
  ! -path "*/dist/*" \
  > "$MD_LIST_FILE" 2>/dev/null || true

# grep -c "" counts lines and prints 0 for an empty list; it exits non-zero
# in that case, hence the "|| true".
FILE_COUNT=$(grep -c '' "$MD_LIST_FILE" || true)
echo "📄 找到 $FILE_COUNT 个 Markdown 文件"
echo ""

CURRENT_FILE_NUM=0
TAB="$(printf '\t')"

# The file list is read on descriptor 3 so the inner loop can use stdin.
while IFS= read -r md_file <&3; do
  CURRENT_FILE_NUM=$((CURRENT_FILE_NUM + 1))

  # Progress line for the first file, every tenth file and the last file.
  if [ "$CURRENT_FILE_NUM" -eq 1 ] || [ $((CURRENT_FILE_NUM % 10)) -eq 0 ] || [ "$CURRENT_FILE_NUM" -eq "$FILE_COUNT" ]; then
    echo "📖 处理中... [$CURRENT_FILE_NUM/$FILE_COUNT] $md_file"
  fi

  # Links go through a file rather than a pipe so the counters updated by
  # check_file_link are not lost in a subshell.
  extract_links "$md_file" > "$LINKS_FILE"
  while IFS="$TAB" read -r line_num link; do
    # A broken link is recorded by check_file_link; it must not stop the scan.
    check_file_link "$md_file" "$link" "$line_num" || true
  done < "$LINKS_FILE"
done 3< "$MD_LIST_FILE"

echo ""
echo "✅ 扫描完成"
echo ""

# ============================================
# Print the summary, write the report, set the exit status
# ============================================

echo "========================================"
echo "📊 链接检查结果统计"
echo "========================================"
echo "🔗 总链接数: $TOTAL_LINKS"
echo "✅ 有效链接: $VALID_LINKS"
echo "⏭️ 跳过链接: $SKIPPED_LINKS (外部/mailto)"
echo "❌ 断开链接: $BROKEN_LINKS"

# The success rate is relative to the links that were actually checked on
# disk; it is omitted when every link was skipped (or none was found).
CHECKED_LINKS=$((TOTAL_LINKS - SKIPPED_LINKS))
if [ "$CHECKED_LINKS" -gt 0 ]; then
  # Values are handed to awk with -v instead of being pasted into the
  # program text.
  SUCCESS_RATE=$(awk -v valid="$VALID_LINKS" -v checked="$CHECKED_LINKS" \
    'BEGIN { printf "%.1f", (valid * 100.0) / checked }')
  echo "📈 有效率: $SUCCESS_RATE%"
fi

echo ""

# Write the report file.
{
  echo "文档链接有效性检查报告"
  echo "========================"
  echo ""
  echo "检查时间: $(date '+%Y-%m-%d %H:%M:%S')"
  echo "检查目录: $PLAYBOOK_ROOT"
  echo ""
  echo "统计结果:"
  echo " 总链接数: $TOTAL_LINKS"
  echo " 有效链接: $VALID_LINKS"
  echo " 跳过链接: $SKIPPED_LINKS"
  echo " 断开链接: $BROKEN_LINKS"
  echo ""
  if [ "$BROKEN_LINKS" -gt 0 ]; then
    echo "断开链接详情:"
    echo "=============="
    cat "$BROKEN_LINKS_FILE"
  fi
} > "$REPORT_FILE"

# Repeat the broken-link details on stdout so they show up in the job log.
if [ "$BROKEN_LINKS" -gt 0 ]; then
  echo "❌ 发现 $BROKEN_LINKS 个断开的链接"
  echo ""
  echo "断开链接详情:"
  cat "$BROKEN_LINKS_FILE"
  echo ""
  echo "📄 详细报告: $REPORT_FILE"
fi

echo "========================================"

# Temp-file cleanup is intentionally disabled (the report is kept for CI).
# rm -f "$BROKEN_LINKS_FILE"

# Exit status: 0 when no broken link was found, 1 otherwise.
if [ "$BROKEN_LINKS" -eq 0 ]; then
  echo "✅ 所有文档链接检查通过"
  exit 0
else
  echo "❌ 文档链接检查失败"
  exit 1
fi