playbook/outfitter-agents/plugins/outfitter/skills/docs-audit/scripts/discover-docs.ts

367 lines
10 KiB
TypeScript
Executable File

#!/usr/bin/env bun
/**
* discover-docs.ts - Markdown documentation discovery with git metadata
*
* Extracts a compact manifest of all markdown files with git history,
* activity status, and related code file tracking for staleness detection.
*
* Usage:
* ./discover-docs.ts # All markdown files
* ./discover-docs.ts --path src/ # Specific directory
* ./discover-docs.ts --limit 20 # Limit results
* ./discover-docs.ts --sort staleness # Sort by staleness (default)
* ./discover-docs.ts --sort alpha # Sort alphabetically
* ./discover-docs.ts --format json # JSON output (default)
* ./discover-docs.ts --format text # Human-readable output
*/
import { $ } from "bun";
import { parseArgs } from "util";
import { stat } from "node:fs/promises";
/**
* A single discovered markdown file with its git metadata, size statistics
* and the code files that changed alongside it.
*
* When no git history is available the git-derived fields hold placeholders:
* getGitInfo uses "uncommitted" / "unknown" and analyzeFile uses "n/a"; in
* those cases lastCommitDate is the current time and daysAgo is 0.
*/
interface DocFile {
/**
* File path as returned by findMarkdownFiles.
* NOTE(review): originally described as "relative to repo root"; the
* discovery commands are run from the current directory — confirm which
* base the paths are actually relative to.
*/
path: string;
/** Abbreviated SHA (%h) of the last commit touching this file, or a placeholder */
lastCommitSha: string;
/** Author date of the last commit in ISO form (%aI); current time when no commit is known */
lastCommitDate: string;
/** Author name (%an) of the last commit, or a placeholder */
lastAuthor: string;
/** Whole days between lastCommitDate and now; 0 when no commit is known */
daysAgo: number;
/** Activity classification derived from daysAgo by getActivityStatus */
activityStatus: "active" | "recent" | "idle" | "stale" | "ancient";
/** Line count (0 when the file cannot be read) */
lines: number;
/** File size in bytes (0 when the file cannot be read) */
bytes: number;
/** Code files modified in the same commits as this file (at most 10 entries) */
relatedCodeFiles: string[];
}
/**
* Discovery manifest: the top-level object that main prints as JSON or
* renders as text via formatText.
*/
interface Manifest {
/** Generation timestamp (ISO string taken when the manifest is assembled) */
generated: string;
/** Repository root path; getRepoRoot falls back to the current working directory */
repoRoot: string;
/** Whether this is a git repository */
isGitRepo: boolean;
/** Total files discovered, counted before any --limit is applied */
totalFiles: number;
/** Documentation files with metadata, already sorted and limited */
files: DocFile[];
}
// Activity status thresholds (days).
// getActivityStatus compares with `<`, so each value is an exclusive upper
// bound: active < 7, recent < 30, idle < 90, stale < 365.
const ACTIVITY_THRESHOLDS = {
active: 7,
recent: 30,
idle: 90,
stale: 365,
// ancient: everything at or above the stale bound (>= 365 days)
} as const;
/**
 * Classifies a document by how long ago it was last modified.
 *
 * The statuses are checked from freshest to oldest; the first one whose
 * threshold is strictly greater than `daysAgo` wins.
 *
 * @param daysAgo - Days since last modification
 * @returns The matching activity status, or "ancient" when no threshold applies
 */
function getActivityStatus(daysAgo: number): DocFile["activityStatus"] {
  const freshestFirst = ["active", "recent", "idle", "stale"] as const;
  for (const status of freshestFirst) {
    if (daysAgo < ACTIVITY_THRESHOLDS[status]) {
      return status;
    }
  }
  return "ancient";
}
/**
 * Number of whole days from `date1` to `date2`.
 *
 * The difference is floored, so partial days are dropped and a `date2`
 * earlier than `date1` yields a negative result.
 *
 * @param date1 - Start of the interval
 * @param date2 - End of the interval
 * @returns Floored day difference (`date2 - date1`)
 */
function daysBetween(date1: Date, date2: Date): number {
  const millisecondsPerDay = 24 * 60 * 60 * 1000;
  const elapsedMs = date2.valueOf() - date1.valueOf();
  return Math.floor(elapsedMs / millisecondsPerDay);
}
/**
 * Reports whether the current directory is inside a git work tree.
 *
 * `git rev-parse --is-inside-work-tree` exits successfully but prints
 * "false" when run inside a `.git` directory or a bare repository, so the
 * printed value is checked rather than relying on the exit status alone.
 *
 * @returns true only when git prints "true"; false when git fails or is missing
 */
async function isGitRepo(): Promise<boolean> {
  try {
    const result = await $`git rev-parse --is-inside-work-tree`.quiet();
    return result.text().trim() === "true";
  } catch {
    // Not a repository, or git is not installed.
    return false;
  }
}
/**
 * Resolves the repository root directory.
 *
 * @returns The trimmed output of `git rev-parse --show-toplevel`, or the
 *          current working directory when that command fails
 */
async function getRepoRoot(): Promise<string> {
  let root: string;
  try {
    const output = await $`git rev-parse --show-toplevel`.quiet();
    root = output.text().trim();
  } catch {
    root = process.cwd();
  }
  return root;
}
/**
 * Lists markdown files (`.md` and `.mdx`), optionally restricted to a directory.
 *
 * Inside a git repository the list comes from `git ls-files` (tracked plus
 * untracked-but-not-ignored files). If that command fails, `find` is used
 * instead.
 *
 * @param searchPath - Directory to search; the current directory when omitted
 * @returns De-duplicated file paths without a leading "./"
 */
async function findMarkdownFiles(searchPath?: string): Promise<string[]> {
  // Turn command output into a clean list: drop blank lines, strip the "./"
  // prefix that `find .` adds, and remove duplicates while keeping order.
  const toPaths = (output: string): string[] => {
    const paths = output
      .trim()
      .split("\n")
      .filter((line) => line.length > 0)
      .map((line) => (line.startsWith("./") ? line.slice(2) : line));
    return [...new Set(paths)];
  };

  const prefix = searchPath ? `${searchPath.replace(/\/+$/, "")}/` : "";
  // The ":(glob)" magic makes "**/" match zero or more directories. Without
  // it, "**/" behaves like "*/" and files sitting directly in the search root
  // (e.g. README.md) are never matched.
  const mdSpec = `:(glob)${prefix}**/*.md`;
  const mdxSpec = `:(glob)${prefix}**/*.mdx`;

  try {
    // Use git ls-files if in a git repo (respects .gitignore)
    const result =
      await $`git ls-files --cached --others --exclude-standard -- "${mdSpec}" "${mdxSpec}"`.quiet();
    return toPaths(result.text());
  } catch {
    // Fallback to find command
    const result = await $`find ${searchPath || "."} -name "*.md" -o -name "*.mdx" 2>/dev/null`.quiet();
    return toPaths(result.text());
  }
}
/**
 * Reads the last-commit metadata for a file from `git log`.
 *
 * @param filePath - Path of the file to inspect
 * @returns Short SHA, ISO author date, author name and age in whole days.
 *          A file with no commits yields the "uncommitted" placeholder; a git
 *          failure or unparsable output yields the "unknown" placeholder. Both
 *          placeholders use the current time and a `daysAgo` of 0.
 */
async function getGitInfo(
  filePath: string
): Promise<Pick<DocFile, "lastCommitSha" | "lastCommitDate" | "lastAuthor" | "daysAgo">> {
  // Shared shape for every "no usable history" outcome.
  const placeholder = (sha: string) => ({
    lastCommitSha: sha,
    lastCommitDate: new Date().toISOString(),
    lastAuthor: "unknown",
    daysAgo: 0,
  });

  try {
    // Get last commit info: short SHA, ISO date, author name
    const result = await $`git log -1 --format="%h|%aI|%an" -- "${filePath}"`.quiet();
    const output = result.text().trim();
    if (!output) {
      // File not yet committed
      return placeholder("uncommitted");
    }

    // The SHA and the ISO date never contain "|", but an author name may, so
    // everything after the second separator is rejoined into the author.
    const [sha = "", date = "", ...authorParts] = output.split("|");
    const author = authorParts.join("|");
    const commitDate = new Date(date);
    if (!sha || Number.isNaN(commitDate.getTime())) {
      // Unexpected output: avoid propagating NaN into status and sorting.
      return placeholder("unknown");
    }

    return {
      lastCommitSha: sha,
      lastCommitDate: date,
      lastAuthor: author || "unknown",
      // A commit dated in the future (clock skew) counts as "today".
      daysAgo: Math.max(0, daysBetween(commitDate, new Date())),
    };
  } catch {
    return placeholder("unknown");
  }
}
/**
 * Measures a file's size in bytes and its number of lines.
 *
 * A trailing newline terminates the last line rather than starting a new
 * one, so "a\nb\n" counts as 2 lines and an empty file as 0.
 *
 * @param filePath - Path of the file to measure
 * @returns Line and byte counts, or zeros when the file cannot be read
 */
async function getFileStats(filePath: string): Promise<Pick<DocFile, "lines" | "bytes">> {
  try {
    const { size: bytes } = await stat(filePath);
    const content = await Bun.file(filePath).text();

    // split() yields one extra empty segment when the text is empty or ends
    // with "\n"; that segment is not a line.
    const segments = content.split("\n");
    const endsWithEmptySegment = segments[segments.length - 1] === "";
    const lines = endsWithEmptySegment ? segments.length - 1 : segments.length;

    return { lines, bytes };
  } catch {
    // Best effort: unreadable or missing files are reported as empty.
    return { lines: 0, bytes: 0 };
  }
}
/**
 * Finds code files that were changed in the same commits as a document.
 *
 * Only the 5 most recent commits touching the document are inspected, and
 * at most 10 related files are returned, in the order they were first seen.
 *
 * @param docPath - Path of the documentation file
 * @returns Paths of co-changed code files; empty when there is no history or
 *          git fails
 */
async function getRelatedCodeFiles(docPath: string): Promise<string[]> {
  const maxRelatedFiles = 10;
  const codeExtensions = [
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".py",
    ".rs",
    ".go",
    ".java",
    ".rb",
    ".c",
    ".cpp",
    ".h",
  ];

  try {
    // Only the 5 most recent commits are inspected (for speed), so only
    // those are requested.
    const result = await $`git log -5 --format="%H" -- "${docPath}"`.quiet();
    const commits = result.text().trim().split("\n").filter(Boolean);
    if (commits.length === 0) return [];

    const relatedFiles = new Set<string>();
    for (const commit of commits) {
      // Later commits cannot change the first 10 entries, so stop early.
      if (relatedFiles.size >= maxRelatedFiles) break;
      try {
        // --root makes the initial commit list its files; without it
        // diff-tree prints nothing for a commit that has no parent.
        const filesResult =
          await $`git diff-tree --root --no-commit-id --name-only -r ${commit}`.quiet();
        for (const file of filesResult.text().trim().split("\n")) {
          if (!file || file === docPath) continue;
          if (codeExtensions.some((ext) => file.endsWith(ext))) {
            relatedFiles.add(file);
          }
        }
      } catch {
        // Skip this commit if we can't get its files
      }
    }
    return Array.from(relatedFiles).slice(0, maxRelatedFiles);
  } catch {
    return [];
  }
}
/**
 * Builds the full DocFile record for one path.
 *
 * Git metadata, file statistics and related code files are gathered
 * concurrently. When `useGit` is false the git lookups are skipped and
 * "n/a" placeholders (current time, 0 days) are used instead.
 *
 * @param filePath - Path of the markdown file
 * @param useGit - Whether git history should be consulted
 * @returns The assembled record, including its derived activity status
 */
async function analyzeFile(filePath: string, useGit: boolean): Promise<DocFile> {
  const gitInfoTask = useGit
    ? getGitInfo(filePath)
    : Promise.resolve({
        lastCommitSha: "n/a",
        lastCommitDate: new Date().toISOString(),
        lastAuthor: "n/a",
        daysAgo: 0,
      });
  const statsTask = getFileStats(filePath);
  const relatedTask = useGit ? getRelatedCodeFiles(filePath) : Promise.resolve([]);

  const [history, sizeInfo, relatedCodeFiles] = await Promise.all([
    gitInfoTask,
    statsTask,
    relatedTask,
  ]);
  const activityStatus = getActivityStatus(history.daysAgo);

  // Key order is kept stable because it determines the JSON output layout.
  return {
    path: filePath,
    ...history,
    activityStatus,
    ...sizeInfo,
    relatedCodeFiles,
  };
}
/**
 * Returns the files in the requested order without modifying the input.
 *
 * @param files - Files to order; left untouched
 * @param sortBy - "alpha" orders by path; "staleness" puts the oldest
 *                 (largest `daysAgo`) first
 * @returns A new, sorted array
 */
function sortFiles(files: DocFile[], sortBy: "staleness" | "alpha"): DocFile[] {
  // Array.prototype.sort works in place, so sort a copy to avoid silently
  // reordering the caller's array.
  const ordered = [...files];
  if (sortBy === "alpha") {
    return ordered.sort((a, b) => a.path.localeCompare(b.path));
  }
  // Sort by staleness (most stale first)
  return ordered.sort((a, b) => b.daysAgo - a.daysAgo);
}
/**
 * Renders a manifest as a human-readable report.
 *
 * The report starts with a summary header, followed by one entry per file:
 * a status marker with the path, the commit details, the size, and up to
 * three related code files (with "..." when there are more). Every entry is
 * followed by a blank line.
 *
 * @param manifest - Manifest to render
 * @returns The report as a single newline-joined string
 */
function formatText(manifest: Manifest): string {
  const statusEmoji: Record<DocFile["activityStatus"], string> = {
    active: "🟢",
    recent: "🟡",
    idle: "🟠",
    stale: "🔴",
    ancient: "⚫",
  };

  const header: string[] = [
    "Documentation Audit Discovery",
    `Generated: ${manifest.generated}`,
    `Repo: ${manifest.repoRoot}`,
    `Git: ${manifest.isGitRepo ? "yes" : "no"}`,
    `Total files: ${manifest.totalFiles}`,
    "",
    "Files:",
    "",
  ];

  const entries = manifest.files.flatMap((doc) => {
    const entry = [
      `${statusEmoji[doc.activityStatus]} ${doc.path}`,
      ` SHA: ${doc.lastCommitSha} | ${doc.daysAgo}d ago | ${doc.lastAuthor}`,
      ` Size: ${doc.lines} lines, ${doc.bytes} bytes`,
    ];
    const related = doc.relatedCodeFiles;
    if (related.length > 0) {
      const shown = related.slice(0, 3).join(", ");
      const ellipsis = related.length > 3 ? "..." : "";
      entry.push(` Related: ${shown}${ellipsis}`);
    }
    entry.push("");
    return entry;
  });

  return [...header, ...entries].join("\n");
}
// Main
/**
 * Command-line entry point: parses options, discovers and analyzes the
 * markdown files, then prints the manifest as JSON or text.
 *
 * Unknown `--sort` values fall back to "staleness" and unknown `--format`
 * values fall back to JSON.
 *
 * @throws Error when `--limit` is not a non-negative integer
 */
async function main(): Promise<void> {
  const { values } = parseArgs({
    args: Bun.argv.slice(2),
    options: {
      path: { type: "string", short: "p" },
      limit: { type: "string", short: "l" },
      sort: { type: "string", short: "s", default: "staleness" },
      format: { type: "string", short: "f", default: "json" },
      help: { type: "boolean", short: "h" },
    },
    allowPositionals: true,
  });

  if (values.help) {
    console.log(`
discover-docs.ts - Markdown documentation discovery
Usage:
./discover-docs.ts [options]
Options:
--path, -p <dir> Search in specific directory
--limit, -l <n> Limit number of results
--sort, -s <by> Sort by: staleness (default), alpha
--format, -f <fmt> Output format: json (default), text
--help, -h Show this help
`);
    process.exit(0);
  }

  // Validate --limit before doing any work. An unparsable value used to become
  // NaN and silently produce an empty list; a negative one dropped trailing
  // files instead of limiting.
  let limit: number | undefined;
  if (values.limit) {
    if (!/^\d+$/.test(values.limit)) {
      throw new Error(
        `Invalid --limit value "${values.limit}": expected a non-negative integer`
      );
    }
    limit = Number(values.limit);
  }
  const sortBy = values.sort === "alpha" ? "alpha" : "staleness";

  const useGit = await isGitRepo();
  const repoRoot = await getRepoRoot();
  const files = await findMarkdownFiles(values.path);

  // Analyze files in parallel (batched for large repos)
  const batchSize = 20;
  const docFiles: DocFile[] = [];
  for (let i = 0; i < files.length; i += batchSize) {
    const batch = files.slice(i, i + batchSize);
    const results = await Promise.all(batch.map((f) => analyzeFile(f, useGit)));
    docFiles.push(...results);
  }

  // Sort, then limit
  const sorted = sortFiles(docFiles, sortBy);
  const limited = sorted.slice(0, limit ?? sorted.length);

  const manifest: Manifest = {
    generated: new Date().toISOString(),
    repoRoot,
    isGitRepo: useGit,
    // Counts every discovered file, not just the ones kept after --limit.
    totalFiles: files.length,
    files: limited,
  };

  if (values.format === "text") {
    console.log(formatText(manifest));
  } else {
    console.log(JSON.stringify(manifest, null, 2));
  }
}
// Run the CLI; report any failure on stderr and exit with a non-zero status.
main().catch((err: unknown) => {
  // A rejection is not guaranteed to be an Error, so narrow before reading
  // `.message` instead of printing "undefined".
  const message = err instanceof Error ? err.message : String(err);
  console.error("Error:", message);
  process.exit(1);
});