#!/usr/bin/env python3
"""
cli_audit.py — a lightweight CLI "citizenship" checker.

Usage:
  python scripts/cli_audit.py -- <command> [args...]

Examples:
  python scripts/cli_audit.py -- ./mycmd
  python scripts/cli_audit.py -- mycmd subcmd

What it checks (heuristically):
  - --help works (exit 0) and looks like help
  - invalid flag produces non-zero and error on stderr
  - common conventions appear in help (e.g., --version, --json, --no-color)
  - ANSI escape codes / animations in non-TTY output (captured output is non-TTY)
  - NO_COLOR / TERM=dumb behavior (best-effort)

Notes:
  - This script does NOT "prove" correctness; it flags likely
    UX/composability issues.
  - Some checks are WARN (recommendations), not FAIL (hard requirements).
"""

from __future__ import annotations

import argparse
import os
import re
import shutil
import subprocess
import sys
from dataclasses import dataclass
from typing import Dict, List, Optional, Sequence, Tuple

# Matches terminal escape sequences introduced by ESC. Written in verbose
# mode, so every alternative must stay on its own line: a "#" comment
# swallows the rest of the physical line.
ANSI_RE = re.compile(
    r"""
    \x1b                          # ESC
    (?:
        \[ [0-?]* [ -/]* [@-~]    # CSI sequences
      | \] .*? (?:\x07|\x1b\\)    # OSC sequences
      | [@-Z\\-_]                 # 2-character sequences
    )
    """,
    re.VERBOSE | re.DOTALL,
)

# Flag appended to the audited command to provoke an "unknown option" error.
_INVALID_FLAG = "--definitely-not-a-real-flag-xyz"

# Substrings that suggest a raw stack trace leaked into normal error output.
_STACK_TRACE_MARKERS = (
    "Traceback (most recent call last)",
    "panic:",
    "stack trace",
    "Stack trace",
)


@dataclass
class RunResult:
    """Outcome of one probe invocation of the audited command."""

    argv: List[str]  # the exact argument vector that was executed
    returncode: Optional[int]  # None when the process timed out or never ran
    stdout: str  # captured stdout, decoded as UTF-8 with replacement
    stderr: str  # captured stderr, or a synthetic message on launch failure
    timed_out: bool  # True when the timeout expired before the process exited


@dataclass
class Finding:
    """A single audit observation shown in the report."""

    level: str  # PASS | WARN | FAIL
    title: str
    details: str = ""


def _decode(b: bytes) -> str:
    """Decode captured bytes as UTF-8, replacing undecodable sequences."""
    return b.decode("utf-8", errors="replace")


def run_cmd(
    argv: Sequence[str],
    timeout_s: float,
    env_overrides: Optional[Dict[str, str]] = None,
) -> RunResult:
    """Run *argv* with captured output and return a ``RunResult``.

    The child inherits the current environment, updated with
    *env_overrides* when given. This function does not raise for launch
    problems or timeouts; they are reported through the result:

    - command not found: ``returncode=None``, ``stderr="Command not found."``
    - any other ``OSError`` at launch (e.g. permission denied, the path is a
      directory): ``returncode=None`` and a descriptive ``stderr``
    - timeout: ``returncode=None``, ``timed_out=True``, with whatever output
      was captured before the timeout
    """
    env = os.environ.copy()
    if env_overrides:
        env.update(env_overrides)

    args = list(argv)
    try:
        proc = subprocess.run(
            args,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=env,
            timeout=timeout_s,
            check=False,
        )
    except FileNotFoundError:
        return RunResult(
            argv=args,
            returncode=None,
            stdout="",
            stderr="Command not found.",
            timed_out=False,
        )
    except subprocess.TimeoutExpired as e:
        return RunResult(
            argv=args,
            returncode=None,
            stdout=_decode(e.stdout or b""),
            stderr=_decode(e.stderr or b""),
            timed_out=True,
        )
    except OSError as e:
        # The path exists but cannot be executed (not executable, a
        # directory, bad exec format, ...). Report it as a failed probe
        # instead of crashing the whole audit with a traceback.
        return RunResult(
            argv=args,
            returncode=None,
            stdout="",
            stderr=f"Failed to execute command: {e}",
            timed_out=False,
        )

    return RunResult(
        argv=args,
        returncode=proc.returncode,
        stdout=_decode(proc.stdout),
        stderr=_decode(proc.stderr),
        timed_out=False,
    )


def has_ansi(s: str) -> bool:
    """Return True if *s* contains a sequence matched by ``ANSI_RE``."""
    return bool(ANSI_RE.search(s))


def has_carriage_returns(s: str) -> bool:
    """Return True if *s* contains a carriage return character."""
    return "\r" in s


def looks_like_help(text: str) -> bool:
    """Heuristically decide whether *text* resembles help output.

    The check is a case-insensitive search for a few common keywords.
    """
    t = text.lower()
    keywords = ("usage:", "\nusage", "synopsis", "options", "commands")
    return any(k in t for k in keywords)


def _mentions_short_flag(text: str, letter: str) -> bool:
    """Return True if ``-<letter>`` appears in *text* as a standalone token.

    The flag must be preceded by whitespace or the start of the text, and
    followed by whitespace, a comma, or the end of the text.
    """
    pattern = rf"(^|\s)-{re.escape(letter)}(\s|,|$)"
    return re.search(pattern, text) is not None


def find_flag_mentions(help_text: str) -> Dict[str, bool]:
    """Map conventional flag names to whether *help_text* mentions them.

    Long flags are detected by plain substring search. ``--quiet``,
    ``--verbose`` and ``--debug`` also count as mentioned when their usual
    short forms (``-q``, ``-v``, ``-d``) appear as standalone tokens.
    """
    t = help_text
    return {
        "--help": "--help" in t,
        "-h": _mentions_short_flag(t, "h"),
        "--version": "--version" in t,
        "--json": "--json" in t,
        "--plain": "--plain" in t,
        "--no-color": "--no-color" in t,
        "NO_COLOR": "NO_COLOR" in t,
        "--no-input": "--no-input" in t,
        "--dry-run": "--dry-run" in t,
        "--force": "--force" in t,
        "--quiet": "--quiet" in t or _mentions_short_flag(t, "q"),
        "--verbose": "--verbose" in t or _mentions_short_flag(t, "v"),
        "--debug": "--debug" in t or _mentions_short_flag(t, "d"),
    }


def format_findings(findings: List[Finding]) -> str:
    """Render *findings* as report text, one titled entry per finding.

    Each entry starts with a ``[LEVEL]`` tag (``[INFO]`` for unrecognized
    levels); non-blank details follow on indented lines. The result always
    ends with a newline.
    """
    icons = {"PASS": "[PASS]", "WARN": "[WARN]", "FAIL": "[FAIL]"}

    lines: List[str] = []
    for f in findings:
        lines.append(f"{icons.get(f.level, '[INFO]')} {f.title}")
        if f.details.strip():
            lines.extend(f" {line}" for line in f.details.rstrip().splitlines())
    return "\n".join(lines) + "\n"


def _dump_probe_output(label: str, res: RunResult) -> None:
    """Print the captured stdout/stderr of one probe (for --print-output)."""
    print(f"== PROBE: {label} ==")
    print("--- stdout ---")
    print(res.stdout.rstrip())
    print("--- stderr ---")
    print(res.stderr.rstrip())
    print()


def _audit_help(res: RunResult) -> List[Finding]:
    """Evaluate the ``--help`` probe: exit status, content, output stream."""
    if res.timed_out:
        return [Finding("FAIL", "--help timed out", "Help should return quickly.")]
    if res.returncode is None:
        return [
            Finding(
                "FAIL",
                "--help failed to execute",
                res.stderr.strip() or "Unknown error.",
            )
        ]

    findings: List[Finding] = []
    if res.returncode != 0:
        findings.append(
            Finding(
                "FAIL",
                f"--help exit code was {res.returncode}",
                "Help should exit 0.",
            )
        )
    else:
        findings.append(Finding("PASS", "--help exits with code 0"))

    combined = (res.stdout + "\n" + res.stderr).strip()
    if not combined:
        # With no output at all there is no stream placement to judge.
        findings.append(Finding("FAIL", "--help produced no output"))
        return findings

    if looks_like_help(combined):
        findings.append(
            Finding(
                "PASS",
                "--help output looks like help (usage/options/commands detected)",
            )
        )
    else:
        findings.append(
            Finding(
                "WARN",
                "--help output did not obviously look like help",
                "Check formatting and content.",
            )
        )

    on_stdout = bool(res.stdout.strip())
    on_stderr = bool(res.stderr.strip())
    if on_stdout and not on_stderr:
        findings.append(Finding("PASS", "Help printed to stdout"))
    elif on_stderr and not on_stdout:
        findings.append(
            Finding(
                "WARN",
                "Help printed to stderr",
                "Common convention is help on stdout; stderr is typically for errors.",
            )
        )
    else:
        findings.append(
            Finding(
                "WARN",
                "Help printed to both stdout and stderr",
                "Prefer help on stdout; reserve stderr for errors/warnings.",
            )
        )
    return findings


def _audit_short_help(res: RunResult) -> Finding:
    """Evaluate the ``-h`` probe (recommended, not required)."""
    if res.timed_out:
        return Finding(
            "WARN", "-h timed out", "If you support -h, it should return quickly."
        )
    if res.returncode == 0 and (res.stdout.strip() or res.stderr.strip()):
        return Finding("PASS", "-h works (exit 0)")
    return Finding(
        "WARN",
        "-h did not behave like help",
        "If you intentionally use -h for something else, consider avoiding that.",
    )


def _audit_invalid_flag(res: RunResult) -> List[Finding]:
    """Evaluate how the command reacted to an unknown flag."""
    if res.timed_out:
        return [
            Finding(
                "FAIL",
                "Invalid-flag probe timed out",
                "Invalid input should fail fast with guidance.",
            )
        ]
    if res.returncode is None:
        return [
            Finding(
                "FAIL",
                "Invalid-flag probe failed to execute",
                res.stderr.strip() or "Unknown error.",
            )
        ]

    findings: List[Finding] = []
    if res.returncode == 0:
        findings.append(
            Finding(
                "FAIL",
                "Unknown flag returned exit code 0",
                "Unknown flags should be an error.",
            )
        )
    else:
        findings.append(
            Finding("PASS", f"Unknown flag returns non-zero ({res.returncode})")
        )

    if res.stderr.strip():
        findings.append(Finding("PASS", "Unknown-flag error printed to stderr"))
    else:
        findings.append(
            Finding(
                "WARN",
                "Unknown-flag error not printed to stderr",
                "Prefer errors on stderr.",
            )
        )

    output = res.stdout + res.stderr
    if "--help" in output:
        findings.append(Finding("PASS", "Unknown-flag error mentions --help"))
    else:
        findings.append(
            Finding(
                "WARN",
                "Unknown-flag error does not mention --help",
                "Consider adding a hint to discover help.",
            )
        )

    if any(marker in output for marker in _STACK_TRACE_MARKERS):
        findings.append(
            Finding(
                "WARN",
                "Error output includes a stack trace marker",
                "Prefer stack traces only in --debug/--verbose mode.",
            )
        )
    return findings


def _audit_conventions(help_text: str) -> List[Finding]:
    """Check the help text for mentions of common CLI conventions."""
    mentions = find_flag_mentions(help_text)

    # (flag, advice shown when the flag is not mentioned)
    simple_flags = (
        ("--version", "Consider supporting --version for discoverability."),
        (
            "--json",
            "If scripts may consume output, consider a structured JSON mode.",
        ),
        (
            "--plain",
            "If human output is formatted, a stable plain mode helps scripting.",
        ),
    )

    findings: List[Finding] = []
    for flag, advice in simple_flags:
        if mentions.get(flag):
            findings.append(Finding("PASS", f"Help mentions {flag}"))
        else:
            findings.append(
                Finding("WARN", f"Help does not mention {flag}", advice)
            )

    if mentions.get("--no-color") or mentions.get("NO_COLOR"):
        findings.append(
            Finding(
                "PASS",
                "Help mentions color controls (--no-color and/or NO_COLOR)",
            )
        )
    else:
        findings.append(
            Finding(
                "WARN",
                "Help does not mention color controls",
                "Consider supporting --no-color and NO_COLOR.",
            )
        )

    if mentions.get("--no-input"):
        findings.append(Finding("PASS", "Help mentions --no-input"))
    else:
        findings.append(
            Finding(
                "WARN",
                "Help does not mention --no-input",
                "If you prompt, consider a non-interactive escape hatch.",
            )
        )
    return findings


def _audit_help_formatting(res: RunResult) -> List[Finding]:
    """Look for ANSI escapes and carriage returns in captured --help output."""
    findings: List[Finding] = []
    if has_ansi(res.stdout) or has_ansi(res.stderr):
        findings.append(
            Finding(
                "WARN",
                "ANSI escape sequences detected in --help output (captured/non-TTY)",
                "Consider disabling color/formatting when output is not a TTY, or when NO_COLOR is set.",
            )
        )
    else:
        findings.append(
            Finding(
                "PASS",
                "No ANSI escape sequences detected in captured --help output",
            )
        )

    if has_carriage_returns(res.stdout) or has_carriage_returns(res.stderr):
        findings.append(
            Finding(
                "WARN",
                "Carriage returns detected in --help output",
                "This can indicate animations/progress behavior; ensure you don't animate when not a TTY.",
            )
        )
    return findings


def _audit_env_probe(res: RunResult, label: str, advice: str) -> Finding:
    """Evaluate a --help probe that ran with a color-suppressing environment.

    *label* names the environment setting (e.g. ``NO_COLOR=1``) and *advice*
    is the detail text used when ANSI sequences are still present.
    """
    if res.timed_out or res.returncode is None:
        # Empty output from a probe that never completed contains no ANSI,
        # but that proves nothing; do not report it as a PASS.
        return Finding(
            "WARN",
            f"{label} probe did not complete",
            "The command timed out or could not be executed, so this check is inconclusive.",
        )
    if has_ansi(res.stdout) or has_ansi(res.stderr):
        return Finding("WARN", f"ANSI still present with {label}", advice)
    return Finding(
        "PASS", f"{label} produced no ANSI sequences (best-effort check)"
    )


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Audit the command given after ``--`` and print a findings report.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]`` when None.

    Returns:
        Process exit status: 0 when nothing failed, 1 when any finding is
        FAIL (or WARN with ``--strict``), 2 when no command was given, and
        127 when the command cannot be found.
    """
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument(
        "--timeout",
        type=float,
        default=10.0,
        help="Per-invocation timeout in seconds (default: 10).",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Treat WARN as FAIL for exit status purposes.",
    )
    parser.add_argument(
        "--print-output",
        action="store_true",
        help="Print captured stdout/stderr for each probe.",
    )
    parser.add_argument(
        "cmd",
        nargs=argparse.REMAINDER,
        help="Command to audit (must be provided after --).",
    )
    args = parser.parse_args(argv)

    if not args.cmd:
        print(
            "Error: no command provided.\n\nUsage:\n python scripts/cli_audit.py -- <command> [args...]\n",
            file=sys.stderr,
        )
        return 2

    # argparse.REMAINDER may hand back the "--" separator itself as the
    # first element; drop it so cmd holds only the command to audit.
    cmd = list(args.cmd)
    if cmd[0] == "--":
        cmd = cmd[1:]
    if not cmd:
        print("Error: no command provided after --.", file=sys.stderr)
        return 2

    exe = cmd[0]
    if shutil.which(exe) is None and not os.path.exists(exe):
        print(f"Error: command not found: {exe}", file=sys.stderr)
        return 127

    findings: List[Finding] = []

    # Probe: --help
    help_res = run_cmd(cmd + ["--help"], timeout_s=args.timeout)
    findings.extend(_audit_help(help_res))
    if args.print_output:
        _dump_probe_output("--help", help_res)

    # Probe: -h (recommended, not required)
    h_res = run_cmd(cmd + ["-h"], timeout_s=args.timeout)
    findings.append(_audit_short_help(h_res))

    # Probe: invalid flag
    bad_res = run_cmd(cmd + [_INVALID_FLAG], timeout_s=args.timeout)
    findings.extend(_audit_invalid_flag(bad_res))
    if args.print_output:
        _dump_probe_output("invalid flag", bad_res)

    # Analyze help for common conventions
    findings.extend(_audit_conventions(help_res.stdout + "\n" + help_res.stderr))

    # ANSI / animation checks (captured output is non-TTY)
    findings.extend(_audit_help_formatting(help_res))

    # NO_COLOR / TERM=dumb best-effort probes (only meaningful if the tool
    # would emit ANSI)
    env_probes = (
        (
            "NO_COLOR=1",
            {"NO_COLOR": "1"},
            "Consider honoring NO_COLOR to disable color output.",
        ),
        (
            "TERM=dumb",
            {"TERM": "dumb"},
            "Consider disabling ANSI when TERM=dumb.",
        ),
    )
    for label, overrides, advice in env_probes:
        env_res = run_cmd(
            cmd + ["--help"], timeout_s=args.timeout, env_overrides=overrides
        )
        findings.append(_audit_env_probe(env_res, label, advice))

    # Summary and exit status
    fail = sum(1 for f in findings if f.level == "FAIL")
    warn = sum(1 for f in findings if f.level == "WARN")

    print(format_findings(findings))
    print(f"Summary: {fail} FAIL, {warn} WARN")

    if fail > 0:
        return 1
    if args.strict and warn > 0:
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())