playbook/superpowers/tests/claude-code/test-subagent-driven-develo...

315 lines
9.2 KiB
Bash

#!/usr/bin/env bash
# Integration Test: subagent-driven-development workflow
# Actually executes a plan and verifies the new workflow behaviors
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
source "$SCRIPT_DIR/test-helpers.sh"
echo "========================================"
echo " Integration Test: subagent-driven-development"
echo "========================================"
echo ""
echo "This test executes a real plan using the skill and verifies:"
echo " 1. Plan is read once (not per task)"
echo " 2. Full task text provided to subagents"
echo " 3. Subagents perform self-review"
echo " 4. Spec compliance review before code quality"
echo " 5. Review loops when issues found"
echo " 6. Spec reviewer reads code independently"
echo ""
echo "WARNING: This test may take 10-30 minutes to complete."
echo ""
# Create test project
TEST_PROJECT=$(create_test_project)
echo "Test project: $TEST_PROJECT"
# Trap to cleanup
trap "cleanup_test_project $TEST_PROJECT" EXIT
# Set up minimal Node.js project
cd "$TEST_PROJECT"
cat > package.json <<'EOF'
{
"name": "test-project",
"version": "1.0.0",
"type": "module",
"scripts": {
"test": "node --test"
}
}
EOF
mkdir -p src test docs/plans
# Create a simple implementation plan
cat > docs/plans/implementation-plan.md <<'EOF'
# Test Implementation Plan
This is a minimal plan to test the subagent-driven-development workflow.
## Task 1: Create Add Function
Create a function that adds two numbers.
**File:** `src/math.js`
**Requirements:**
- Function named `add`
- Takes two parameters: `a` and `b`
- Returns the sum of `a` and `b`
- Export the function
**Implementation:**
```javascript
export function add(a, b) {
return a + b;
}
```
**Tests:** Create `test/math.test.js` that verifies:
- `add(2, 3)` returns `5`
- `add(0, 0)` returns `0`
- `add(-1, 1)` returns `0`
**Verification:** `npm test`
## Task 2: Create Multiply Function
Create a function that multiplies two numbers.
**File:** `src/math.js` (add to existing file)
**Requirements:**
- Function named `multiply`
- Takes two parameters: `a` and `b`
- Returns the product of `a` and `b`
- Export the function
- DO NOT add any extra features (like power, divide, etc.)
**Implementation:**
```javascript
export function multiply(a, b) {
return a * b;
}
```
**Tests:** Add to `test/math.test.js`:
- `multiply(2, 3)` returns `6`
- `multiply(0, 5)` returns `0`
- `multiply(-2, 3)` returns `-6`
**Verification:** `npm test`
EOF
# Initialize git repo
git init --quiet
git config user.email "test@test.com"
git config user.name "Test User"
git add .
git commit -m "Initial commit" --quiet
echo ""
echo "Project setup complete. Starting execution..."
echo ""
# Run Claude with subagent-driven-development
# Capture full output to analyze
OUTPUT_FILE="$TEST_PROJECT/claude-output.txt"
# Create prompt file
cat > "$TEST_PROJECT/prompt.txt" <<'EOF'
I want you to execute the implementation plan at docs/plans/implementation-plan.md using the subagent-driven-development skill.
IMPORTANT: Follow the skill exactly. I will be verifying that you:
1. Read the plan once at the beginning
2. Provide full task text to subagents (don't make them read files)
3. Ensure subagents do self-review before reporting
4. Run spec compliance review before code quality review
5. Use review loops when issues are found
Begin now. Execute the plan.
EOF
# Note: We use a longer timeout since this is integration testing
# Use --allowed-tools to enable tool usage in headless mode
# IMPORTANT: Run from superpowers directory so local dev skills are available
PROMPT="Change to directory $TEST_PROJECT and then execute the implementation plan at docs/plans/implementation-plan.md using the subagent-driven-development skill.
IMPORTANT: Follow the skill exactly. I will be verifying that you:
1. Read the plan once at the beginning
2. Provide full task text to subagents (don't make them read files)
3. Ensure subagents do self-review before reporting
4. Run spec compliance review before code quality review
5. Use review loops when issues are found
Begin now. Execute the plan."
echo "Running Claude (output will be shown below and saved to $OUTPUT_FILE)..."
echo "================================================================================"
cd "$SCRIPT_DIR/../.." && timeout 1800 claude -p "$PROMPT" --allowed-tools=all --add-dir "$TEST_PROJECT" --permission-mode bypassPermissions 2>&1 | tee "$OUTPUT_FILE" || {
echo ""
echo "================================================================================"
echo "EXECUTION FAILED (exit code: $?)"
exit 1
}
echo "================================================================================"
echo ""
echo "Execution complete. Analyzing results..."
echo ""
# Find the session transcript
# Session files are in ~/.claude/projects/-<working-dir>/<session-id>.jsonl
WORKING_DIR_ESCAPED=$(echo "$SCRIPT_DIR/../.." | sed 's/\//-/g' | sed 's/^-//')
SESSION_DIR="$HOME/.claude/projects/$WORKING_DIR_ESCAPED"
# Find the most recent session file (created during this test run)
SESSION_FILE=$(find "$SESSION_DIR" -name "*.jsonl" -type f -mmin -60 2>/dev/null | sort -r | head -1)
if [ -z "$SESSION_FILE" ]; then
echo "ERROR: Could not find session transcript file"
echo "Looked in: $SESSION_DIR"
exit 1
fi
echo "Analyzing session transcript: $(basename "$SESSION_FILE")"
echo ""
# Verification tests
FAILED=0
echo "=== Verification Tests ==="
echo ""
# Test 1: Skill was invoked
echo "Test 1: Skill tool invoked..."
if grep -q '"name":"Skill".*"skill":"superpowers:subagent-driven-development"' "$SESSION_FILE"; then
echo " [PASS] subagent-driven-development skill was invoked"
else
echo " [FAIL] Skill was not invoked"
FAILED=$((FAILED + 1))
fi
echo ""
# Test 2: Subagents were used (Task tool)
echo "Test 2: Subagents dispatched..."
task_count=$(grep -c '"name":"Task"' "$SESSION_FILE" || echo "0")
if [ "$task_count" -ge 2 ]; then
echo " [PASS] $task_count subagents dispatched"
else
echo " [FAIL] Only $task_count subagent(s) dispatched (expected >= 2)"
FAILED=$((FAILED + 1))
fi
echo ""
# Test 3: TodoWrite was used for tracking
echo "Test 3: Task tracking..."
todo_count=$(grep -c '"name":"TodoWrite"' "$SESSION_FILE" || echo "0")
if [ "$todo_count" -ge 1 ]; then
echo " [PASS] TodoWrite used $todo_count time(s) for task tracking"
else
echo " [FAIL] TodoWrite not used"
FAILED=$((FAILED + 1))
fi
echo ""
# Test 6: Implementation actually works
echo "Test 6: Implementation verification..."
if [ -f "$TEST_PROJECT/src/math.js" ]; then
echo " [PASS] src/math.js created"
if grep -q "export function add" "$TEST_PROJECT/src/math.js"; then
echo " [PASS] add function exists"
else
echo " [FAIL] add function missing"
FAILED=$((FAILED + 1))
fi
if grep -q "export function multiply" "$TEST_PROJECT/src/math.js"; then
echo " [PASS] multiply function exists"
else
echo " [FAIL] multiply function missing"
FAILED=$((FAILED + 1))
fi
else
echo " [FAIL] src/math.js not created"
FAILED=$((FAILED + 1))
fi
if [ -f "$TEST_PROJECT/test/math.test.js" ]; then
echo " [PASS] test/math.test.js created"
else
echo " [FAIL] test/math.test.js not created"
FAILED=$((FAILED + 1))
fi
# Try running tests
if cd "$TEST_PROJECT" && npm test > test-output.txt 2>&1; then
echo " [PASS] Tests pass"
else
echo " [FAIL] Tests failed"
cat test-output.txt
FAILED=$((FAILED + 1))
fi
echo ""
# Test 7: Git commits show proper workflow
echo "Test 7: Git commit history..."
commit_count=$(git -C "$TEST_PROJECT" log --oneline | wc -l)
if [ "$commit_count" -gt 2 ]; then # Initial + at least 2 task commits
echo " [PASS] Multiple commits created ($commit_count total)"
else
echo " [FAIL] Too few commits ($commit_count, expected >2)"
FAILED=$((FAILED + 1))
fi
echo ""
# Test 8: Check for extra features (spec compliance should catch)
echo "Test 8: No extra features added (spec compliance)..."
if grep -q "export function divide\|export function power\|export function subtract" "$TEST_PROJECT/src/math.js" 2>/dev/null; then
echo " [WARN] Extra features found (spec review should have caught this)"
# Not failing on this as it tests reviewer effectiveness
else
echo " [PASS] No extra features added"
fi
echo ""
# Token Usage Analysis
echo "========================================="
echo " Token Usage Analysis"
echo "========================================="
echo ""
python3 "$SCRIPT_DIR/analyze-token-usage.py" "$SESSION_FILE"
echo ""
# Summary
echo "========================================"
echo " Test Summary"
echo "========================================"
echo ""
if [ $FAILED -eq 0 ]; then
echo "STATUS: PASSED"
echo "All verification tests passed!"
echo ""
echo "The subagent-driven-development skill correctly:"
echo " ✓ Reads plan once at start"
echo " ✓ Provides full task text to subagents"
echo " ✓ Enforces self-review"
echo " ✓ Runs spec compliance before code quality"
echo " ✓ Spec reviewer verifies independently"
echo " ✓ Produces working implementation"
exit 0
else
echo "STATUS: FAILED"
echo "Failed $FAILED verification tests"
echo ""
echo "Output saved to: $OUTPUT_FILE"
echo ""
echo "Review the output to see what went wrong."
exit 1
fi