#!/bin/bash
# Test explicit skill requests (user names a skill directly)
# Usage: ./run-test.sh <skill-name> <prompt-file> [max-turns]
#
# Tests whether Claude invokes a skill when the user explicitly requests it by name
# (without using the plugin namespace prefix)
#
# Runs Claude from a freshly created project directory under /tmp/superpowers-tests.
# NOTE(review): this header previously claimed "Uses isolated HOME to avoid user
# context interference", but nothing in this script overrides HOME. Set HOME
# before invoking it if that isolation is required.

# Abort on the first unhandled command failure. pipefail is deliberately NOT
# enabled: later pipelines of the form `grep ... | head | cut` and
# `grep ... | sort` rely on grep's "no match" status being ignored.
set -e

# Positional arguments. Missing ones become empty strings (rejected by the
# usage check that follows); the turn limit defaults to 3.
SKILL_NAME="${1:-}"
PROMPT_FILE="${2:-}"
MAX_TURNS="${3:-3}"

# Both the skill name and the prompt file are required. When either is
# missing, print usage to stderr (it is a diagnostic, not a result) and fail.
if [[ -z "$SKILL_NAME" || -z "$PROMPT_FILE" ]]; then
  {
    echo "Usage: $0 <skill-name> <prompt-file> [max-turns]"
    echo "Example: $0 subagent-driven-development ./prompts/subagent-driven-development-please.txt"
  } >&2
  exit 1
fi

# Absolute path of the directory where this script lives.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Superpowers plugin root: two directory levels above this script.
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Per-run output directory, keyed by epoch seconds and the skill under test.
# Runs for the same skill that start within the same second share a directory.
TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/${SKILL_NAME}"
mkdir -p "$OUTPUT_DIR"

# Fail early with a clear message when the prompt file is missing or unreadable.
if [[ ! -f "$PROMPT_FILE" || ! -r "$PROMPT_FILE" ]]; then
  echo "Error: cannot read prompt file: $PROMPT_FILE" >&2
  exit 1
fi

# Read prompt from file (command substitution drops trailing newlines).
PROMPT=$(<"$PROMPT_FILE")

# Banner describing this run.
echo "=== Explicit Skill Request Test ==="
echo "Skill: $SKILL_NAME"
echo "Prompt file: $PROMPT_FILE"
echo "Max turns: $MAX_TURNS"
echo "Output dir: $OUTPUT_DIR"
echo ""

# Copy prompt for reference; `--` keeps a file name starting with '-' from
# being parsed as an option.
cp -- "$PROMPT_FILE" "$OUTPUT_DIR/prompt.txt"

# Create a minimal project directory for the test. The :? guard aborts if
# OUTPUT_DIR is empty, so the fixture can never be created at /project.
PROJECT_DIR="${OUTPUT_DIR:?OUTPUT_DIR must be set}/project"
mkdir -p "$PROJECT_DIR/docs/plans"

# Create a dummy plan file for mid-conversation tests.
# The quoted 'EOF' delimiter writes the plan text literally, with no expansion.
cat > "$PROJECT_DIR/docs/plans/auth-system.md" << 'EOF'
# Auth System Implementation Plan

## Task 1: Add User Model
Create user model with email and password fields.

## Task 2: Add Auth Routes
Create login and register endpoints.

## Task 3: Add JWT Middleware
Protect routes with JWT validation.
EOF

# Run Claude from inside the scratch project directory, capturing its
# stream-json output (stdout and stderr together) in LOG_FILE.
LOG_FILE="$OUTPUT_DIR/claude-output.json"
cd "$PROJECT_DIR" || exit 1

echo "Plugin dir: $PLUGIN_DIR"
echo "Running claude -p with explicit skill request..."
echo "Prompt: $PROMPT"
echo ""

# The run is best-effort: a non-zero exit must not abort the script, because
# the log is still analysed afterwards. The status is captured and reported
# rather than silently discarded, so a timeout is distinguishable from a
# genuine "skill not triggered" result.
CLAUDE_STATUS=0
timeout 300 claude -p "$PROMPT" \
  --plugin-dir "$PLUGIN_DIR" \
  --dangerously-skip-permissions \
  --max-turns "$MAX_TURNS" \
  --output-format stream-json \
  > "$LOG_FILE" 2>&1 || CLAUDE_STATUS=$?

if [[ "$CLAUDE_STATUS" -eq 124 ]]; then
  # 124 is the status timeout(1) reports when the time limit was hit.
  echo "WARNING: claude timed out after 300s; log may be incomplete" >&2
elif [[ "$CLAUDE_STATUS" -ne 0 ]]; then
  echo "WARNING: claude exited with status $CLAUDE_STATUS; see $LOG_FILE" >&2
fi

echo ""
echo "=== Results ==="

# skill_was_triggered LOG NAME
# Succeed when LOG contains a Skill tool invocation and a "skill":"..." input
# whose value is NAME or "<namespace>:NAME".
# The value is compared as a literal string, so characters in NAME that are
# special in regular expressions (such as '.') cannot cause false matches.
skill_was_triggered() {
  local log=$1 name=$2 token value

  grep -q '"name":"Skill"' "$log" || return 1

  while IFS= read -r token; do
    # token looks like "skill":"value" -- strip the key and the closing quote.
    value=${token#'"skill":"'}
    value=${value%'"'}
    if [[ "$value" == "$name" || "$value" == *":$name" ]]; then
      return 0
    fi
  done < <(grep -o '"skill":"[^"]*"' "$log")

  return 1
}

# Check if skill was triggered (look for Skill tool invocation)
# Match either "skill":"skillname" or "skill":"namespace:skillname"
if skill_was_triggered "$LOG_FILE" "$SKILL_NAME"; then
  echo "PASS: Skill '$SKILL_NAME' was triggered"
  TRIGGERED=true
else
  echo "FAIL: Skill '$SKILL_NAME' was NOT triggered"
  TRIGGERED=false
fi

# list_triggered_skills LOG
# Print each distinct "skill":"..." token found in LOG, sorted, one per line.
# Print " (none)" when nothing matched or LOG could not be read.
list_triggered_skills() {
  local log=$1 found
  # The fallback cannot be chained onto the pipeline with `||`: a pipeline's
  # status is that of its last command, and sort succeeds on empty input.
  # Test the captured output instead.
  found=$(grep -o '"skill":"[^"]*"' "$log" 2>/dev/null | sort -u) || true
  if [[ -n "$found" ]]; then
    printf '%s\n' "$found"
  else
    echo " (none)"
  fi
}

# Show what skills WERE triggered
echo ""
echo "Skills triggered in this run:"
list_triggered_skills "$LOG_FILE"

# premature_tools LOG
# Print every line of LOG that appears before the first line containing a
# Skill invocation, contains a tool_use entry, and does not mention TodoWrite
# (planning is ok). Scanning stops at the first Skill line, so that line
# itself is never reported.
# The caller must first confirm that LOG contains a Skill invocation;
# otherwise every tool_use line in the file is printed.
premature_tools() {
  awk '
    /"name":"Skill"/ { exit }
    /"type":"tool_use"/ && !/"name":"TodoWrite"/ { print }
  ' "$1"
}

# Check if Claude took action BEFORE invoking the skill (the failure mode)
echo ""
echo "Checking for premature action..."

# Look for tool invocations before the Skill invocation.
# This detects the failure mode where Claude starts doing work without loading the skill.
FIRST_SKILL_LINE=$(grep -n '"name":"Skill"' "$LOG_FILE" | head -1 | cut -d: -f1)
if [[ -n "$FIRST_SKILL_LINE" ]]; then
  # `|| true` keeps a failed scan from aborting the script under set -e.
  PREMATURE_TOOLS=$(premature_tools "$LOG_FILE" || true)
  if [[ -n "$PREMATURE_TOOLS" ]]; then
    echo "WARNING: Tools invoked BEFORE Skill tool:"
    echo "$PREMATURE_TOOLS" | head -5
    echo ""
    echo "This indicates Claude started working before loading the requested skill."
  else
    echo "OK: No premature tool invocations detected"
  fi
else
  echo "WARNING: No Skill invocation found at all"
fi

# first_assistant_text LOG
# Print up to 500 bytes of the first assistant message in LOG: the text of its
# first content item, or the whole content value when that item has no text.
# Print " (could not extract)" when nothing could be extracted (no assistant
# line, jq unavailable, or jq failed to parse the line).
first_assistant_text() {
  local log=$1 text
  # The fallback cannot be chained onto the pipeline with `||`: a pipeline's
  # status is that of its last command (head), which succeeds even when jq
  # produced nothing. Test the captured output instead.
  text=$(grep '"type":"assistant"' "$log" | head -1 \
    | jq -r '.message.content[0].text // .message.content' 2>/dev/null \
    | head -c 500) || true
  if [[ -n "$text" ]]; then
    printf '%s\n' "$text"
  else
    echo " (could not extract)"
  fi
}

# Show first assistant message
echo ""
echo "First assistant response (truncated):"
first_assistant_text "$LOG_FILE"

echo ""
echo "Full log: $LOG_FILE"
echo "Timestamp: $TIMESTAMP"

# The exit status mirrors the detection result: 0 when the requested skill
# was triggered, 1 otherwise.
if [[ "$TRIGGERED" == "true" ]]; then
  exit 0
fi
exit 1