#!/bin/bash
# Test explicit skill requests (user names a skill directly)
# Usage: ./run-test.sh <skill-name> <prompt-file> [max-turns]
#
# Tests whether Claude invokes a skill when the user explicitly requests it by name
# (without using the plugin namespace prefix)
#
# Arguments:
#   skill-name   Name of the skill expected to be invoked (without namespace)
#   prompt-file  File whose contents are sent to `claude -p` as the prompt
#   max-turns    Optional value for --max-turns (default: 3)
#
# Exit status: 0 if the skill was triggered, 1 otherwise (including usage and
# preflight errors).
#
# Claude is run from a freshly created scratch project directory under
# /tmp/superpowers-tests/. Note that this script does NOT change HOME.

set -eu

SKILL_NAME="${1:-}"
PROMPT_FILE="${2:-}"
MAX_TURNS="${3:-3}"

if [ -z "$SKILL_NAME" ] || [ -z "$PROMPT_FILE" ]; then
  echo "Usage: $0 <skill-name> <prompt-file> [max-turns]" >&2
  echo "Example: $0 subagent-driven-development ./prompts/subagent-driven-development-please.txt" >&2
  exit 1
fi

# Fail early, before creating any output directories, if the prompt is unusable.
if [ ! -r "$PROMPT_FILE" ]; then
  echo "Error: prompt file not found or not readable: $PROMPT_FILE" >&2
  exit 1
fi

# Without these tools the run below would be swallowed by `|| true` and the
# script would report a misleading "skill NOT triggered" failure.
for REQUIRED_TOOL in claude timeout; do
  if ! command -v "$REQUIRED_TOOL" >/dev/null 2>&1; then
    echo "Error: required command not found: $REQUIRED_TOOL" >&2
    exit 1
  fi
done

# Get the directory where this script lives
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Get the superpowers plugin root (two levels up)
PLUGIN_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

TIMESTAMP=$(date +%s)
OUTPUT_DIR="/tmp/superpowers-tests/${TIMESTAMP}/explicit-skill-requests/${SKILL_NAME}"
mkdir -p "$OUTPUT_DIR"

# Read prompt from file
PROMPT=$(cat "$PROMPT_FILE")

echo "=== Explicit Skill Request Test ==="
echo "Skill: $SKILL_NAME"
echo "Prompt file: $PROMPT_FILE"
echo "Max turns: $MAX_TURNS"
echo "Output dir: $OUTPUT_DIR"
echo ""

# Copy prompt for reference (done before the cd below so that a relative
# PROMPT_FILE path still resolves)
cp "$PROMPT_FILE" "$OUTPUT_DIR/prompt.txt"

# Create a minimal project directory for the test
PROJECT_DIR="$OUTPUT_DIR/project"
mkdir -p "$PROJECT_DIR/docs/plans"

# Create a dummy plan file for mid-conversation tests
cat > "$PROJECT_DIR/docs/plans/auth-system.md" << 'EOF'
# Auth System Implementation Plan

## Task 1: Add User Model
Create user model with email and password fields.

## Task 2: Add Auth Routes
Create login and register endpoints.

## Task 3: Add JWT Middleware
Protect routes with JWT validation.
EOF

# Run Claude from inside the scratch project directory
LOG_FILE="$OUTPUT_DIR/claude-output.json"
cd "$PROJECT_DIR"

echo "Plugin dir: $PLUGIN_DIR"
echo "Running claude -p with explicit skill request..."
printf 'Prompt: %s\n' "$PROMPT"
echo ""

# A non-zero exit (including timeout) must not abort the script: the log is
# still analysed below. The status is kept only to report a timeout.
CLAUDE_STATUS=0
timeout 300 claude -p "$PROMPT" \
  --plugin-dir "$PLUGIN_DIR" \
  --dangerously-skip-permissions \
  --max-turns "$MAX_TURNS" \
  --output-format stream-json \
  > "$LOG_FILE" 2>&1 || CLAUDE_STATUS=$?

if [ "$CLAUDE_STATUS" -eq 124 ]; then
  echo "WARNING: claude timed out after 300 seconds (log may be incomplete)"
fi

echo ""
echo "=== Results ==="

# Check if skill was triggered (look for Skill tool invocation)
# Match either "skill":"skillname" or "skill":"namespace:skillname"
# The skill name is escaped so regex metacharacters in it match literally.
SKILL_NAME_RE=$(printf '%s' "$SKILL_NAME" | sed 's/[][\.*^$+?(){}|]/\\&/g')
SKILL_PATTERN='"skill":"([^"]*:)?'"${SKILL_NAME_RE}"'"'
if grep -q '"name":"Skill"' "$LOG_FILE" && grep -qE "$SKILL_PATTERN" "$LOG_FILE"; then
  echo "PASS: Skill '$SKILL_NAME' was triggered"
  TRIGGERED=true
else
  echo "FAIL: Skill '$SKILL_NAME' was NOT triggered"
  TRIGGERED=false
fi

# Show what skills WERE triggered
# The result is captured first: the exit status of `grep | sort` is sort's,
# so a trailing `|| echo` fallback would never fire.
echo ""
echo "Skills triggered in this run:"
TRIGGERED_SKILLS=$(grep -o '"skill":"[^"]*"' "$LOG_FILE" 2>/dev/null | sort -u || true)
if [ -n "$TRIGGERED_SKILLS" ]; then
  printf '%s\n' "$TRIGGERED_SKILLS"
else
  echo "  (none)"
fi

# Check if Claude took action BEFORE invoking the skill (the failure mode)
echo ""
echo "Checking for premature action..."

# Look for tool invocations before the Skill invocation
# This detects the failure mode where Claude starts doing work without loading the skill
FIRST_SKILL_LINE=$(grep -n '"name":"Skill"' "$LOG_FILE" | head -1 | cut -d: -f1)
if [ -n "$FIRST_SKILL_LINE" ]; then
  # Check if any non-Skill tools were invoked up to the first Skill invocation
  # TodoWrite is filtered out as well (planning is ok)
  PREMATURE_TOOLS=$(head -n "$FIRST_SKILL_LINE" "$LOG_FILE" | \
    grep '"type":"tool_use"' | \
    grep -v '"name":"Skill"' | \
    grep -v '"name":"TodoWrite"' || true)
  if [ -n "$PREMATURE_TOOLS" ]; then
    echo "WARNING: Tools invoked BEFORE Skill tool:"
    echo "$PREMATURE_TOOLS" | head -5
    echo ""
    echo "This indicates Claude started working before loading the requested skill."
  else
    echo "OK: No premature tool invocations detected"
  fi
else
  echo "WARNING: No Skill invocation found at all"
fi

# Show first assistant message
# jq is optional: if it is missing, fails, or yields nothing, print the
# fallback text explicitly instead of relying on the pipeline's exit status.
echo ""
echo "First assistant response (truncated):"
FIRST_ASSISTANT=$(grep -m 1 '"type":"assistant"' "$LOG_FILE" || true)
FIRST_TEXT=""
if [ -n "$FIRST_ASSISTANT" ] && command -v jq >/dev/null 2>&1; then
  FIRST_TEXT=$(printf '%s\n' "$FIRST_ASSISTANT" | \
    jq -r '.message.content[0].text // .message.content' 2>/dev/null | \
    head -c 500 || true)
fi
if [ -n "$FIRST_TEXT" ]; then
  printf '%s\n' "$FIRST_TEXT"
else
  echo "  (could not extract)"
fi

echo ""
echo "Full log: $LOG_FILE"
echo "Timestamp: $TIMESTAMP"

if [ "$TRIGGERED" = "true" ]; then
  exit 0
else
  exit 1
fi