diff --git a/.gitea/ci/thirdparty_skills.json b/.gitea/ci/thirdparty_skills.json
index 50a47439..c3b7a03d 100644
--- a/.gitea/ci/thirdparty_skills.json
+++ b/.gitea/ci/thirdparty_skills.json
@@ -58,7 +58,7 @@
"sync_mode": "copy_skill_dirs",
"source_list": "skills/thirdparty/.sources/codebase-recon.list",
"skills_subdir": "plugins/outfitter/skills",
- "include_skill_dirs": ["codebase-recon"]
+ "include_skill_dirs": ["codebase-recon", "pathfinding"]
},
{
"id": "codebase-migrate",
diff --git a/skills/README.md b/skills/README.md
index d4c81166..c89d46f6 100644
--- a/skills/README.md
+++ b/skills/README.md
@@ -64,9 +64,18 @@
| `brooks-test` | `brooks-lint` | 测试质量审查 |
| `_shared` | `brooks-lint` | brooks 系列 skill 共享参考资料,不是独立使用的 skill |
+### outfitter-dev/agents Source
+
+`outfitter-dev/agents` 是一个较大的 agents 仓库。当前只登记并同步其中和
+代码库侦察相关的下游项:
+
+| 下游项 | 隶属 | 作用 |
+| --- | --- | --- |
+| `codebase-recon` | `outfitter-dev/agents` | 通过 git 历史和代码结构做 risk scan、热点分析、重构前侦察 |
+| `pathfinding` | `outfitter-dev/agents` | 需求澄清和 confidence calibration;同时补齐 `codebase-recon` 引用的 `confidence.md` |
+
### Single-skill Sources
| Skill | 来源 | 作用 |
| --- | --- | --- |
-| `codebase-recon` | `outfitter-dev/agents` | 通过 git 历史和代码结构做 risk scan、热点分析、重构前侦察 |
| `codebase-migrate` | `awesome-codex-skills` | 大代码库迁移、多文件 refactor、分批变更与 CI 验证工作流 |
diff --git a/skills/thirdparty/.sources/codebase-recon.list b/skills/thirdparty/.sources/codebase-recon.list
index efd4ed2b..7175a423 100644
--- a/skills/thirdparty/.sources/codebase-recon.list
+++ b/skills/thirdparty/.sources/codebase-recon.list
@@ -1 +1,2 @@
codebase-recon
+pathfinding
diff --git a/skills/thirdparty/pathfinding/SKILL.md b/skills/thirdparty/pathfinding/SKILL.md
new file mode 100644
index 00000000..e092ecc7
--- /dev/null
+++ b/skills/thirdparty/pathfinding/SKILL.md
@@ -0,0 +1,262 @@
+---
+name: pathfinding
+description: This skill should be used when requirements are unclear, brainstorming ideas, or when "pathfind", "brainstorm", "figure out", "clarify requirements", or "work through" are mentioned.
+metadata:
+ version: "2.0.0"
+---
+
+# Pathfinding
+
+Adaptive Q&A → unclear requirements → clear path.
+
+
+
+- Ambiguous/incomplete requirements
+- Complex features needing exploration
+- Greenfield projects with open questions
+- Collaborative brainstorming or problem solving
+
+NOT for: time-critical bugs, well-defined tasks, obvious questions
+
+
+
+
+
+| Bar | Lvl | % | Name | Action |
+|-----|-----|---|------|--------|
+| `░░░░░` | 0 | 0–19 | Prepping | Gather foundational context |
+| `▓░░░░` | 1 | 20–39 | Scouting | Ask broad questions |
+| `▓▓░░░` | 2 | 40–59 | Exploring | Ask focusing questions |
+| `▓▓▓░░` | 3 | 60–74 | Charting | Risky to proceed; gaps remain |
+| `▓▓▓▓░` | 4 | 75–89 | Mapped | Viable; push toward 5 |
+| `▓▓▓▓▓` | 5 | 90–100 | Ready | Deliver |
+
+Start honest. Clear request → level 4–5. Vague → level 0–2.
+
+At level 4: "Can proceed, but 1–2 more questions would reach full confidence. Continue or deliver now?"
+
+Below level 5: include `△ Caveats` section.
+
+
+
+
+
+Load the **maintain-tasks** skill for stage tracking. Stages advance only, never regress.
+
+| Stage | Trigger | activeForm |
+|-------|---------|------------|
+| Prep | level 0–1 | "Prepping" |
+| Explore | level 2–3 | "Exploring" |
+| Clarify | level 4 | "Clarifying" |
+| Deliver | level 5 | "Delivering" |
+
+Task format — each stage gets context-specific title:
+
+```text
+- Prep { domain } requirements
+- Explore { approach } options
+- Clarify { key unknowns, 3-4 words }
+- Deliver { artifact type }
+```
+
+Situational (insert before Deliver when triggered):
+- Resolve Conflicts → `◆ Caution` or `◆◆ Hazard` pushback
+- Validate Assumptions → high-risk assumptions before delivery
+
+Workflow:
+- Start: Create stage matching initial confidence `in_progress`
+- Transition: Mark current `completed`, add next `in_progress`
+- High start (4+): Skip directly to `Clarify` or `Deliver`
+- Early delivery: Skip to `Deliver` + `△ Caveats`
+
+
+
+
+
+Calibrate first — user may have already provided context (docs, prior conversation, pointed you at files). If enough context exists, skip to level 3–4. Don't re-ask what's already clear.
+
+If gaps remain, explore focus areas (pick what's relevant):
+- Purpose: What problem? Why now?
+- Constraints: Time, tech, team, dependencies
+- Success: How will we know it works?
+- Scope: What's in, what's out?
+
+When multiple approaches exist:
+- Propose 2–3 options with trade-offs
+- Lead with recommendation ★ and reasoning
+- Let user pick, combine, or redirect
+
+Principles:
+- YAGNI — cut what's not needed
+- DRY — don't duplicate effort or logic
+- Simplest thing — prefer boring solutions
+
+
+
+
+
+Use `EnterPlanMode` for each question — enables keyboard navigation of options.
+
+Structure:
+- Prose above tool: context, reasoning, ★ recommendation if clear lean
+- Inside tool: options only (concise, scannable)
+
+At level 0 — start with session intent:
+- Quick pulse check vs deep dive?
+- Exploring possibilities or solving a specific problem?
+- What does "done" look like?
+
+Levels 1–4 — focus on substance:
+- 2–4 options per question + "5. Something else"
+- Inline `[★]` on recommended option + *italicized rationale*
+- User replies: number, modifications, or combos
+
+
+
+
+
+Loop: Answer → Restate → Update Confidence → Next action
+
+After each answer emit:
+- Confidence: {BAR} {NAME}
+- Assumptions: { if material }
+- Unknowns: { what we can clarify; note unknowables when relevant }
+- Decisions: { what's locked in }
+- Concerns: { what feels off + why }
+
+Next action by level:
+- 0–2: Ask clarifying questions
+- 3: Summarize (3 bullets max), fork toward 5
+- 4: Offer choice: refine or proceed
+- 5: Deliver
+
+
+
+
+
+When answer reveals a concern mid-stream:
+- Pause before next question
+- Surface with `△` + brief description
+- Ask: clarify now, note for later, or proceed with assumption?
+
+Example: "△ This assumes the API supports batch operations — clarify now, note for later, or proceed?"
+
+If user proceeds despite significant gap → escalate to `pushback` protocol.
+
+
+
+
+
+Escalate when choice conflicts with goals/constraints/best practices:
+
+- `◇ Alternative`: Minor misalignment. Present option + reasoning.
+- `◆ Caution`: Clear conflict. Recommend alternative, explain risks, ask to proceed. Triggers Resolve Conflicts.
+- `◆◆ Hazard`: High failure risk. Require mitigation or explicit override. Triggers Resolve Conflicts.
+
+Override: Accept "Proceed anyway: {REASON}" → log in next reflection → mark Resolve Conflicts complete.
+
+
+
+
+
+Integrate skeptic agent for complexity sanity checks:
+
+**Recommend** (offer choice):
+- Level 5 reached with △ Caveats > 2
+- Red flag language in decisions: "might need later", "more flexible", "best practice"
+
+```text
+Before finalizing — you have {N} caveats. Want to run skeptic for a sanity check?
+[AskUserQuestion]
+1. Yes, quick check [★] — I'll challenge complexity interactively
+2. Yes, deep analysis — launch skeptic agent in background
+3. No, proceed — deliver as-is
+```
+
+**Auto-invoke** (no choice):
+- Level 4+ with 3+ unknowns persisting across 2+ question cycles
+- ◆◆ Hazard escalation triggered during session
+
+When auto-invoking:
+
+```text
+[Auto-invoking skeptic — {REASON}]
+```
+
+Launch with Task tool:
+- subagent_type: "outfitter:skeptic"
+- prompt: Include current decisions, unknowns, and caveats
+- run_in_background: false (wait for findings before delivery)
+
+After skeptic returns:
+- Present findings to user
+- If verdict is `block` → add Resolve Conflicts stage
+- If verdict is `caution` → offer choice to address or acknowledge
+- If verdict is `proceed` → continue to delivery
+
+
+
+
+
+Level 5: Produce artifact immediately (doc, plan, code, outline). If none specified, suggest one.
+
+After delivering, ask where to persist (if applicable):
+
+```text
+[EnterPlanMode]
+1. { discovered path } [★] — { source: `CLAUDE.md` preference | existing directory | convention }
+2. Create issue — { Linear/GitHub/Beads based on project context }
+3. ADR — { if architectural decision }
+4. Don't persist — keep in conversation only
+5. Something else — different location or format
+```
+
+Discovery order for option 1:
+1. `CLAUDE.md` or project instructions with explicit plan storage preference
+2. Existing `.agents/plans/` directory
+3. Existing `docs/plans/` directory
+4. Fall back to `.agents/plans/` if nothing found
+
+Always suggest filename based on topic. Match existing conventions if present.
+
+Mark Deliver `completed` after artifact is delivered (persistence is optional follow-up).
+
+Below 5: Append `△ Caveats`:
+- Open questions + context
+- Assumed decisions + defaults
+- Known concerns + impact
+- Deferred items + revisit timing
+
+
+
+
+
+ALWAYS:
+- Task stage matching initial confidence at start
+- `EnterPlanMode` for each question (keyboard nav)
+- Prose above tool for context + ★ recommendation
+- One question at a time, wait for response
+- Restate + update confidence before next move
+- Update todos at level 4, level 5 thresholds
+- Apply pushback protocol on conflicts
+- Check skeptic triggers at level 4+ (unknowns, caveats, red flags)
+
+NEVER:
+- Proceed from 0–3 without clarifying questions
+- Hide uncertainty below level 5
+- Stack questions or bury decisions in paragraphs
+- Put recommendation inside plan tool (keep in prose)
+- Skip reflection after answer
+- Regress stages
+- Ignore skeptic auto-invoke triggers
+
+
+
+
+
+- [confidence.md](references/confidence.md) — confidence deep dive
+- [questions.md](references/questions.md) — question crafting
+- [examples/](examples/) — session examples
+- skeptic agent (outfitter:skeptic) — complexity sanity checks
+
+
diff --git a/skills/thirdparty/pathfinding/examples/early-delivery.md b/skills/thirdparty/pathfinding/examples/early-delivery.md
new file mode 100644
index 00000000..13b6f5d1
--- /dev/null
+++ b/skills/thirdparty/pathfinding/examples/early-delivery.md
@@ -0,0 +1,344 @@
+# Example: Early Delivery with Caveats
+
+User requests delivery before reaching full confidence — demonstrates `△ Caveats` and mid-stream concerns.
+
+
+
+
+I want to add real-time notifications to our app. Users should get notified when someone comments on their posts.
+
+
+
+
+```text
+[Task] Getting oriented
+- Prep notification requirements (in_progress)
+- Explore delivery mechanisms (pending)
+- Clarify scale and persistence (pending)
+- Deliver implementation plan (pending)
+```
+
+```text
+▓▓░░░ Exploring
+
+Assumptions:
+- Existing app with posts and comments
+- User accounts with some form of auth
+- Web and/or mobile clients
+
+Unknowns:
+- Current tech stack?
+- Scale (users, notifications/day)?
+- Notification types (just comments, or more)?
+- Persistence (history, read/unread)?
+- Push notifications (mobile) needed?
+
+Decisions:
+- Real-time comment notifications
+```
+
+**Q1**: What's your current tech stack?
+
+*Why it matters — determines which real-time approach fits best*
+
+```text
+[EnterPlanMode]
+1. Node.js backend — Socket.io, ws, or SSE
+2. Rust backend — Axum with WebSockets
+3. Serverless (Vercel, AWS Lambda) — needs external service (Pusher, Ably)
+4. Supabase — built-in realtime, minimal setup
+5. Something else — describe your stack
+```
+
+
+
+
+
+
+
+Node.js with Express, React frontend, PostgreSQL
+
+
+
+
+```text
+Classic PERN stack — lots of good options for real-time.
+
+▓▓▓░░ Charting
+
+Assumptions:
+- Can add WebSocket server alongside Express
+- React can handle WebSocket connections
+- PostgreSQL for notification storage
+
+Unknowns:
+- Scale expectations?
+- Mobile app too, or web only?
+- Need notification history/persistence?
+
+Decisions:
+- Node.js + Express + React + PostgreSQL
+- Real-time comment notifications
+
+Concerns:
+- WebSocket scaling can get tricky with multiple server instances — may need Redis pub/sub or sticky sessions
+```
+
+```text
+[Task] Shape emerging
+- Prep notification requirements (completed)
+- Explore delivery mechanisms (in_progress)
+- Clarify scale and persistence (pending)
+- Deliver implementation plan (pending)
+```
+
+**Q2**: What scale are you expecting?
+
+*Why it matters — affects architecture complexity significantly*
+
+```text
+[EnterPlanMode]
+1. Small (< 1k users) [★] — single server, simple WebSockets *start here, scale later*
+2. Medium (1k-50k users) — needs Redis pub/sub for multi-server
+3. Large (50k+ users) — dedicated service (Pusher, Ably, or self-hosted)
+5. Something else — different constraints
+```
+
+
+
+
+
+
+
+We're small now but hoping to grow. Can we just get something working? I can come back for scaling later.
+
+
+
+
+```text
+Makes sense — ship something simple, optimize when needed.
+
+▓▓▓░░ Charting
+
+△ This assumes single-server deployment works for now — if you're already multi-instance, we'd need Redis pub/sub from the start. Clarify now, note for later, or proceed?
+```
+
+
+
+
+
+
+
+Single server for now, let's proceed
+
+
+
+
+```text
+Good — simple path forward.
+
+▓▓▓▓░ Mapped
+
+Assumptions:
+- Single server deployment
+- Web only (no mobile push for now)
+- Basic notification (no persistence/history initially)
+
+Unknowns:
+- Want notification persistence (history, read/unread)?
+
+Decisions:
+- Socket.io on Node.js (simplest for Express)
+- Single-server architecture
+- Real-time comment notifications
+
+Concerns:
+- No persistence means notifications lost on refresh — might frustrate users
+```
+
+```text
+[Task] Getting close
+- Prep notification requirements (completed)
+- Explore delivery mechanisms (completed)
+- Clarify scale and persistence (in_progress)
+- Deliver implementation plan (pending)
+```
+
+**Q3**: Should notifications persist? (history, read/unread state)
+
+*Why it matters — affects database schema and UX significantly*
+
+```text
+[EnterPlanMode]
+1. No persistence — real-time only, notifications disappear on refresh
+2. Basic persistence [★] — store in DB, show unread count *better UX, moderate effort*
+3. Full history — notification center, mark as read, pagination
+5. Something else — different approach
+```
+
+
+
+
+
+
+
+Actually, can you just give me what you have? I need to ship something today and can iterate.
+
+
+
+
+```text
+Understood — shipping now, iterating later.
+
+▓▓▓▓░ Mapped → Deliver (early)
+
+Skipping to delivery at level 4. Including caveats for what's unresolved.
+```
+
+```text
+[Task] Early delivery — shipping with caveats
+- Prep notification requirements (completed)
+- Explore delivery mechanisms (completed)
+- Clarify scale and persistence (completed)
+- Deliver implementation plan (in_progress)
+```
+
+## Real-Time Notifications — MVP Plan
+
+**Architecture**:
+
+```text
+┌─────────┐ WebSocket ┌─────────────┐
+│ React │◄──────────────────►│ Express │
+│ Client │ │ + Socket.io│
+└─────────┘ └──────┬──────┘
+ │
+ ┌──────▼──────┐
+ │ PostgreSQL │
+ │ (comments) │
+ └─────────────┘
+```
+
+**Server Setup** (add to existing Express app):
+
+```typescript
+// server.ts
+import { Server } from 'socket.io';
+import { createServer } from 'http';
+
+const httpServer = createServer(app);
+const io = new Server(httpServer, {
+ cors: { origin: process.env.CLIENT_URL }
+});
+
+// Track connected users
+const userSockets = new Map<string, string>(); // userId -> socketId
+
+io.on('connection', (socket) => {
+ const userId = socket.handshake.auth.userId;
+ userSockets.set(userId, socket.id);
+
+ socket.on('disconnect', () => {
+ userSockets.delete(userId);
+ });
+});
+
+// Export for use in routes
+export { io, userSockets };
+```
+
+**Emit on Comment Creation**:
+
+```typescript
+// routes/comments.ts
+import { io, userSockets } from '../server';
+
+app.post('/posts/:postId/comments', async (req, res) => {
+ const comment = await createComment(req.body);
+ const post = await getPost(req.params.postId);
+
+ // Notify post author (if not self-comment)
+ if (post.authorId !== req.user.id) {
+ const socketId = userSockets.get(post.authorId);
+ if (socketId) {
+ io.to(socketId).emit('notification', {
+ type: 'comment',
+ postId: post.id,
+ commentId: comment.id,
+ fromUser: req.user.name,
+ message: `${req.user.name} commented on your post`
+ });
+ }
+ }
+
+ res.json(comment);
+});
+```
+
+**React Client**:
+
+```typescript
+// hooks/useNotifications.ts
+import { useEffect } from 'react';
+import { io } from 'socket.io-client';
+import { toast } from 'your-toast-library';
+
+export function useNotifications(userId: string) {
+ useEffect(() => {
+ const socket = io(process.env.REACT_APP_API_URL, {
+ auth: { userId }
+ });
+
+ socket.on('notification', (data) => {
+ toast(data.message);
+ // Could also update a notification count in state
+ });
+
+ return () => { socket.disconnect(); };
+ }, [userId]);
+}
+```
+
+**Next Steps**:
+
+1. `npm install socket.io` (server) and `socket.io-client` (client)
+2. Add Socket.io to your Express server
+3. Wire up comment creation to emit notifications
+4. Add `useNotifications` hook to your app root
+
+---
+
+## △ Caveats
+
+**Open questions**:
+- Notification persistence — currently lost on refresh
+- Mobile push — not addressed, would need separate implementation
+
+**Assumed decisions**:
+- Single server deployment — will break with multiple instances
+- No read/unread tracking — no notification history
+- Toast-only UI — no notification center
+
+**Known concerns**:
+- User might miss notifications if offline when sent
+- No retry mechanism for failed deliveries
+- Socket reconnection handling is minimal
+
+**Deferred items**:
+- Redis pub/sub for multi-server (when you scale)
+- Notification persistence in PostgreSQL (soon — improves UX)
+- Mobile push notifications (when you add mobile app)
+- Notification preferences (mute, frequency)
+
+```text
+[Task] Done — shipped with caveats
+- Prep notification requirements (completed)
+- Explore delivery mechanisms (completed)
+- Clarify scale and persistence (completed)
+- Deliver implementation plan (completed)
+```
+
+Shipped — MVP real-time notifications ready to go. Come back when you need persistence or scaling.
+
+
+
diff --git a/skills/thirdparty/pathfinding/examples/greenfield-api.md b/skills/thirdparty/pathfinding/examples/greenfield-api.md
new file mode 100644
index 00000000..9dfef617
--- /dev/null
+++ b/skills/thirdparty/pathfinding/examples/greenfield-api.md
@@ -0,0 +1,342 @@
+# Example: Greenfield API Design
+
+Starting from zero — vague request, no context.
+
+
+
+
+I need to build an API
+
+
+
+
+```text
+[Task] Getting oriented
+- Prep API requirements (in_progress)
+- Explore architecture options (pending)
+- Clarify data and auth needs (pending)
+- Deliver API design doc (pending)
+```
+
+```text
+░░░░░ Prepping
+
+Unknowns:
+- What domain/purpose?
+- Who consumes it?
+- What data?
+- Performance/scale needs?
+- Existing infrastructure?
+```
+
+**Q1**: Before we dive in — what kind of session are you looking for?
+
+*Why it matters — sets the right depth and pace*
+
+```text
+[EnterPlanMode]
+1. Quick pulse check [★] — nail down key decisions, move fast *good for exploring direction*
+2. Deep dive — thorough exploration, comprehensive plan
+3. Specific problem — I know what I want, need help with one aspect
+5. Something else — describe what you're after
+```
+
+
+
+
+
+
+
+1, I have a rough idea but need to figure out the shape
+
+
+
+
+```text
+Got it — quick pulse to find the shape, then we can go deeper where needed.
+
+▓░░░░ Scouting
+
+Decisions:
+- Quick pulse session, fast iteration
+```
+
+**Q2**: What's this API for?
+
+*Why it matters — determines data model, auth strategy, and scaling approach*
+
+```text
+[EnterPlanMode]
+1. Internal tool — dashboard, admin, ops
+2. Public product API — third-party developers
+3. Mobile/web backend — your own apps
+4. B2B integration — partner systems
+5. Something else — describe the use case
+```
+
+
+
+
+
+
+
+3, it's for a fitness tracking app
+
+
+
+
+```text
+Fitness tracking backend — workouts, metrics, user data. Classic mobile backend pattern.
+
+▓▓░░░ Exploring
+
+Assumptions:
+- User accounts with personal data
+- Workout/activity logging
+- Progress tracking over time
+- Mobile-first (iOS/Android)
+
+Unknowns:
+- Social features? (friends, sharing, leaderboards)
+- Device integrations? (Apple Health, Fitbit, etc.)
+- Real-time needs? (live workouts, streaming)
+- Offline sync requirements?
+
+Decisions:
+- Mobile/web backend for fitness app
+- Quick pulse session
+```
+
+**Q3**: What's the core loop users will do most often?
+
+*Why it matters — shapes the data model and API surface*
+
+```text
+[EnterPlanMode]
+1. Log workouts manually [★] — exercises, sets, reps, weights *simplest starting point*
+2. Track metrics — weight, body measurements, progress photos
+3. Follow programs — structured plans, scheduled workouts
+4. Social/compete — friends, challenges, leaderboards
+5. Something else — different core interaction
+```
+
+
+
+
+
+
+
+1 and 2, logging workouts and tracking body metrics
+
+
+
+
+```text
+Core loop: log workouts + track body metrics. Good foundation — can layer social/programs later.
+
+▓▓▓░░ Charting
+
+Assumptions:
+- User model with profile data
+- Workout sessions with exercises
+- Body metrics (weight, measurements) with timestamps
+- Historical data for progress charts
+
+Unknowns:
+- Auth approach?
+- Exercise library (predefined vs custom)?
+- Offline-first or always-online?
+
+Decisions:
+- Mobile backend for fitness app
+- Core: workout logging + body metrics
+- Start simple, extensible for social/programs later
+
+Concerns:
+- Exercise data can get complex (variations, equipment, muscle groups) — start with simple model, don't over-engineer
+```
+
+```text
+[Task] Shape emerging — exploring options
+- Prep API requirements (completed)
+- Explore architecture options (in_progress)
+- Clarify data and auth needs (pending)
+- Deliver API design doc (pending)
+```
+
+**Q4**: How should users authenticate?
+
+*Why it matters — affects security model, user experience, and implementation complexity*
+
+```text
+[EnterPlanMode]
+1. Email/password [★] — simple, universal *good starting point*
+2. Social login (Google/Apple) — better UX, less friction
+3. Both — maximum flexibility
+4. Magic link — passwordless, email-based
+5. Something else — different auth approach
+```
+
+
+
+
+
+
+
+3, I want both options
+
+
+
+
+```text
+Both email/password and social login — covers all users.
+
+▓▓▓▓░ Mapped
+
+Assumptions:
+- JWT tokens for mobile auth
+- Email as primary identifier (link accounts by email)
+- Refresh token rotation for security
+
+Unknowns:
+- Specific social providers (Google, Apple, both)?
+
+Decisions:
+- Dual auth: email/password + social login
+- Workout logging + body metrics core
+- Mobile backend architecture
+
+Concerns:
+- Account linking edge cases (same email, different providers) — handle gracefully
+```
+
+```text
+[Task] Getting close — one more detail
+- Prep API requirements (completed)
+- Explore architecture options (completed)
+- Clarify data and auth needs (in_progress)
+- Deliver API design doc (pending)
+```
+
+Can proceed now, or one more question would nail down the social providers. Your call — deliver now or refine?
+
+
+
+
+
+
+
+Let's just do Google and Apple for now
+
+
+
+
+```text
+Google + Apple — covers iOS and Android users well.
+
+▓▓▓▓▓ Ready
+
+Decisions:
+- Dual auth: email/password + Google + Apple Sign-In
+- Core features: workout logging, body metrics tracking
+- JWT tokens with refresh rotation
+- Mobile-first REST API
+```
+
+```text
+[Task] Clear path — delivering design
+- Prep API requirements (completed)
+- Explore architecture options (completed)
+- Clarify data and auth needs (completed)
+- Deliver API design doc (in_progress)
+```
+
+## Fitness Tracking API Design
+
+**Core Resources**:
+
+```text
+/auth
+ POST /register — email/password signup
+ POST /login — email/password login
+ POST /oauth/google — Google Sign-In
+ POST /oauth/apple — Apple Sign-In
+ POST /refresh — refresh access token
+ POST /logout — invalidate tokens
+
+/users
+ GET /me — current user profile
+ PUT /me — update profile
+ DELETE /me — delete account
+
+/workouts
+ GET / — list workouts (paginated)
+ POST / — create workout
+ GET /:id — get workout details
+ PUT /:id — update workout
+ DELETE /:id — delete workout
+
+/exercises
+ GET / — list exercises (library)
+ POST / — create custom exercise
+
+/metrics
+ GET / — list body metrics (paginated)
+ POST / — log new metric
+ GET /summary — progress summary (charts data)
+```
+
+**Data Models**:
+
+```text
+User {
+ id, email, name, avatar_url,
+ auth_provider: "email" | "google" | "apple",
+ created_at, updated_at
+}
+
+Workout {
+ id, user_id, name, started_at, ended_at,
+ exercises: [{ exercise_id, sets: [{ reps, weight, duration }] }],
+ notes, created_at
+}
+
+Exercise {
+ id, name, category, muscle_groups[],
+ is_custom, user_id (if custom)
+}
+
+BodyMetric {
+ id, user_id, type: "weight" | "measurement",
+ value, unit, measured_at
+}
+```
+
+**Tech Stack Suggestion**:
+
+- **API**: Hono or Express on Bun
+- **Database**: PostgreSQL (Supabase or Neon)
+- **Auth**: Custom JWT or Supabase Auth
+- **Hosting**: Railway, Fly.io, or Vercel
+
+**Next Steps**:
+
+1. Set up project with Bun + Hono
+2. Configure PostgreSQL with migrations
+3. Implement auth endpoints first
+4. Build workout CRUD
+5. Add body metrics
+6. Deploy MVP
+
+```text
+[Task] Done — design shipped
+- Prep API requirements (completed)
+- Explore architecture options (completed)
+- Clarify data and auth needs (completed)
+- Deliver API design doc (completed)
+```
+
+All set — went from "I need an API" to a concrete design. Ready to start building when you are.
+
+
+
diff --git a/skills/thirdparty/pathfinding/examples/high-start.md b/skills/thirdparty/pathfinding/examples/high-start.md
new file mode 100644
index 00000000..826dfa9f
--- /dev/null
+++ b/skills/thirdparty/pathfinding/examples/high-start.md
@@ -0,0 +1,242 @@
+# Example: High Start (Level 4)
+
+Clear request with context — minimal clarification needed, fast to delivery.
+
+
+
+
+I need to add a dark mode toggle to my Next.js app. I'm using Tailwind CSS and want to persist the preference in localStorage. Should work with system preference as a default.
+
+
+
+
+```text
+[Task] Clear requirements — fast path
+- Clarify implementation details (in_progress)
+- Deliver dark mode implementation (pending)
+```
+
+```text
+▓▓▓▓░ Mapped
+
+You've given me most of what I need:
+- Next.js + Tailwind CSS
+- Toggle component
+- localStorage persistence
+- System preference default
+
+Assumptions:
+- Tailwind's built-in dark mode (class strategy)
+- Client-side toggle (no server preference)
+- Standard light/dark, no custom themes
+
+Decisions:
+- Dark mode with localStorage + system fallback
+- Tailwind class strategy
+- Toggle UI component
+```
+
+Can deliver now, or one quick question: where does the toggle live? (header, settings page, floating button)
+
+[★] If you just want the implementation and will place it yourself, I can skip that question.
+
+```text
+[EnterPlanMode]
+1. Skip — just give me the implementation
+2. Header/navbar — always visible
+3. Settings page — dedicated settings area
+4. Floating button — corner of screen
+5. Something else — different placement
+```
+
+
+
+
+
+
+
+1, I'll put it in my header
+
+
+
+
+```text
+▓▓▓▓▓ Ready
+
+All clear — delivering implementation.
+```
+
+```text
+[Task] Delivering
+- Clarify implementation details (completed)
+- Deliver dark mode implementation (in_progress)
+```
+
+## Dark Mode Implementation
+
+**1. Configure Tailwind** (`tailwind.config.js`):
+
+```javascript
+module.exports = {
+ darkMode: 'class',
+ // ... rest of config
+}
+```
+
+**2. Theme Provider** (`components/theme-provider.tsx`):
+
+```typescript
+'use client';
+
+import { createContext, useContext, useEffect, useState } from 'react';
+
+type Theme = 'light' | 'dark' | 'system';
+
+const ThemeContext = createContext<{
+ theme: Theme;
+ setTheme: (theme: Theme) => void;
+}>({ theme: 'system', setTheme: () => {} });
+
+export function ThemeProvider({ children }: { children: React.ReactNode }) {
+  const [theme, setTheme] = useState<Theme>('system');
+
+ useEffect(() => {
+ const stored = localStorage.getItem('theme') as Theme | null;
+ if (stored) setTheme(stored);
+ }, []);
+
+ useEffect(() => {
+ const root = document.documentElement;
+ const systemDark = window.matchMedia('(prefers-color-scheme: dark)').matches;
+
+ root.classList.remove('light', 'dark');
+
+ if (theme === 'system') {
+ root.classList.add(systemDark ? 'dark' : 'light');
+ } else {
+ root.classList.add(theme);
+ }
+
+ localStorage.setItem('theme', theme);
+ }, [theme]);
+
+ // Listen for system preference changes
+ useEffect(() => {
+ const media = window.matchMedia('(prefers-color-scheme: dark)');
+ const handler = () => {
+ if (theme === 'system') {
+ document.documentElement.classList.remove('light', 'dark');
+ document.documentElement.classList.add(media.matches ? 'dark' : 'light');
+ }
+ };
+ media.addEventListener('change', handler);
+ return () => media.removeEventListener('change', handler);
+ }, [theme]);
+
+  return (
+    <ThemeContext.Provider value={{ theme, setTheme }}>
+      {children}
+    </ThemeContext.Provider>
+  );
+}
+
+export const useTheme = () => useContext(ThemeContext);
+```
+
+**3. Toggle Component** (`components/theme-toggle.tsx`):
+
+```typescript
+'use client';
+
+import { useTheme } from './theme-provider';
+import { Sun, Moon, Monitor } from 'lucide-react'; // or your icon library
+
+export function ThemeToggle() {
+ const { theme, setTheme } = useTheme();
+
+ const cycle = () => {
+ const next = theme === 'light' ? 'dark' : theme === 'dark' ? 'system' : 'light';
+ setTheme(next);
+ };
+
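+  return (
+    <button onClick={cycle} aria-label="Toggle theme">{theme === 'light' ? <Sun /> : theme === 'dark' ? <Moon /> : <Monitor />}</button>
+  );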
+}
+```
+
+**4. Wrap App** (`app/layout.tsx`):
+
+```typescript
+import { ThemeProvider } from '@/components/theme-provider';
+
+export default function RootLayout({ children }: { children: React.ReactNode }) {
+  return (
+    <html lang="en" suppressHydrationWarning>
+      <body>
+        <ThemeProvider>
+          {children}
+        </ThemeProvider>
+      </body>
+    </html>
+  );
+}
+```
+
+**5. Prevent Flash** (add to `<head>` in layout):
+
+```typescript
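+<script dangerouslySetInnerHTML={{ __html: `(function(){try{var t=localStorage.getItem('theme');var d=t==='dark'||((!t||t==='system')&&window.matchMedia('(prefers-color-scheme: dark)').matches);document.documentElement.classList.add(d?'dark':'light');}catch(e){}})();` }} />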
+```
+
+**6. Use in Header**:
+
+```typescript
+import { ThemeToggle } from '@/components/theme-toggle';
+
+export function Header() {
+  return (
+    <header><ThemeToggle /></header>
+  );
+}
+```
+
+**Usage**:
+- Click toggle: cycles light → dark → system → light
+- System mode: follows OS preference automatically
+- Persists in localStorage across sessions
+- No flash on page load
+
+```text
+[Task] Done
+- Clarify implementation details (completed)
+- Deliver dark mode implementation (completed)
+```
+
+All set — drop `<ThemeToggle />` in your header and you're good.
+
+
+
diff --git a/skills/thirdparty/pathfinding/references/confidence.md b/skills/thirdparty/pathfinding/references/confidence.md
new file mode 100644
index 00000000..604f7b1a
--- /dev/null
+++ b/skills/thirdparty/pathfinding/references/confidence.md
@@ -0,0 +1,146 @@
+# Confidence
+
+Confidence reflects certainty that you can deliver the requested outcome with the available information.
+
+## Philosophy
+
+Balance two goals:
+1. **Gather enough** to deliver quality results
+2. **Avoid over-questioning** that frustrates the user
+
+Consider:
+- **Clarity**: How well-defined is the ask?
+- **Risk**: What happens if assumptions are wrong?
+- **Complexity**: How many moving parts?
+- **Ambiguity**: How many valid interpretations?
+
+## Level Overview
+
+| Bar | Level | Name | Internal % |
+| --------- | ----- | ------------ | ---------- |
+| `░░░░░` | 0 | **Prepping** | 0–19% |
+| `▓░░░░` | 1 | **Scouting** | 20–39% |
+| `▓▓░░░` | 2 | **Exploring**| 40–59% |
+| `▓▓▓░░` | 3 | **Charting** | 60–74% |
+| `▓▓▓▓░` | 4 | **Mapped** | 75–89% |
+| `▓▓▓▓▓` | 5 | **Ready** | 90–100% |
+
+## Stage Transitions
+
+Confidence levels trigger stage transitions. Stages always advance, never regress.
+
+### Stage-Confidence Mapping
+
+| Level | Stage | activeForm |
+|-------|-------|------------|
+| 0–1 | Prep | "Prepping" |
+| 2–3 | Explore | "Exploring" |
+| 4 | Clarify | "Clarifying" |
+| 5 | Deliver | "Delivering" |
+
+### Rules
+
+1. **No regression**: If confidence drops (4 → 3), stay in current stage
+2. **Skip when starting high**: Level 5 start → go directly to Deliver
+3. **Stage independence**: Confidence can fluctuate within a stage
+4. **Early delivery**: User can request delivery at any stage → add `△ Caveats`
+
+### Edge Cases
+
+**High start**: Clear requirements → Start at Ready, go directly to Deliver
+
+**Confidence drop**: Reach Mapped (4), enter Clarify, then realize gap (drops to 3) → Stay in Clarify, ask targeted questions
+
+**Rapid ascent**: Start at Exploring (2) → one answer jumps to Mapped (4) → next to Ready (5) → transition through stages quickly
+
+### Level 0: Prepping `░░░░░`
+
+**Stage**: Prep
+
+**When**: Request completely unclear, no domain context, pure guessing
+
+**Ask**: Scope, constraints, goals, background
+
+**Example**: "Make it better" with no context about what "it" is.
+
+### Level 1: Scouting `▓░░░░`
+
+**Stage**: Prep
+
+**When**: Vague direction, domain clear but specifics aren't
+
+**Ask**: What system? How big? What's in place?
+
+**Example**: "Improvements to the dashboard" — which kind?
+
+### Level 2: Exploring `▓▓░░░`
+
+**Stage**: Explore
+
+**When**: General area understood, lack critical details, multiple approaches possible
+
+**Ask**: Which approach? What about X? What matters most? Speed vs quality?
+
+**Example**: "Authentication" — method, scale, existing system unknown.
+
+### Level 3: Charting `▓▓▓░░`
+
+**Stage**: Explore
+
+**When**: Reasonable understanding, could deliver with notable assumptions
+
+**Do**:
+1. Summarize (3 bullets max)
+2. Ask 2–3 targeted questions toward level 4–5
+3. If user proceeds early → add `△ Caveats`
+
+**Example**: OAuth login — general approach known, need providers + fallback strategy.
+
+### Level 4: Mapped `▓▓▓▓░`
+
+**Stage**: Clarify
+
+**When**: Solid understanding, few clarifications would reach Ready, low risk
+
+**Do**: Offer choice — "Can proceed, but 1–2 more questions would reach full confidence. Continue or deliver now?"
+
+**Example**: New API endpoint — data model understood, need error handling approach.
+
+### Level 5: Ready `▓▓▓▓▓`
+
+**Stage**: Deliver
+
+**When**: Clear understanding, no major assumptions, minimal risk
+
+**Do**: Produce artifact immediately, succinct next steps, no more questions unless something emerges
+
+**Example**: "Add logout button to header" — clear, specific, low-risk.
+
+## Special Cases
+
+### Starting Confidence
+
+Start honest. Don't artificially start low if the request is clear.
+
+- **Clear request** → level 4–5
+- **Vague request** → level 0–2
+
+### Delivering Below Level 5
+
+User wants quick delivery at lower confidence:
+
+1. Confirm they want to proceed
+2. Add `△ Caveats` section
+3. List assumptions, concerns, unknowns
+
+### Calibration
+
+- Deliver at 5, goes well → calibrated
+- Deliver at 5, miss the mark → overconfident
+- Stay at 0–2 too long → underconfident
+
+## Tuning
+
+Percentage boundaries can adjust based on risk tolerance:
+- **Higher risk tolerance** → shift boundaries down
+- **Lower risk tolerance** → shift boundaries up
diff --git a/skills/thirdparty/pathfinding/references/questions.md b/skills/thirdparty/pathfinding/references/questions.md
new file mode 100644
index 00000000..0acd6e7b
--- /dev/null
+++ b/skills/thirdparty/pathfinding/references/questions.md
@@ -0,0 +1,132 @@
+# Question Format
+
+## Anatomy of a Good Question
+
+**Components**:
+1. **Q{N}**: Question number (for tracking)
+2. **Question**: Clear, specific, focused on one decision
+3. **Why it matters**: One sentence explaining impact
+4. **Options**: 2–4 meaningful choices
+5. **Nuance**: Brief context for each option
+6. **★ Recommendation** (optional): Your lean with reasoning
+
+## Delivery via EnterPlanMode
+
+Use `EnterPlanMode` for each question — enables keyboard navigation.
+
+**Structure**:
+- **Prose above tool**: context, reasoning, ★ recommendation
+- **Inside tool**: options only (concise, scannable)
+
+Don't bury recommendations inside the tool — keep them visible in prose.
+
+## Crafting Options
+
+### Option Count Guidelines
+
+**2 options**: Use when choices are binary or you want to keep it simple
+- Good: "Web app or mobile app?"
+- Avoid: Forcing false dichotomy when more options exist
+
+**3 options**: Sweet spot for most questions
+- Good: Covers main approaches plus one alternative
+- Avoid: Making options too similar
+
+**4 options**: Use when you need a combination or "other"
+- Good: Three distinct approaches + a hybrid option
+- Avoid: Analysis paralysis with too many choices
+
+### Option Quality
+
+**Good options**:
+- Mutually exclusive (can pick only one)
+- Collectively exhaustive (covers reasonable space)
+- Clearly differentiated (not subtle variations)
+- Actionable (leads to concrete next steps)
+
+**Bad options**:
+- Overlapping: "Option 1: Use React. Option 2: Use modern framework."
+- Too similar: "Option 1: 100ms timeout. Option 2: 150ms timeout."
+- Vague: "Option 1: Do it the normal way."
+- Open-ended: "Option 1: Whatever you think is best."
+
+## Why It Matters
+
+The one-sentence explanation serves multiple purposes:
+1. **Context**: Helps user understand why you're asking
+2. **Priority**: Shows this isn't arbitrary
+3. **Decision framing**: Clarifies what depends on this choice
+4. **Respect**: Demonstrates you're not just asking for the sake of asking
+
+**Good examples**:
+- "Why it matters — determines database schema design"
+- "Why it matters — affects performance characteristics and scaling strategy"
+- "Why it matters — impacts user experience for first-time visitors"
+
+**Weak examples**:
+- "Why it matters — I need to know"
+- "Why it matters — this is important"
+- "Why it matters — because"
+
+## Adding Nuance
+
+Each option should include helpful context:
+
+**Good nuance**:
+- Trade-offs: "Faster to implement but less flexible long-term"
+- Implications: "Requires HTTPS and external dependency"
+- Prerequisites: "Need existing user database"
+- Typical use case: "Best for high-traffic applications"
+
+**Weak nuance**:
+- Restating the obvious: "Uses OAuth" (when option says OAuth)
+- Generic statements: "Good option"
+- No information: Just the option name with no context
+
+## Recommendations (★)
+
+Use recommendations when:
+- You have genuine expertise or insight
+- One option clearly fits better for typical cases
+- User seems uncertain or asks for guidance
+
+**Don't recommend when**:
+- Purely user preference (e.g., color scheme)
+- Not enough context yet
+- All options equally valid
+
+**Good**: `1. React [★] — mature ecosystem *best starting point for most teams*`
+
+**Weak**:
+- ★ I like this one
+- ★ Most popular
+- Recommendation mentioned only in passing in the prose, with no `[★]` marker on the option itself
+
+## User Replies
+
+Number is a shorthand, not a constraint:
+- `2` → selects option 2
+- `2, but with caching` → selection + modification
+- `2 and 3` → combo
+- `What's the difference?` → clarification request
+
+All valid.
+
+## Adaptive Cadence
+
+**Baseline** (~80% of questions):
+- Clear question + one-sentence "why"
+- 2–4 options with brief nuance
+- Inline `[★]` on recommended option
+- Optional: `[★] { expanded reasoning }` in prose above if helpful
+
+**Expand when**:
+- High ambiguity or risk
+- User uncertain or asks for detail
+- Technical complexity needs explanation
+
+**Simplify when**:
+- Straightforward question
+- User shows expertise
+- Question 6+ in session
+- User wants to move faster
diff --git a/skills/thirdparty/thirdparty-skills.yml b/skills/thirdparty/thirdparty-skills.yml
index eaf9d768..7b5626b2 100644
--- a/skills/thirdparty/thirdparty-skills.yml
+++ b/skills/thirdparty/thirdparty-skills.yml
@@ -29,16 +29,20 @@ skills:
sync: enabled
upstream_repo: https://github.com/outfitter-dev/agents
upstream_ref: main
- upstream_path: plugins/outfitter/skills/codebase-recon
- upstream_layout: single_skill_dir
+ upstream_paths:
+ - plugins/outfitter/skills/codebase-recon
+ - plugins/outfitter/skills/pathfinding
+ upstream_layout: multi_skill_subset
selected_for:
- risk scan
- git-history hotspot analysis
- codebase reconnaissance before planning
- refactor-risk assessment
+ - confidence calibration reference dependency
playbook_fit: pre-design and pre-refactor codebase risk discovery
notes:
- - Upstream skill lives inside a larger agents repository.
+ - Upstream skills live inside a larger agents repository.
+ - pathfinding is included because codebase-recon links to pathfinding/references/confidence.md.
- id: codebase-migrate
sync: enabled
diff --git a/tests/test_skills_readme.py b/tests/test_skills_readme.py
index 82515b89..129152a9 100644
--- a/tests/test_skills_readme.py
+++ b/tests/test_skills_readme.py
@@ -36,6 +36,7 @@ class SkillsReadmeTests(unittest.TestCase):
self.assertIn("_shared", text)
self.assertIn("codebase-recon", text)
+ self.assertIn("pathfinding", text)
self.assertIn("codebase-migrate", text)
self.assertIn("已登记待同步", text)
diff --git a/tests/test_thirdparty_skill_curation.py b/tests/test_thirdparty_skill_curation.py
index e255702e..60be3373 100644
--- a/tests/test_thirdparty_skill_curation.py
+++ b/tests/test_thirdparty_skill_curation.py
@@ -20,7 +20,11 @@ class ThirdpartySkillCurationTests(unittest.TestCase):
self.assertIn("id: codebase-recon", text)
self.assertIn("upstream_repo: https://github.com/outfitter-dev/agents", text)
- self.assertIn("upstream_path: plugins/outfitter/skills/codebase-recon", text)
+ self.assertIn("upstream_paths:", text)
+ self.assertIn("plugins/outfitter/skills/codebase-recon", text)
+ self.assertIn("plugins/outfitter/skills/pathfinding", text)
+ self.assertIn("upstream_layout: multi_skill_subset", text)
+ self.assertIn("pathfinding/references/confidence.md", text)
self.assertIn("risk scan", text)
self.assertIn("id: codebase-migrate", text)
diff --git a/tests/test_thirdparty_skills_pipeline.py b/tests/test_thirdparty_skills_pipeline.py
index 40fef3d0..c68660b8 100644
--- a/tests/test_thirdparty_skills_pipeline.py
+++ b/tests/test_thirdparty_skills_pipeline.py
@@ -19,6 +19,7 @@ UI_UX_PRO_MAX_LIST = ROOT / "skills" / "thirdparty" / ".sources" / "ui-ux-pro-ma
UI_UX_PRO_MAX_DIR = ROOT / "skills" / "thirdparty" / "ui-ux-pro-max"
BROOKS_LINT_LIST = ROOT / "skills" / "thirdparty" / ".sources" / "brooks-lint.list"
CODEBASE_RECON_LIST = ROOT / "skills" / "thirdparty" / ".sources" / "codebase-recon.list"
+PATHFINDING_DIR = ROOT / "skills" / "thirdparty" / "pathfinding"
def load_manifest() -> dict:
@@ -96,7 +97,7 @@ class ThirdpartySkillsPipelineTests(unittest.TestCase):
self.assertEqual(
recon["source_list"], "skills/thirdparty/.sources/codebase-recon.list"
)
- self.assertEqual(recon["include_skill_dirs"], ["codebase-recon"])
+ self.assertEqual(recon["include_skill_dirs"], ["codebase-recon", "pathfinding"])
migrate = next(
item for item in data["sources"] if item["id"] == "codebase-migrate"
@@ -158,8 +159,16 @@ class ThirdpartySkillsPipelineTests(unittest.TestCase):
def test_superpowers_and_ui_ux_pro_max_source_lists_exist(self):
self.assertTrue(SUPERPOWERS_LIST.is_file())
self.assertTrue(UI_UX_PRO_MAX_LIST.is_file())
+ self.assertTrue(CODEBASE_RECON_LIST.is_file())
self.assertIn("using-superpowers", SUPERPOWERS_LIST.read_text(encoding="utf-8"))
self.assertIn("ui-ux-pro-max", UI_UX_PRO_MAX_LIST.read_text(encoding="utf-8"))
+ self.assertIn("pathfinding", CODEBASE_RECON_LIST.read_text(encoding="utf-8"))
+
+ def test_codebase_recon_pathfinding_dependency_is_synced(self):
+ self.assertTrue((PATHFINDING_DIR / "SKILL.md").is_file())
+ self.assertTrue(
+ (PATHFINDING_DIR / "references" / "confidence.md").is_file()
+ )
def test_ui_ux_pro_max_output_exists_with_data_and_scripts(self):
self.assertTrue((UI_UX_PRO_MAX_DIR / "SKILL.md").is_file())