diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..575d489 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea/ +tests/docker/.env diff --git a/ACCEPTANCE.md b/ACCEPTANCE.md new file mode 100644 index 0000000..9783786 --- /dev/null +++ b/ACCEPTANCE.md @@ -0,0 +1,256 @@ +# Yonexus v1 验收与回归清单 + +本清单服务于 `TASKLIST.md` 中的 YNX-1205。 + +目标:给后续开发、联调、回归提供一份统一基线,避免只凭“能跑起来了”判断完成度。 + +--- + +## 1. 范围 + +覆盖对象: + +- `Yonexus.Protocol` +- `Yonexus.Server` +- `Yonexus.Client` +- Server ↔ Client 联调主流程 +- pairing / auth / heartbeat / rule dispatch 关键失败路径 + +不覆盖: + +- 多服务器拓扑 +- 离线消息队列 +- 管理 UI +- 复杂规则匹配 + +--- + +## 2. 协议层验收 + +### 2.1 builtin 编解码 + +必须验证: + +- `builtin::{json}` 能正确编码 +- `builtin::{json}` 能正确解码 +- malformed builtin 消息会返回标准错误 +- 未支持 builtin type 可被明确拒绝 + +### 2.2 rule message 解析 + +必须验证: + +- `${rule}::${content}` 可被正确解析 +- `${rule}::${sender}::${content}` 可被正确解析 +- `content` 中包含 `::` 时不会被错误拆分 +- `builtin` 不能作为普通 rule 注册 + +### 2.3 共享认证约束 + +必须验证: + +- nonce 长度固定为 24 +- timestamp 新鲜度窗口符合协议 +- 签名输入序列化规则固定且可复用 + +--- + +## 3. 
Server 单体验收 + +### 3.1 启动与配置 + +必须验证: + +- 缺失 `followerIdentifiers` 会 fail fast +- 缺失 `notifyBotToken` / `adminUserId` / `listenPort` 会 fail fast +- 非法 `listenPort` 会 fail fast +- 启动时会加载持久化记录并补齐 allowlist 初始记录 + +### 3.2 pairing + +必须验证: + +- 未配对 allowlisted client 进入 `pair_required` +- server 创建 pending pairing 记录 +- pairing code 不通过 Yonexus WebSocket 下发 +- pairing 通知失败时返回 `admin_notification_failed` +- 正确 pairing code 返回 `pair_success` +- 错误 pairing code 返回 `invalid_code` +- 过期 pairing code 返回 `expired` + +### 3.3 auth + +必须验证: + +- paired client 可通过合法签名拿到 `auth_success` +- 非 allowlisted identifier 被拒绝 +- 未配对 identifier 不可通过 auth +- public key 不匹配会失败 +- stale/future timestamp 会失败 +- nonce collision 会触发 `re_pair_required` +- 超过 `>10 attempts / 10s` 会触发 `re_pair_required` + +### 3.4 liveness + +必须验证: + +- heartbeat 后更新 `lastHeartbeatAt` +- 7 分钟无心跳转为 `unstable` +- 11 分钟无心跳转为 `offline` +- `offline` 时发送 `disconnect_notice` 并断开连接 + +### 3.5 messaging + +必须验证: + +- 未认证 client 发送 rule message 会被拒绝 +- 已认证 client 的消息会被重写为 `${rule}::${sender}::${content}` +- duplicate rule 注册默认失败 +- `sendMessageToClient()` 对离线 client 返回失败 + +--- + +## 4. Client 单体验收 + +### 4.1 启动与本地状态 + +必须验证: + +- 缺失 state 文件时可初始化最小状态 +- 首次启动会自动生成 Ed25519 keypair +- 重启后不会重复生成 keypair +- 已有 secret 时可进入 auth 流程 + +### 4.2 连接与重连 + +必须验证: + +- 可连接到可用的 server +- server 不可用时会按退避策略重连 +- 手动断开不会误触发自动重连 +- 成功重连后退避计数会重置 + +### 4.3 pairing / auth + +必须验证: + +- 收到 `pair_request` 后进入待确认状态 +- 可提交 pairing code +- 收到 `pair_success` 后保存 secret +- 收到 `hello_ack(auth_required)` 后自动发 `auth_request` +- 收到 `auth_success` 后进入 authenticated +- 收到 `re_pair_required` 后清理本地 secret 并回退到 `pair_required` + +### 4.4 heartbeat / dispatch + +必须验证: + +- authenticated 后启动 heartbeat loop +- 断线或未认证时停止 heartbeat loop +- `registerRule()` 拒绝 `builtin` +- `sendMessageToServer()` 拒绝 `builtin::` 和非法格式 + +--- + +## 5. 联调验收 + +### 5.1 首次配对闭环 + +必须通过: + +1. Client 连接 Server +2. Client 发送 `hello` +3. Server 返回 `hello_ack(pair_required)` +4. 
Server 创建 pairing request 并发出管理员通知 +5. Client 提交正确 pairing code +6. Server 返回 `pair_success` +7. Client 保存 secret +8. Client 发送 `auth_request` +9. Server 返回 `auth_success` +10. Client 进入 authenticated 并开始 heartbeat + +### 5.2 正常重连闭环 + +必须通过: + +1. 已配对 Client 重连 +2. `hello_ack(auth_required)` +3. Client 发送合法 `auth_request` +4. Server 返回 `auth_success` +5. 心跳恢复正常 + +### 5.3 规则消息闭环 + +必须通过: + +1. Client 认证成功 +2. Client 调用 `sendRuleMessage()` +3. Server 收到并完成 sender rewrite +4. Server 规则处理器命中 exact match +5. Server 调用 `sendRuleMessageToClient()` 回发消息 +6. Client 本地规则处理器收到消息 + +--- + +## 6. 失败路径回归矩阵 + +每次关键改动后,至少回归以下场景: + +- pairing code 错误 +- pairing 过期 +- pairing 通知失败 +- unsupported protocol version +- malformed builtin frame +- unknown identifier +- invalid signature +- stale timestamp +- future timestamp +- nonce collision +- handshake rate limit +- duplicate active connection 竞争 +- 未认证连接发送 rule message + +--- + +## 7. 自动化建议 + +建议的最小自动化分层: + +- `Yonexus.Protocol`: 单元测试,锁定 codec / types / auth helpers +- `Yonexus.Server`: 单元测试,覆盖 runtime + pairing/auth/liveness 核心逻辑 +- `Yonexus.Client`: 单元测试,覆盖 state/transport/runtime 主状态机 +- Server + Client: 集成测试,覆盖 happy path 与关键失败路径 + +建议把通过条件固化为: + +- `Yonexus.Protocol` 类型检查 + 测试必须全绿 +- Server / Client 类型检查必须全绿 +- 新增联调测试后,happy path 与至少一组安全失败路径必须全绿 + +推荐的 umbrella 仓库最小回归入口: + +```bash +./scripts/validate-v1.sh +``` + +该脚本会顺序执行: +- `Yonexus.Protocol`: `npm run check && npm run test` +- `Yonexus.Server`: `npm run check && npm run test` +- `Yonexus.Client`: `npm run check && npm run test` + +如果某个子仓库尚未安装依赖,脚本会优先自动执行: +- 有 `package-lock.json` 时使用 `npm ci` +- 否则回退到 `npm install` + +--- + +## 8. 
当前对应关系 + +与 `TASKLIST.md` 对应关系: + +- YNX-1101:协议单元测试 +- YNX-1102:Server 单元测试 +- YNX-1103:Client 单元测试 +- YNX-1104:Server-Client 集成测试 +- YNX-1105:失败路径测试矩阵 +- YNX-1205:协议测试与验收清单(本文件) diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md new file mode 100644 index 0000000..535fff9 --- /dev/null +++ b/DEPLOYMENT.md @@ -0,0 +1,123 @@ +# Yonexus 部署指南 (v1) + +本指南面向 **单主多从** 拓扑: +- **主节点**:运行 `Yonexus.Server` +- **从节点**:运行 `Yonexus.Client` + +> 说明:Yonexus 采用三仓库/子模块结构(Umbrella + Server + Client + Protocol)。 + +--- + +## 1. 拓扑与前置条件 + +- **主节点**需要可被从节点访问的稳定地址(域名或固定 IP) +- **从节点**只需能 outbound 访问主节点 WebSocket +- 需要一个 Discord Bot Token,用于向管理员 DM 配对码 +- 需要管理员的 Discord User ID + +--- + +## 2. 仓库结构与同步 + +在 umbrella 仓库内: + +``` +Yonexus/ +├── Yonexus.Server +├── Yonexus.Client +├── Yonexus.Protocol +``` + +确保子模块已更新: + +```bash +git submodule update --init --recursive +``` + +--- + +## 3. 主节点部署(Yonexus.Server) + +### 3.1 安装与构建 + +```bash +cd Yonexus.Server +npm install +npm run build +``` + +### 3.2 配置 + +示例配置(OpenClaw 配置中): + +```json +{ + "followerIdentifiers": ["client-a", "client-b"], + "notifyBotToken": "", + "adminUserId": "123456789012345678", + "listenHost": "0.0.0.0", + "listenPort": 8787, + "publicWsUrl": "wss://example.com/yonexus" +} +``` + +### 3.3 启动 + +- 将 `Yonexus.Server` 安装为 OpenClaw 插件 +- 启动 OpenClaw Gateway 后,Server 会自动启动 WebSocket 服务 + +--- + +## 4. 从节点部署(Yonexus.Client) + +### 4.1 安装与构建 + +```bash +cd Yonexus.Client +npm install +npm run build +``` + +### 4.2 配置 + +```json +{ + "mainHost": "wss://example.com/yonexus", + "identifier": "client-a", + "notifyBotToken": "", + "adminUserId": "123456789012345678" +} +``` + +### 4.3 启动 + +- 将 `Yonexus.Client` 安装为 OpenClaw 插件 +- 启动 OpenClaw Gateway 后,Client 会自动连接 Server + +--- + +## 5. 首次配对流程 + +1. Client 连接后发送 `hello` +2. Server 检测未配对,生成配对码 +3. Server 通过 Discord DM 将配对码发送给管理员 +4. 管理员将配对码转交给 Client 操作员 +5. Client 提交 `pair_confirm` 完成配对 +6. Server 返回 `pair_success` 并下发 `secret` +7. Client 进入认证流程并开始心跳 + +--- + +## 6. 
版本与兼容性 + +- 协议版本:`1` +- 需要确保 `Yonexus.Protocol` 子模块与 Server/Client 使用的协议一致 + +--- + +## 7. 快速验证建议 + +- 主节点启动后确认 WebSocket 监听端口可达 +- 从节点能建立连接且收到 `hello_ack` +- 配对完成后收到 `auth_success` +- 5 分钟内可看到心跳日志/状态更新 diff --git a/FEAT.md b/FEAT.md index 09d485c..326f278 100644 --- a/FEAT.md +++ b/FEAT.md @@ -10,6 +10,33 @@ This repository now targets the split-plugin architecture only. --- +## Shared Terminology and v1 Scope + +Canonical terms used across the project: +- `identifier`: unique logical client name +- `rule_identifier`: exact-match application route key +- `builtin`: reserved protocol/system route namespace +- `pairingCode`: short-lived out-of-band pairing code +- `secret`: shared secret issued after pairing +- `publicKey` / `privateKey`: client signing keypair + +Locked v1 decisions: +- `heartbeat_ack` is optional +- client reconnect uses exponential backoff +- rule matching is exact-match only +- offline sends fail immediately instead of queueing +- `mainHost` is expected to be a full `ws://` or `wss://` URL + +Explicit v1 non-goals: +- multi-server topology +- direct client-to-client sockets +- offline queueing guarantees +- advanced rule matching +- management UI +- admin approval control plane beyond human relay of pairing codes + +--- + ## 1. Yonexus.Server Features ### 1.1 Server Runtime diff --git a/LESSONS_LEARNED.md b/LESSONS_LEARNED.md new file mode 100644 index 0000000..115c20d --- /dev/null +++ b/LESSONS_LEARNED.md @@ -0,0 +1,323 @@ +# OpenClaw 插件开发经验教训 + +> 记录插件开发过程中踩过的坑,供后续迭代参考。最初源自 Dirigent,后续经验来自 Yonexus。 + +--- + +## 1. 
OpenClaw 热重载与模块状态 + +**问题**:OpenClaw 每次热重载(hot-reload)会把插件模块放入新的 VM 隔离上下文,模块级变量全部重置。 + +```typescript +// ❌ 错误:热重载后 Map 被清空,turn 状态丢失 +const channelStates = new Map(); +``` + +**解法**:把需要跨热重载持久化的状态挂在 `globalThis` 上。 + +```typescript +// ✅ 正确:globalThis 绑定在 Node.js 进程层面,热重载不影响 +function channelStates(): Map { + if (!(_G._tmChannelStates instanceof Map)) + _G._tmChannelStates = new Map(); + return _G._tmChannelStates as Map; +} +``` + +**规则**: +- 业务状态(turn state、speaker list、pending turns)→ `globalThis` +- 热重载内部的临时变量(局部锁、dedup set)→ `globalThis`(理由同上) +- 无状态工具函数 → 普通模块变量即可 + +--- + +## 2. Hook 事件重复触发(Event Deduplication) + +**问题**:OpenClaw 热重载会把新的 handler 叠加在旧的 handler 上,同一事件(如 `agent_end`、`before_model_resolve`)被多个 handler 实例处理,导致: +- Turn 被推进两次 +- Speaker 被重复 suppress +- Schedule trigger 重复发送 + +**解法**:用挂在 `globalThis` 上的 `WeakSet`(事件对象)或 `Set`(runId)做去重。 + +```typescript +// before_model_resolve:事件对象去重(WeakSet 自动 GC) +const processed = new WeakSet(); +api.on("before_model_resolve", async (event) => { + if (processed.has(event as object)) return; + processed.add(event as object); + // ... +}); + +// agent_end:runId 去重(Set + 上限淘汰) +const processedRunIds = new Set(); +api.on("agent_end", async (event) => { + const runId = (event as any).runId; + if (processedRunIds.has(runId)) return; + processedRunIds.add(runId); + if (processedRunIds.size > 500) { + processedRunIds.delete(processedRunIds.values().next().value); + } + // ... +}); +``` + +**规则**:所有 hook handler 必须有去重逻辑,dedup 结构本身也要挂在 `globalThis`。 + +--- + +## 3. 
Gateway 生命周期事件与 Agent 会话事件的区别 + +**问题**:`gateway_start` / `gateway_stop` 是全局事件,只触发一次。但 `register()` 每次热重载都会被调用,导致 `gateway_start` handler 被重复注册,sidecar 被重复启动。 + +**解法**:用 `globalThis` flag 保证只注册一次。 + +```typescript +const _G = globalThis as Record; +const LIFECYCLE_KEY = "_dirigentGatewayLifecycleRegistered"; + +if (!_G[LIFECYCLE_KEY]) { + _G[LIFECYCLE_KEY] = true; + startSideCar(...); + api.on("gateway_stop", () => stopSideCar(...)); +} +``` + +**规则**: +- `gateway_start` / `gateway_stop` handler → `globalThis` flag 保护 +- `before_model_resolve` / `agent_end` / `message_received` → 每次 `register()` 都注册,但靠 event dedup 防止重复处理 + +--- + +## 4. ChannelStore 文件缓存陷阱 + +**问题**:`ChannelStore` 懒加载文件(第一次读后设 `loaded=true` 不再重读)。如果在 gateway 运行期间直接编辑 `dirigent-channels.json`,已存在的 `ChannelStore` 实例不会感知变化,`getMode()` 对新增 channel 返回 `"none"`,导致 turn management 完全失效(before_model_resolve 看到 `mode === "none"` 直接 return,不做任何 suppress)。 + +**现象**:新 channel 里所有 agent 同时响应,日志里没有任何 `before_model_resolve` 的 suppressing 或 anchor set 日志。 + +**解法(当前)**:编辑 `dirigent-channels.json` 后必须 `openclaw gateway restart`。 + +**更好的长期方案**:`ChannelStore` 应该在 `setMode()`/`setLockedMode()` 时通知所有实例,或改用 `fs.watch()` 监听文件变化,或每次 `getMode()` 都从文件读(对 read 频率低的场景可以接受)。 + +--- + +## 5. Discord 权限 Overwrite 的 type 字段 + +**问题**:设置 channel permission overwrite 时,`type` 字段含义: +- `type: 0` → 针对 **role**(角色) +- `type: 1` → 针对 **member**(成员/用户) + +将 bot 用户 ID 作为 member overwrite 时必须用 `type: 1`,用 `type: 0` 会返回错误或静默失败(Discord 会把 ID 当 role 处理)。 + +```typescript +// ✅ 正确 +{ id: botUserId, type: 1, allow: "68608", deny: "0" } +``` + +**常用 permission bitmask**: +- VIEW_CHANNEL = 1024 (1 << 10) +- SEND_MESSAGES = 2048 (1 << 11) +- READ_MESSAGE_HISTORY = 65536 (1 << 16) +- 三者合计 = 68608 + +--- + +## 6. 
AgentTool 的 execute API(非 handler) + +**问题**:OpenClaw Plugin SDK 要求 tool 使用 `execute: async (toolCallId, params) => {}` 接口,不是 `handler:`。如果需要 `ctx.agentId`,要使用工厂函数形式。 + +```typescript +// ✅ 正确 +api.registerTool({ + name: "my-tool", + // ...schema... + execute: async (toolCallId, params) => { + // toolCallId 是 string,params 是入参对象 + return { result: "ok" }; + }, +}); + +// ✅ 需要 agentId 时 +api.registerTool((ctx) => ({ + name: "my-tool", + execute: async (toolCallId, params) => { + const agentId = ctx.agentId; + // ... + }, +})); +``` + +--- + +## 7. Sidecar 锁文件防重复启动 + +**问题**:gateway 重启或热重载时 `startSideCar()` 可能被多次调用,导致多个 sidecar 进程竞争同一端口。 + +**解法**:写 lock 文件(`/tmp/dirigent-sidecar.lock`),启动前检查文件是否存在且对应进程仍在运行。 + +```typescript +const lockFile = "/tmp/dirigent-sidecar.lock"; +if (fs.existsSync(lockFile)) { + const pid = Number(fs.readFileSync(lockFile, "utf8").trim()); + if (isProcessAlive(pid)) { + logger.info("sidecar already running, skipping"); + return; + } +} +// 启动 sidecar,写 lock file +``` + +--- + +## 8. 并发 advanceSpeaker 竞争 + +**问题**:两个 VM 上下文的 `agent_end` handler 可能同时执行,两者都通过了 runId 去重(runId 不同),都调用 `advanceSpeaker`,导致 speaker index 被推进两次。 + +**解法**:在 `advanceSpeaker` 入口加 per-channel 锁(`Set` 挂在 `globalThis`)。 + +```typescript +if (advancingChannels.has(channelId)) return; // 已有并发调用,跳过 +advancingChannels.add(channelId); +try { + await advanceSpeaker(...); +} finally { + advancingChannels.delete(channelId); +} +``` + +--- + +## 9. isTurnPending 的生命周期边界 + +**问题**:`clearTurnPending` 的位置影响正确性: +- 太早(在 `advanceSpeaker` 前清除)→ 下一个 wakeup 可能被误判为合法 turn,在 cycle boundary 期间 index 尚未更新导致 speaker 错误 +- 太晚无问题,但在 `pollForTailMatch` 期间必须保持 `isTurnPending=true`,否则 re-trigger 会被当作合法 turn 重入 + +**正确位置**:`advanceSpeaker` 完成后、`triggerNextSpeaker` 前。 + +--- + +## 10. Discord Gateway 重连后的消息丢失 + +**问题**:Gateway 重启后,bot 重新连接 Discord WS 有延迟(10–30s)。如果在 bot 完全连接前就发送 schedule trigger(`<@bot_id>➡️`),bot 会错过该消息(WS 不推送历史消息)。 + +**现象**:发送了 trigger,channel 里能看到消息,但 bot 没有响应。 + +**解法**: +1. 
Gateway 重启后等待所有 bot 的 `discord client initialized` 日志出现再发种子消息 +2. 或手动补发 trigger + +**长期方案**:sidecar 可以暴露一个 `/status` 接口,等待所有 Discord 账号连接就绪后再允许外部发消息。 + +--- + +## 11. 连接型插件的热重载陷阱(Yonexus) + +**问题**:Yonexus.Client / Yonexus.Server 是"连接型插件"——插件本身管理一条持久 WebSocket 连接(或监听端口)。如果用模块级变量做启动防重复保护: + +```typescript +// ❌ 错误:热重载后新 VM 上下文重置,_started = false → 第二个 runtime 被创建 +let _started = false; +export function createPlugin(api) { + if (_started) return; + _started = true; + const runtime = createRuntime(...); + runtime.start(); +} +``` + +热重载后: +- **服务端**:第二个 runtime 尝试 bind 同一端口 → EADDRINUSE → `runtime.start()` 抛出 → 被 `.catch` 静默吞掉,但 `globalThis.__yonexusServer` 已被覆盖为指向新的(未启动的)transport → `sendRule()` 永远返回 false +- **客户端**:第二个 runtime 成功建立了新的 WebSocket 连接,与旧连接并存,产生重复认证 + +**解法**: +```typescript +// ✅ 正确:用 globalThis 保护,热重载后新 VM 上下文也能看到 flag +const _G = globalThis as Record; +const STARTED_KEY = "_yonexusClientStarted"; + +export function createPlugin(api) { + if (_G[STARTED_KEY]) { + // 热重载时更新 __yonexusClient 指向仍在运行的旧 runtime(存在 globalThis 上) + // 无需重新启动 + return; + } + _G[STARTED_KEY] = true; + // ... 创建并启动 runtime +} +``` + +如果需要让热重载后新注册的 hook/rule 生效,还需把 `ruleRegistry`、`onXxxCallbacks` 等也存到 `globalThis`,而不是在函数体内每次新建。 + +**规则**: +- 任何管理持久连接/监听端口的插件,其启动 flag 必须放 `globalThis` +- 相关的 registry、回调数组也应放 `globalThis`,否则热重载后 `__pluginId` API 对象被覆盖,旧 runtime 的回调数组失去引用 + +--- + +## 12. WebSocket 服务端 Transport 的消息路由竞态(Yonexus) + +**问题**:Server transport 在 `ws.on("message")` 里通过 identifier 查 `_connections` 得到 `ClientConnection`: + +```typescript +// ❌ 危险:当 ws_new 还在 tempConnections,但 _connections["test-client"] 指向即将关闭的 ws_old 时 +const connection = identifier ? this._connections.get(identifier) ?? tempConn : tempConn; +``` + +**场景**: +1. `ws_old`(外部测试脚本)已认证,`_connections["test-client"] = ws_old` +2. `ws_new`(插件重连)发 hello → 进入 tempConnections,assignedIdentifier = "test-client" +3. 插件发 `auth_request` → message handler 查 `_connections.get("test-client")` → 返回 ws_old +4. 
`promoteToAuthenticated("test-client", ws_old)` → ws_old 不在 tempConnections → 返回 false +5. `onClientAuthenticated` 仍然触发 → `_connections.get("test-client")` = ws_old(已关闭)→ `sendRule` 返回 false + +**解法**:消息路由时,如果发送方 `ws` 仍在 `tempConnections`,直接用 `tempConn`(持有正确 ws 引用的本地对象),**不再** fallback 到 `_connections`: + +```typescript +// ✅ 正确:按 ws 引用路由,不按 identifier 路由 +if (this.tempConnections.has(ws)) { + this.options.onMessage(tempConn, message); + return; +} +// ws 已 promote,从 _connections 中找 +let connection = tempConn; +for (const [, conn] of this._connections) { + if (conn.ws === ws) { connection = conn; break; } +} +this.options.onMessage(connection, message); +``` + +**附加修复**:`promoteToAuthenticated` 的返回值不应被忽略。只有 promote 成功时才触发 `onClientAuthenticated`: + +```typescript +const promoted = transport.promoteToAuthenticated(identifier, connection.ws); +if (promoted) { + options.onClientAuthenticated?.(identifier); +} +``` + +**规则**:WebSocket 服务端的消息路由应始终以**发送方的 ws 对象引用**为准,不以 identifier 查映射表。identifier 可能在 tempConnections 和 _connections 之间的过渡期产生歧义。 + +--- + +## 13. 服务端 Session 竞态 → 客户端 re-hello 恢复(Yonexus) + +**问题**:服务端在已认证连接关闭时(`onDisconnect`)删除对应的 session。如果另一个客户端连接(同 identifier)的 `auth_request` 恰好在 session 被删除之后到达,服务端返回 `auth_failed("not_paired")`,即使客户端持有有效 secret。 + +**场景**: +1. 测试脚本 ws_1 已认证 → session["test-client"] 存在 +2. 插件 ws_2 发送 hello → session["test-client"] 被覆写(socket = ws_2) +3. 测试脚本 ws_1 关闭 → `handleDisconnect("test-client")` → `sessions.delete("test-client")` +4. 插件 ws_2 发 `auth_request` → session 不存在 → `auth_failed("not_paired")` +5. 
插件有 secret,但 `auth_required` 状态没有 re-hello 逻辑 → 永远卡住 + +**解法**:客户端收到 `auth_failed("not_paired")` 且持有有效 secret 时,重新发送 hello 以在服务端创建新 session,然后重试认证: + +```typescript +if (payload.reason === "not_paired" && hasClientSecret(this.clientState)) { + this.sendHello(); // 重建 session,触发 hello_ack("auth_required") → sendAuthRequest() + return; +} +``` + +**规则**:客户端凡是遇到"自己有凭据但服务端找不到 session"的错误,都应尝试重走 hello 流程,而不是直接进入 `auth_required` 等待用户干预。 diff --git a/OPENCLAW_PLUGIN_DEV.md b/OPENCLAW_PLUGIN_DEV.md new file mode 100644 index 0000000..17d0830 --- /dev/null +++ b/OPENCLAW_PLUGIN_DEV.md @@ -0,0 +1,478 @@ +# OpenClaw 插件开发规范与流程 + +> 基于 Dirigent 插件的实际开发经验整理,适用于任何 OpenClaw 插件。 + +--- + +## 一、插件项目结构 + +``` +proj-root/ # 插件项目根目录 + plugin/ # 插件本体(安装时复制到 ~/.openclaw/plugins//) + index.ts # 插件入口,export default { id, name, register } + openclaw.plugin.json # 插件 config schema 声明 + package.json # name、version、type: module + hooks/ + before-model-resolve.ts + agent-end.ts + message-received.ts + tools/ + register-tools.ts + commands/ + my-command.ts + core/ # 纯业务逻辑,不依赖 plugin-sdk,便于单元测试 + my-store.ts + web/ # HTTP 路由(可选) + my-api.ts + services/ # 插件管理的 sidecar 进程(随插件一起安装) + main.mjs # sidecar 入口 + sub-service/ + index.mjs + skills/ # 插件提供的 OpenClaw skill + my-skill/ + SKILL.md + scripts/ # 安装、测试、开发辅助脚本 + install.mjs # --install / --uninstall + smoke-test.sh + docs/ # 文档 + IMPLEMENTATION.md + dist/ # 构建产物(gitignore),install 脚本生成 +``` + +**约定**: +- 文件名用 kebab-case,导出函数用 camelCase +- `plugin/core/` 只放纯逻辑,不 import `openclaw/plugin-sdk`,便于单元测试 +- Hook 注册逻辑独立在 `hooks/` 目录,不写在 `index.ts` 里 + +--- + +## 二、插件入口(index.ts) + +### 2.1 Hook 型插件(常见场景) + +```typescript +import type { OpenClawPluginApi } from "openclaw/plugin-sdk"; + +// ── 全局生命周期保护 ── +const _G = globalThis as Record; +const LIFECYCLE_KEY = "_myPluginGatewayLifecycleRegistered"; + +export default { + id: "my-plugin", + name: "My Plugin", + register(api: OpenClawPluginApi) { + const config = normalizeConfig(api); + + // Gateway 
生命周期:只注册一次 + if (!_G[LIFECYCLE_KEY]) { + _G[LIFECYCLE_KEY] = true; + // 启动 sidecar、初始化全局资源等 + api.on("gateway_stop", () => { /* 清理 */ }); + } + + // Agent 会话 hook:每次 register() 都注册,event-level dedup 防重复处理 + registerBeforeModelResolveHook({ api, config }); + registerAgentEndHook({ api, config }); + registerMessageReceivedHook({ api, config }); + + // Tools / Commands / Web + registerMyTools(api, config); + + api.logger.info("my-plugin: registered"); + }, +}; +``` + +### 2.2 连接型插件(管理持久 WebSocket / TCP 连接) + +插件本身作为 WebSocket 客户端或服务端时,必须把**启动 flag、runtime 引用、所有共享状态**全部挂在 `globalThis`。模块级 `let _started = false` 在热重载后新 VM 上下文中重置,导致第二个连接被建立(客户端)或端口被二次 bind(服务端)。 + +```typescript +const _G = globalThis as Record<string, unknown>; +const STARTED_KEY = "_myConnPluginStarted"; +const RUNTIME_KEY = "_myConnPluginRuntime"; +const REGISTRY_KEY = "_myConnPluginRuleRegistry"; +const CALLBACKS_KEY = "_myConnPluginOnReadyCallbacks"; + +export function createPlugin(api: { rootDir: string; pluginConfig: unknown }): void { + // registry / callbacks 只在首次不存在时初始化并挂到 globalThis(热重载后复用同一实例), + // 供其他插件通过 __myConnPlugin 引用 + if (!(_G[REGISTRY_KEY] instanceof MyRuleRegistry)) { + _G[REGISTRY_KEY] = createRuleRegistry(); + } + if (!Array.isArray(_G[CALLBACKS_KEY])) { + _G[CALLBACKS_KEY] = []; + } + + const registry = _G[REGISTRY_KEY] as MyRuleRegistry; + const callbacks = _G[CALLBACKS_KEY] as Array<() => void>; + + // 暴露跨插件 API(每次都覆写,使 sendMessage 等闭包来自最新模块,并通过 _G[RUNTIME_KEY] 访问仍在运行的 runtime) + _G["__myConnPlugin"] = { + registry, + onReady: callbacks, + sendMessage: (msg: string) => + (_G[RUNTIME_KEY] as MyRuntime | undefined)?.send(msg) ?? 
false, + }; + + // 只启动一次——不管热重载多少次 + if (_G[STARTED_KEY]) return; + _G[STARTED_KEY] = true; + + const runtime = createRuntime({ registry, onReady: (id) => callbacks.forEach(cb => cb()) }); + _G[RUNTIME_KEY] = runtime; + + process.once("SIGTERM", () => runtime.stop().catch(console.error)); + runtime.start().catch(console.error); +} +``` + +**关键区别**: +- `STARTED_KEY` 检查放在**最后**,在暴露 API 之后。这样热重载时 API 对象仍被更新(新模块的闭包),但 runtime 不会重复启动。 +- `sendMessage` 闭包通过 `_G[RUNTIME_KEY]` 访问 runtime,不依赖模块级变量。 + +--- + +## 三、Config Schema(openclaw.plugin.json) + +```json +{ + "$schema": "https://openclaw.ai/schemas/plugin-config.json", + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": { + "myToken": { "type": "string" }, + "myFlag": { "type": "boolean", "default": false }, + "myPort": { "type": "number", "default": 9000 } + } + } +} +``` + +**注意**: +- `additionalProperties: false` 是强制的——OpenClaw 会用 schema 验证 config,多余字段报错 +- 删除废弃字段时必须同步从 schema 里移除,否则旧 config 会导致 gateway 启动失败 +- 敏感字段(token、key)不要设 `default`,让用户手动配置 + +--- + +## 四、Hook 注册规范 + +### 4.1 before_model_resolve + +**用途**:在模型调用前干预,可以覆盖 model/provider。 + +```typescript +// ── 去重 ── +const _DEDUP_KEY = "_myPluginBMRDedup"; +if (!(_G[_DEDUP_KEY] instanceof WeakSet)) _G[_DEDUP_KEY] = new WeakSet(); +const dedup = _G[_DEDUP_KEY] as WeakSet; + +api.on("before_model_resolve", async (event, ctx) => { + if (dedup.has(event as object)) return; + dedup.add(event as object); + + const sessionKey = ctx.sessionKey; + if (!sessionKey) return; + + // 返回 modelOverride 即覆盖,无返回值则不干预 + return { modelOverride: "no-reply", providerOverride: "dirigent" }; +}); +``` + +**规则**: +- 必须有 WeakSet dedup(挂 globalThis) +- 返回值是 `{ modelOverride, providerOverride }` 或 `undefined` +- 异步操作(Discord API 调用等)尽量 try/catch,避免 unhandled rejection + +### 4.2 agent_end + +**用途**:agent 一轮对话结束后触发,用于推进状态、发送下一轮触发消息。 + +```typescript +// ── 去重 ── +const _DEDUP_KEY = "_myPluginAgentEndDedup"; +if (!(_G[_DEDUP_KEY] instanceof Set)) 
_G[_DEDUP_KEY] = new Set<string>(); +const dedup = _G[_DEDUP_KEY] as Set<string>; + +api.on("agent_end", async (event, ctx) => { + const runId = (event as any).runId as string; + if (runId) { + if (dedup.has(runId)) return; + dedup.add(runId); + if (dedup.size > 500) dedup.delete(dedup.values().next().value!); + } + + // 提取 agent 最终回复文本 + const messages = (event as any).messages as unknown[] ?? []; + const finalText = extractFinalText(messages); // 找最后一条 role=assistant 的文本 + // ... +}); +``` + +**规则**: +- 用 runId + Set 去重(WeakSet 不适合,runId 是 string) +- Set 要有上限淘汰(防内存泄漏) +- 提取 finalText 要从 messages 数组末尾向前找 `role === "assistant"` + +### 4.3 message_received + +**用途**:收到 Discord 新消息时触发。 + +```typescript +api.on("message_received", async (event, ctx) => { + try { + // channelId 提取逻辑(多个来源,兼容性处理) + const channelId = extractChannelId(ctx, event); + if (!channelId) return; + // ... + } catch (err) { + api.logger.warn(`my-plugin: message_received error: ${err}`); + } +}); +``` + +--- + +## 五、Tool 注册规范 + +```typescript +// 无需 ctx 的工具 +api.registerTool({ + name: "my-tool", + description: "Does something", + inputSchema: { + type: "object", + properties: { + param: { type: "string", description: "..." }, + }, + required: ["param"], + }, + execute: async (toolCallId, params) => { + const { param } = params as { param: string }; + return { result: "ok" }; + }, +}); + +// 需要 ctx(agentId 等)的工具:工厂函数形式 +api.registerTool((ctx) => ({ + name: "my-contextual-tool", + description: "...", + inputSchema: { /* ... */ }, + execute: async (toolCallId, params) => { + const agentId = ctx.agentId; + // ... 
+ return { result: agentId }; + }, +})); +``` + +**注意**:接口是 `execute: async (toolCallId, params)` 而不是 `handler:`。 + +--- + +## 六、State 管理规范 + +| 数据类型 | 存放位置 | 原因 | +|---|---|---| +| 跨请求的业务状态(turn state 等) | `globalThis` | 热重载后模块变量重置 | +| Event dedup Set/WeakSet | `globalThis` | 同上 | +| 全局初始化 flag(gateway_start/stop) | `globalThis` | 防重复注册 | +| 连接型插件:启动 flag | `globalThis` | 热重载后模块变量重置,否则重复建连 | +| 连接型插件:runtime 引用 | `globalThis` | sendXxx 闭包需要访问仍在运行的实例 | +| 连接型插件:rule registry / 回调数组 | `globalThis` | 热重载后需与 runtime 共享同一实例 | +| 跨插件公共 API 对象(`__pluginId`) | `globalThis` | 其他插件通过 globalThis 访问 | +| 无状态工具函数 | 模块级 | 无需持久化 | +| 文件持久化数据(channel store 等) | 文件 + 内存缓存 | 需要跨 gateway 重启持久化 | + +**globalThis 命名约定**: +``` +_<pluginId>PluginXxx # 内部状态,例如 _yonexusClientPluginStarted +__<pluginId> # 跨插件公共 API,例如 __yonexusClient +``` +内部状态用单下划线前缀,跨插件 API 用双下划线前缀,防止和其他插件冲突。 + +--- + +## 七、安装脚本规范(scripts/install.mjs) + +每个插件应提供标准安装脚本,支持 `--install` / `--uninstall` / `--update`。 + +``` +install 做的事: +1. 构建 dist(复制 plugin/ 和 services/ 到 dist/) +2. 复制 dist 到 ~/.openclaw/plugins/<pluginId>/ +3. 安装 skills(支持合并已有 skill 数据) +4. 配置 plugins.entries.<pluginId>.enabled = true +5. 设置默认 config 字段(setIfMissing,不覆盖已有值,不触碰敏感字段) +6. 添加到 plugins.allow 列表 +7. 配置 model provider(如有 sidecar) + +uninstall 做的事: +1. 从 plugins.allow 移除 +2. 删除 plugins.entries.<pluginId> +3. 删除 plugins.load.paths 中的条目 +4. 删除安装目录 +5. 删除 skills +``` + +**关键细节**: +- 安装前先 `fs.rmSync(distDir, { recursive: true, force: true })` 清空旧 dist,防止残留文件(`force: true` 保证首次安装、dist 尚不存在时不会抛 ENOENT) +- `setIfMissing`:只写入 undefined/null 的字段,不覆盖用户已设置的值 +- 敏感字段(token、secret)**绝对不要**在安装脚本中 set,注释说明需手动配置 +- schema 里有 `additionalProperties: false` 时,安装脚本写入的每个 config key 都必须在 schema 里声明 + +--- + +## 八、开发调试流程 + +### 日常开发循环 + +```bash +# 1. 修改代码(plugin/ 或 services/) +# 2. 重新安装 +node scripts/install.mjs --install + +# 3. 重启 gateway(必须!ChannelStore 等有文件缓存) +openclaw gateway restart + +# 4. 观察日志 +openclaw logs --follow # 或 tail -f /tmp/openclaw/openclaw-$(date +%F).log + +# 5. 
发送测试消息验证 +``` + +### 日志关键词速查 + +| 关键词 | 说明 | +|---|---| +| `plugin registered` | register() 执行完毕 | +| `startSideCar called` / `already running` | sidecar 启动/已存在 | +| `before_model_resolve anchor set` | 当前 speaker 正常走到模型调用 | +| `before_model_resolve suppressing` | 非 speaker 被 suppress | +| `agent_end skipping stale turn` | stale NO_REPLY 被正确过滤 | +| `triggered next speaker` | 下一轮触发成功 | +| `entered dormant` | channel 进入休眠 | +| `moderator-callback woke dormant` | 休眠被外部消息唤醒 | +| `must NOT have additional properties` | schema 与实际 config 不一致 | + +### TypeScript 类型检查 + +```bash +make check # tsc --noEmit +make check-rules # 验证 rule fixture +make check-files # 验证必要文件存在 +``` + +### Sidecar smoke test + +```bash +make smoke # 测试 no-reply API 是否正常响应 +# 等价于: +curl -s http://127.0.0.1:8787/no-reply/v1/chat/completions \ + -X POST -H "Content-Type: application/json" \ + -d '{"model":"no-reply","messages":[{"role":"user","content":"hi"}]}' +``` + +--- + +## 九、常见陷阱 Checklist + +在提 PR 或部署前,检查以下项目: + +**通用** +- [ ] 所有 hook handler 有 event dedup(WeakSet for before_model_resolve,Set+runId for agent_end) +- [ ] dedup 结构挂在 `globalThis`,不是模块级变量 +- [ ] gateway 生命周期事件(gateway_start/stop)有 `globalThis` flag 保护 +- [ ] 业务状态(Map/Set)挂在 `globalThis` +- [ ] `openclaw.plugin.json` 里的 schema 与实际使用的 config 字段完全对齐 +- [ ] 安装脚本没有 set 任何 schema 中不存在的 config 字段 +- [ ] 敏感字段(token)不在安装脚本中 set,有注释说明手动配置方式 +- [ ] 安装前有 `fs.rmSync(distDir)` 清理旧文件 +- [ ] 新增 channel 后需要 `openclaw gateway restart`(文档或 CLI 提示) +- [ ] Discord permission overwrite 用 `type: 1`(member),不是 `type: 0`(role) +- [ ] Sidecar 有锁文件防重复启动 +- [ ] `agent_end` 的 Set 有上限淘汰(`size > 500` 时删 oldest) + +**连接型插件(WebSocket / TCP)** +- [ ] 启动 flag 用 `globalThis` 而非模块级 `let`,防热重载重复建连 +- [ ] runtime 引用存 `globalThis`,send 相关闭包通过 `_G[RUNTIME_KEY]` 访问 +- [ ] `ruleRegistry`、回调数组等共享对象存 `globalThis`,首次不存在时才初始化 +- [ ] 跨插件 API 对象(`__pluginId`)**每次** `register()` 都覆写(更新闭包),但 runtime 只启动一次 +- [ ] 消费方插件(注册进 registry 的插件)做好"provider 未加载"的防御判断 +- [ ] `.env` 文件加入 
`.gitignore`,提交 `.env.example` 作为模板 + +--- + +## 十、跨插件 GlobalThis API 模式 + +当一个插件需要向同进程内的其他插件暴露功能(如 rule registry、send 接口、事件回调)时,使用 `globalThis.__pluginId` 约定。 + +### 提供方(Provider) + +```typescript +// 每次 register() 都更新暴露的对象(使 sendXxx 闭包始终指向最新 runtime) +// 但注意 registry / callbacks 用 globalThis 保证跨热重载稳定 + +const _G = globalThis as Record; + +// 1. 确保 registry 和 callbacks 只初始化一次 +if (!(_G["_myPluginRegistry"] instanceof MyRegistry)) { + _G["_myPluginRegistry"] = new MyRegistry(); +} +if (!Array.isArray(_G["_myPluginCallbacks"])) { + _G["_myPluginCallbacks"] = []; +} + +// 2. 覆写公共 API 对象(闭包捕获最新 runtime) +_G["__myPlugin"] = { + registry: _G["_myPluginRegistry"] as MyRegistry, + onEvent: _G["_myPluginCallbacks"] as Array<(data: unknown) => void>, + send: (msg: string): boolean => + (_G["_myPluginRuntime"] as MyRuntime | undefined)?.send(msg) ?? false, +}; +``` + +### 消费方(Consumer) + +```typescript +export default function register(_api) { + const provider = (globalThis as Record)["__myPlugin"]; + if (!provider) { + console.error("[my-consumer] __myPlugin not found — ensure provider loads first"); + return; + } + + // 注册 rule + (provider as { registry: MyRegistry }).registry.registerRule("my_rule", handler); + + // 订阅事件 + (provider as { onEvent: Array<() => void> }).onEvent.push(() => { + // ... + }); +} +``` + +### 加载顺序 + +`plugins.allow` 数组中 provider 必须排在 consumer **之前**,OpenClaw 按顺序加载插件。consumer 应在 `register()` 入口做 `if (!provider) return` 防御,避免 provider 未加载时崩溃。 + +--- + +## 十一、Config 变更流程 + +当需要新增、重命名或删除 config 字段时: + +1. **先改 `openclaw.plugin.json`**(schema 是 source of truth) +2. 改 `plugin/index.ts` 中的 `PluginConfig` 类型和 `normalizeConfig()` +3. 改安装脚本(`scripts/install.mjs`)中的 `setIfMissing` 调用 +4. 更新 `README.md` 中的 config 表格 +5. 
如果是重命名,需要告知用户手动迁移现有 `openclaw.json` 中的 config key + +**重命名示例**(`noReplyPort` → `sideCarPort`): +```bash +# 用户侧迁移 +openclaw config unset plugins.entries.dirigent.config.noReplyPort +openclaw config set plugins.entries.dirigent.config.sideCarPort 8787 +``` diff --git a/OPERATIONS.md b/OPERATIONS.md new file mode 100644 index 0000000..6a045e7 --- /dev/null +++ b/OPERATIONS.md @@ -0,0 +1,113 @@ +# Yonexus 运维与排障指南 (v1) + +本指南覆盖常见运行状态、错误码与恢复步骤。 + +--- + +## 1. 运行状态速览 + +### Client 侧状态 +- `connecting`:正在连接 +- `pairing_required`:需要配对 +- `waiting_pair_confirm`:等待提交配对码 +- `authenticating`:认证中 +- `authenticated`:已认证,心跳中 + +### Server 侧状态 +- `online`:已认证且心跳正常 +- `unstable`:7 分钟未收到心跳 +- `offline`:11 分钟未收到心跳,已断开连接 + +--- + +## 2. 常见问题与处理 + +### 2.1 Client 无法连接 Server +**可能原因** +- `mainHost` 配置错误 +- Server 未启动或端口不可达 + +**处理** +- 检查 `mainHost` 是否为 `ws://` 或 `wss://` +- 验证 Server 监听端口是否对外开放 + +--- + +### 2.2 Client 一直停在 `pairing_required` +**可能原因** +- Server 未能发送 Discord DM +- `notifyBotToken` 或 `adminUserId` 配置错误 + +**处理** +- 检查 Server 日志是否出现 `admin_notification_failed` +- 确认 Bot 有向目标用户发送 DM 的权限 + +--- + +### 2.3 配对码无效 / 过期 +**可能原因** +- 输入错误 +- 配对码超过 TTL + +**处理** +- 重新触发配对流程(断线后重连) +- 确保管理员转发的配对码最新 + +--- + +### 2.4 认证失败 (`auth_failed`) +**可能原因** +- Secret 不匹配 +- 时间漂移过大 +- Nonce 重放或格式错误 + +**处理** +- 检查系统时间是否正确 +- 清除 Client 本地 secret,触发重新配对 + +--- + +### 2.5 频繁触发 `re_pair_required` +**可能原因** +- 非法重放或高频认证尝试 +- Client 有并发连接/重连异常 + +**处理** +- 确认同一 `identifier` 只存在一个活跃 Client +- 检查 Client 是否重复启动多个实例 + +--- + +## 3. 错误码参考 + +常见协议错误码: +- `MALFORMED_MESSAGE` +- `UNSUPPORTED_PROTOCOL_VERSION` +- `IDENTIFIER_NOT_ALLOWED` +- `PAIRING_REQUIRED` +- `PAIRING_EXPIRED` +- `ADMIN_NOTIFICATION_FAILED` +- `AUTH_FAILED` +- `NONCE_COLLISION` +- `RATE_LIMITED` +- `RE_PAIR_REQUIRED` + +--- + +## 4. 恢复步骤建议 + +**场景:Client 无法恢复认证** +1. 停止 Client +2. 删除本地 state 中的 secret +3. 重启 Client 触发重新配对 + +**场景:Server 端状态异常** +1. 检查持久化 store 文件是否损坏 +2. 必要时备份后清理 store 文件(会导致所有 Client 重新配对) + +--- + +## 5. 
日志建议 + +- Server 日志中应避免输出 secret / 配对码明文 +- 建议在生产环境开启结构化日志并保留最小必要字段 diff --git a/PLAN.md b/PLAN.md index 4d53cd2..3d22891 100644 --- a/PLAN.md +++ b/PLAN.md @@ -124,6 +124,22 @@ Semantics: --- +## 4.4 Shared Terminology Baseline + +These names are normative across umbrella docs, protocol docs, and implementation repos: + +- `identifier`: the unique logical name of a client/follower instance. +- `rule_identifier`: the exact-match application route key. +- `builtin`: reserved rule namespace for protocol/system frames. +- `pairingCode`: short-lived out-of-band code delivered to the human admin. +- `secret`: server-issued shared secret used for reconnect proof construction. +- `publicKey` / `privateKey`: client-held signing keypair. +- `nextAction`: server-directed next step returned by `hello_ack`. + +Implementations should avoid introducing alternative synonyms for these fields unless there is a versioned migration plan. + +--- + ## 5. Runtime Lifecycle ## 5.1 Yonexus.Server Startup @@ -551,25 +567,39 @@ Not required in the first version unless explicitly added later: - direct client-to-client sockets - multi-server clustering - distributed consensus +- offline message queues or guaranteed delivery to disconnected clients +- advanced rule matching beyond exact string match - message ordering guarantees across reconnects - end-to-end payload encryption beyond the pairing/authentication requirements - management UI +- admin-side approve/deny control plane beyond human relay of pairing codes +- encryption-at-rest hardening beyond documenting current local storage limitations --- -## 16. Open Questions To Confirm Later +## 16. v1 Decisions Locked for Current Implementation -1. Exact signing algorithm: - - Ed25519 is a strong default candidate -2. Should `mainHost` accept only full WebSocket URLs or also raw `ip:port` strings? -3. Is human code relay sufficient for v1 pairing, or should admin approve/deny controls be added later? -4. 
On unsafe condition, should the old public key be retained or should the client generate a new keypair? -5. Should offline clients support queued outbound messages from server, or should sends fail immediately? -6. Are rule identifiers exact strings only, or should regex/prefix matching exist later? +The following implementation-boundary decisions are now treated as settled for v1: + +1. Signing algorithm default: Ed25519. +2. `mainHost` should be configured as a full `ws://` or `wss://` URL in v1. +3. Human relay of the pairing code is sufficient for v1; richer admin approve/deny control can wait. +4. `heartbeat_ack` remains optional. +5. Client reconnect uses exponential backoff. +6. Rule identifiers are exact-match strings only in v1. +7. Outbound sends to offline clients fail immediately rather than queueing. + +## 17. Open Questions To Confirm Later + +1. On unsafe condition, should the old public key be retained or should the client generate a new keypair? +2. Should future versions support explicit key rotation without full re-pairing? +3. Should offline clients support queued outbound messages from server in a later version? +4. Are richer admin approval workflows worth adding after v1 stabilizes? +5. Should encryption-at-rest become a hard requirement in v2? --- -## 17. Immediate Next Deliverables +## 18. Immediate Next Deliverables After this plan, the next files to create should be: - `FEAT.md` — feature checklist derived from this plan diff --git a/PROTOCOL.md b/PROTOCOL.md index 4103dd7..466e871 100644 --- a/PROTOCOL.md +++ b/PROTOCOL.md @@ -26,6 +26,22 @@ Important security rule: --- +## 1.1 Canonical Terminology + +These names are treated as protocol-level canonical terms: + +- `identifier`: unique logical identity of a Yonexus client instance. +- `rule_identifier`: exact-match routing key for application messages. +- `builtin`: reserved protocol namespace used only for Yonexus control frames. 
+- `pairingCode`: short-lived out-of-band code generated by the server for human-mediated pairing. +- `secret`: server-issued shared secret used in reconnect authentication proof construction. +- `publicKey` / `privateKey`: client signing keypair. +- `nextAction`: the server's directed next step in `hello_ack`. + +The protocol and implementation repos should prefer these exact names over synonyms. + +--- + ## 2. Transport Transport is WebSocket. @@ -35,13 +51,12 @@ Transport is WebSocket. - protocol frames are UTF-8 text in v1 - binary frames are not required in v1 -Client connects to configured `mainHost`, which may be: +Client connects to configured `mainHost`, which in v1 should be a full WebSocket URL: - `ws://host:port/path` - `wss://host:port/path` -- or raw `host:port` if normalized by implementation Recommended canonical config: -- prefer full WebSocket URL +- require/prefer a full WebSocket URL in v1 rather than raw `host:port` --- @@ -426,6 +441,9 @@ builtin::{ ### `heartbeat_ack` Optional response by `Yonexus.Server`. +v1 policy: +- `heartbeat_ack` may be enabled by the server but clients must not require it for healthy operation + Example: ```text @@ -624,6 +642,9 @@ Dispatch algorithm: 4. invoke the first exact match 5. ignore/log if no match is found +v1 policy: +- rule matching is exact string match only; prefix, wildcard, and regex routing are out of scope + Processor input: - on client: `${rule_identifier}::${message_content}` - on server for client-originated messages: `${rule_identifier}::${sender_identifier}::${message_content}` diff --git a/README.md b/README.md index eada631..2c124eb 100644 --- a/README.md +++ b/README.md @@ -154,12 +154,46 @@ Reserved rule: `builtin` --- +## Shared Terminology + +To keep the umbrella repo, protocol repo, and both plugin repos aligned, Yonexus uses these terms consistently: + +- `identifier`: the stable logical name of a follower/client instance, unique within one Yonexus network. 
+- `rule_identifier`: the exact-match application routing key used in `${rule_identifier}::${message_content}`. +- `builtin`: the reserved rule namespace for Yonexus protocol/control messages only. +- `pairingCode`: the short-lived out-of-band code generated by `Yonexus.Server` and delivered to a human admin by Discord DM. +- `secret`: the server-issued shared secret established after successful pairing and used in reconnect authentication proof construction. +- `publicKey` / `privateKey`: the client-owned signing keypair used for auth proof signing and verification. +- `nextAction`: the server decision returned in `hello_ack`, currently one of `pair_required`, `waiting_pair_confirm`, `auth_required`, or `rejected`. + +## v1 Scope Boundaries + +In scope for v1: +- WebSocket transport between one server and one or more clients +- out-of-band pairing via Discord DM to a human administrator +- signed reconnect authentication using `secret + nonce + timestamp` +- heartbeat/liveness tracking (`online | unstable | offline`) +- exact-match rule dispatch +- lightweight persistence for trust/state material +- optional `heartbeat_ack` +- exponential reconnect backoff on the client side + +Explicitly out of scope for v1: +- multi-server topology +- direct client-to-client sockets +- offline message queues / delivery guarantees +- advanced rule matching (prefix/regex/wildcard) +- management UI +- distributed consensus / clustering +- automatic admin approve/deny workflows beyond human relay of the pairing code +- encryption-at-rest hardening beyond documenting local sensitive storage behavior + ## Status -- planning/specification stage -- split-plugin architecture defined -- protocol draft defined in `Yonexus.Protocol` -- implementation not started yet +- umbrella/specification repo is aligned with the split architecture +- core implementation work is underway in `Yonexus.Server`, `Yonexus.Client`, and `Yonexus.Protocol` +- protocol/types/codec/test scaffolding already exists 
in `Yonexus.Protocol` +- runtime, transport, pairing, auth, heartbeat, rule dispatch, and test coverage are largely implemented in submodules; remaining work is focused on boundary cleanup and leftover failure-path coverage --- diff --git a/TASKLIST.md b/TASKLIST.md index 2a2fbaf..73fcff3 100644 --- a/TASKLIST.md +++ b/TASKLIST.md @@ -22,6 +22,9 @@ ## Phase 0 — 仓库与规范对齐 ### YNX-0001 统一仓库定位与术语 +**状态** +- [x] 已完成(2026-04-09) + **目标** - 统一文档中对 Yonexus、Yonexus.Server、Yonexus.Client、Yonexus.Protocol 的描述 - 明确这是“umbrella + 三个独立仓库/子模块”的模型 @@ -35,9 +38,17 @@ - 核心文档不存在角色冲突或架构冲突描述 - 协议字段名称在文档中保持一致 +**已完成内容** +- 已在 `README.md` 新增 Shared Terminology 区块,统一 umbrella repo 对核心术语的定义 +- 已在 `PLAN.md` 新增 `Shared Terminology Baseline`,明确这些字段名为跨仓库规范用语 +- 已在 `FEAT.md` 与 `PROTOCOL.md` 同步补齐 canonical terminology,避免后续实现和文档再出现同义词漂移 + --- ### YNX-0002 定义 v1 实现边界 +**状态** +- [x] 已完成(2026-04-09) + **目标** - 把“必须做”和“以后再说”彻底分开 @@ -51,11 +62,19 @@ - 有一份可执行的 v1 scope 列表 - 实现阶段不再反复讨论边界 +**已完成内容** +- 已在 `README.md` 增加 `v1 Scope Boundaries`,把 in-scope / out-of-scope 一次写清 +- 已在 `PLAN.md` 将若干开放问题下沉为已锁定的 v1 决策:Ed25519、完整 `mainHost` URL、`heartbeat_ack` optional、指数退避、exact-match rule、离线发送立即失败 +- 已在 `FEAT.md` 和 `PROTOCOL.md` 同步这些边界,使功能清单与协议文档不再反复摇摆 + --- ## Phase 1 — 协议落地与共享契约 ### YNX-0101 固化 builtin 协议信封与类型枚举 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 把 `PROTOCOL.md` 里的 builtin envelope 转成共享 TypeScript 类型 @@ -72,9 +91,17 @@ - Server / Client 都能直接复用类型 - 所有 builtin 消息都可被类型系统约束 +**已完成内容** +- 已在 `Yonexus.Protocol/src/types.ts` 落地 `BuiltinEnvelope`、builtin `type` 联合类型、各类 payload 接口与 `BuiltinPayloadMap` +- 已增加 `Yonexus.Protocol/src/index.ts` 作为导出入口 +- 已同步更新 `Yonexus.Protocol/README.md`,让共享类型入口不再停留在 planned 状态 + --- ### YNX-0102 实现协议编解码工具 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 提供统一的字符串协议解析与序列化能力 @@ -89,9 +116,19 @@ - `builtin::{json}` 可稳定双向转换 - `rule::content` 和 `rule::sender::content` 都能正确解析 +**已完成内容** +- 已新增 `Yonexus.Protocol/src/codec.ts` +- 已实现 `encodeBuiltin` / `decodeBuiltin` 及类型安全的 envelope builders +- 已实现 
`parseRuleMessage` / `parseRewrittenRuleMessage` / `encodeRuleMessage` / `encodeRewrittenRuleMessage` +- 已提供 `CodecError` 标准错误类及辅助函数 `isBuiltinMessage` +- 已更新 `Yonexus.Protocol/src/index.ts` 导出 codec 模块 + --- ### YNX-0103 定义协议错误码与错误对象 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 统一错误语义,避免 Server / Client 各自发明错误格式 @@ -104,9 +141,17 @@ - 所有失败路径都能落到有限集合的错误码 - 文档与代码错误码一致 +**已完成内容** +- 已在 `Yonexus.Protocol/src/types.ts` 固化 `ProtocolErrorCode` 与 `ErrorPayload` +- 已新增 `Yonexus.Protocol/src/errors.ts`,提供 `YonexusProtocolError`、错误分类映射、payload/envelope 转换辅助函数 +- 已更新 `Yonexus.Protocol/src/index.ts` 导出错误模块,方便 Server / Client 统一复用 + --- ### YNX-0104 编写协议级测试样例 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 在实现运行时前,先锁定协议行为 @@ -119,20 +164,30 @@ - 协议测试能独立运行 - 后续实现可直接拿这些样例做回归 +**已完成内容** +- 已新增 `Yonexus.Protocol/tests/codec.test.ts` +- 已覆盖 `encodeBuiltin`/`decodeBuiltin`、`parseRuleMessage`/`encodeRuleMessage`、服务器重写消息解析/编码 +- 已包含 malformed message、非法 rule identifier、保留字冲突等反例 +- 已提供完整的 hello flow、rule message flow 示例 +- 已配置 `package.json`、`tsconfig.json`、`vitest.config.ts` 测试基础设施 + --- ## Phase 2 — Server 插件脚手架 ### YNX-0201 创建 Yonexus.Server 最小插件骨架 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 让 Server 插件可被 OpenClaw 加载 -**子任务** -- 创建插件目录结构 -- 创建 `package.json` -- 创建 `openclaw.plugin.json` -- 创建入口文件 `index.ts` -- 添加基础构建配置 +**已完成内容** +- 已补齐 `plugin/`、`servers/`、`skills/`、`scripts/` 目录骨架 +- 已创建 `package.json`、`tsconfig.json` +- 已写入 `plugin/openclaw.plugin.json` +- 已补齐 wiring-only `plugin/index.ts` +- 已补齐最小 `scripts/install.mjs` **验收标准** - 插件能被识别 @@ -141,6 +196,9 @@ --- ### YNX-0202 定义 Yonexus.Server 配置 schema +**状态** +- [x] 已完成(2026-04-08) + **目标** - 让 Server 配置在启动前就能被校验 @@ -156,9 +214,18 @@ - 缺字段和非法字段会 fail fast - 错误信息足够定位问题 +**已完成内容** +- 已新增 `Yonexus.Server/plugin/core/config.ts` +- 已实现 `validateYonexusServerConfig()` 与 `YonexusServerConfigError` +- 已覆盖 `followerIdentifiers`、`notifyBotToken`、`adminUserId`、`listenPort`、`listenHost`、`publicWsUrl` 的基础约束与默认值处理 +- 已从 `plugin/index.ts` 导出配置类型与校验入口,方便后续 
lifecycle wiring 复用 + --- ### YNX-0203 实现 Server 生命周期 wiring +**状态** +- [x] 已完成(2026-04-08) + **目标** - 在 gateway 启动时初始化 Server 内部组件 @@ -173,20 +240,29 @@ - 启停流程完整 - 不会留下悬挂 timer / socket +**已完成内容** +- 已新增 `Yonexus.Server/plugin/core/runtime.ts` +- 已实现 `YonexusServerRuntime`,负责加载持久化记录、补齐 allowlist 默认 record、驱动 transport start/stop 与 shutdown 前持久化 +- 已把连接会话映射进 `ServerRegistry.sessions`,并在断开时回收 session / 更新离线状态 +- 已从 `Yonexus.Server/plugin/index.ts` 导出 runtime 入口,便于后续接入 OpenClaw lifecycle wiring + --- ## Phase 3 — Client 插件脚手架 ### YNX-0301 创建 Yonexus.Client 最小插件骨架 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 让 Client 插件可被 OpenClaw 加载 -**子任务** -- 创建插件目录结构 -- 创建 `package.json` -- 创建 `openclaw.plugin.json` -- 创建入口文件 `index.ts` -- 添加基础构建配置 +**已完成内容** +- 已补齐 `plugin/`、`servers/`、`skills/`、`scripts/` 目录骨架 +- 已创建 `package.json`、`tsconfig.json` +- 已写入 `plugin/openclaw.plugin.json` +- 已补齐 wiring-only `plugin/index.ts` +- 已补齐最小 `scripts/install.mjs` **验收标准** - 插件能被识别并加载 @@ -194,6 +270,9 @@ --- ### YNX-0302 定义 Yonexus.Client 配置 schema +**状态** +- [x] 已完成(2026-04-08) + **目标** - 保证 Client 配置可启动前校验 @@ -207,9 +286,18 @@ **验收标准** - 配置错误可在启动时直接发现 +**已完成内容** +- 已新增 `Yonexus.Client/plugin/core/config.ts` +- 已实现 `validateYonexusClientConfig()` 与 `YonexusClientConfigError` +- 已覆盖 `mainHost`、`identifier`、`notifyBotToken`、`adminUserId` 的必填校验,并限定 `mainHost` 为 `ws://` / `wss://` +- 已从 `plugin/index.ts` 导出配置类型与校验入口,方便后续 lifecycle wiring 直接接入 + --- ### YNX-0303 实现 Client 生命周期 wiring +**状态** +- [x] 已完成(2026-04-08) + **目标** - 在 gateway 启动时初始化 Client 运行时 @@ -222,11 +310,20 @@ **验收标准** - Client 可启动并在未连接状态下稳定运行 +**已完成内容** +- 已新增 `Yonexus.Client/plugin/core/runtime.ts` +- 已实现 `YonexusClientRuntime`,负责加载本地 state、驱动 transport connect/disconnect,并维护最小 phase 状态机 +- 已将 lifecycle 状态从 `plugin/index.ts` 导出,便于后续挂接 gateway startup/shutdown +- 已把 hello 发送时机放到 transport 连接成功后统一触发,避免后续 handshake wiring 分散在多个入口 + --- ## Phase 4 — 持久化与状态模型 ### YNX-0401 定义 Server 持久化记录结构 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 把 
`PLAN.md` 中的 `ClientRecord` 落到代码 @@ -240,9 +337,20 @@ **验收标准** - 持久化结构足以支撑 pairing/auth/heartbeat +**已完成内容** +- 已新增 `Yonexus.Server/plugin/core/persistence.ts` +- 已实现 `ClientRecord`、`ClientSession`、`ServerRegistry` 接口 +- 已实现 `SerializedClientRecord`、`ServerPersistenceData` 持久化结构 +- 已提供 `createClientRecord`、`serializeClientRecord`、`deserializeClientRecord` 工厂函数 +- 已提供 `isPairable`、`hasPendingPairing`、`isPairingExpired`、`canAuthenticate` 状态检查函数 +- 已更新 `Yonexus.Server/plugin/index.ts` 导出 persistence 模块 + --- ### YNX-0402 实现 Server 状态存储 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 让信任状态在重启后仍可恢复 @@ -256,9 +364,19 @@ - 重启后 paired client 不丢失 - 损坏文件时有可恢复行为 +**已完成内容** +- 已新增 `Yonexus.Server/plugin/core/store.ts`,采用 JSON 文件作为 v1 持久化载体 +- 已实现 `loadServerStore()` / `saveServerStore()` 与 `createYonexusServerStore()` +- 已使用 `*.tmp` + rename 的原子写入策略 +- 已在加载阶段校验文件结构,并在损坏/版本不匹配时抛出明确 corruption error +- 已明确 rolling security windows 通过 `deserializeClientRecord()` 在重启后清空 + --- ### YNX-0403 实现 Client 本地信任材料存储 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 持久化 Client 的 `identifier`、私钥、secret @@ -271,11 +389,21 @@ **验收标准** - Client 重启后可恢复身份与 secret +**已完成内容** +- 已新增 `Yonexus.Client/plugin/core/state.ts` +- 已定义 `YonexusClientState` / `YonexusClientStateFile`,覆盖 `identifier`、`privateKey`、`publicKey`、`secret` 与关键时间戳 +- 已实现 `loadYonexusClientState()` / `saveYonexusClientState()` / `createYonexusClientStateStore()` +- 已支持 state 文件缺失时自动创建最小初始状态 +- 已补充 `hasClientSecret()` 与 `hasClientKeyPair()`,方便后续 handshake wiring 判断本地信任材料是否齐备 + --- ## Phase 5 — Transport 最小闭环 ### YNX-0501 实现 Server WebSocket 启动与连接接入 +**状态** +- [x] 已完成(2026-04-08) + **目标** - Server 能监听并接受连接 @@ -288,9 +416,18 @@ - 可看到客户端连接进入 - 无协议时也不会崩 +**已完成内容** +- 已新增 `Yonexus.Server/plugin/core/transport.ts` +- 已实现 `YonexusServerTransport`,支持 start/stop、send/broadcast、连接管理 +- 已实现临时连接追踪、已认证连接管理、单 identifier 单连接策略 +- 已更新 `plugin/index.ts` 导出 transport 模块 + --- ### YNX-0502 实现 Client WebSocket 连接器 +**状态** +- [x] 已完成(2026-04-08) + **目标** - Client 能主动连到 
Server @@ -303,9 +440,19 @@ - Client 能连上可用的 Server - Server 不可用时不会死循环刷日志 +**已完成内容** +- 已新增 `Yonexus.Client/plugin/core/transport.ts` +- 已实现 `YonexusClientTransport`,支持 connect/disconnect/send +- 已实现指数退避重连策略(max 10 次,1s~30s 退避) +- 已实现心跳 timer 基础设施 +- 已更新 `plugin/index.ts` 导出 transport 模块 + --- ### YNX-0503 实现 hello / hello_ack +**状态** +- [x] 已完成(2026-04-08) + **目标** - 完成连接后的第一段协议握手 @@ -320,9 +467,21 @@ - 已配对客户端收到 `auth_required` - 非 allowlist 客户端被拒绝 +**已完成内容** +- Client runtime 已在连接建立后发送 `hello`,内容包含 `identifier`、协议版本、`hasSecret`、`hasKeyPair` 与可选 `publicKey` +- Server runtime 已接入 builtin `hello` 处理,校验 allowlist 与协议版本,并返回 `hello_ack` +- `hello_ack.nextAction` 已按 record 状态区分为 `pair_required` / `waiting_pair_confirm` / `auth_required` +- 对非法 identifier 与不支持的协议版本已返回协议错误,并在版本不匹配时主动关闭连接 + +**进展说明** +- 本次只完成最小握手闭环;后续 `auth_request`、`pair_request` 等具体动作仍由 Phase 6/7 继续补齐 + --- ### YNX-0504 实现基础重连策略 +**状态** +- [x] 已完成(2026-04-08) + **目标** - Client 断线后可恢复连接 @@ -335,11 +494,20 @@ - 断线可自动恢复 - 不会形成高频重连风暴 +**已完成内容** +- 已将 Client transport 重连逻辑改为覆盖所有非主动异常断线,而不再仅限“已认证后掉线”场景 +- 已增加 `shouldReconnect` 标志,确保手动 `disconnect()` 不会误触发自动重连 +- 已在每次 `connect()` 前清理旧的 reconnect timer,避免并发重连尝试叠加 +- 已在成功连接后移除首个 `error` 监听并重置退避计数,避免首次建连阶段的错误监听残留 + --- ## Phase 6 — Pairing 主流程 ### YNX-0601 实现 Client 首次密钥生成 +**状态** +- [x] 已完成(2026-04-08) + **目标** - Client 首次运行自动生成本地公私钥 @@ -353,9 +521,17 @@ - 首次启动可生成并持久化 keypair - 重启不会重复生成 +**已完成内容** +- 已新增 `Yonexus.Client/plugin/crypto/keypair.ts`,使用 Ed25519 生成并导出 PEM keypair +- 已在 `Yonexus.Client/plugin/core/state.ts` 增加 `ensureClientKeyPair()`,首次启动自动生成并持久化 keypair +- Client runtime 启动时自动确保 keypair 存在 + --- ### YNX-0602 实现 Server pairing request 创建 +**状态** +- [x] 已完成(2026-04-08) + **目标** - Server 可为待配对客户端创建 pairing 会话 @@ -369,9 +545,16 @@ - 每次 pairing 会话有可验证的过期时间 - pairing code 不会通过 Yonexus WS 下发 +**已完成内容** +- 已新增 `Yonexus.Server/plugin/services/pairing.ts`,封装 pairing code/TTL/状态写入 +- Server runtime 在 `pair_required` 时创建 pending pairing 记录并持久化 + --- ### YNX-0603 实现 
Discord DM 配对通知 +**状态** +- [x] 已完成(2026-04-09) + **目标** - Server 通过 `notifyBotToken` 向 `adminUserId` 发送 pairing code @@ -385,9 +568,19 @@ - 通知成功时 Client 才能进入可确认状态 - 通知失败时不会继续配对成功路径 +**已完成内容** +- 已将 `Yonexus.Server/plugin/notifications/discord.ts` 从 stub 升级为基于 Discord REST API 的真实 DM 发送实现 +- 已实现两段式调用:先创建/获取 DM channel,再向该 channel 发送 pairing message +- 已保留 `formatPairingMessage()` 与 mock service,便于测试与本地替身注入 +- 已在通知层补齐配置缺失、HTTP 非 2xx、返回 payload 缺少 channel id 等失败处理,失败时返回 `false` 让 runtime 落到 `admin_notification_failed` +- 已新增 `Yonexus.Server/tests/notifications.test.ts`,覆盖消息格式、成功发送、DM channel 创建失败、配置缺失四类场景 + --- ### YNX-0604 实现 pair_request / pair_confirm / pair_success +**状态** +- [x] 已完成(2026-04-08) + **目标** - 打通完整配对流程 @@ -404,9 +597,19 @@ - 正确 code 可完成配对 - 错误 code / 过期 code 会失败 +**已完成内容** +- Server runtime 已在 `hello_ack` 后发送 `pair_request`,并复用已有 pending pairing 的 TTL/状态元数据 +- Client runtime 已接收 `pair_request`,记录 pending pairing 元数据并切换到 `waiting_pair_confirm` +- Client runtime 已新增 `submitPairingCode()`,用于发送 `pair_confirm` +- Server runtime 已实现 `pair_confirm` 校验、`pair_success` 下发,并在成功后把 secret/publicKey 持久化到服务端记录 +- Client runtime 已在收到 `pair_success` 后保存 secret/pairedAt 到本地 state + --- ### YNX-0605 实现配对失败路径 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 补齐 pairing 相关失败逻辑 @@ -421,11 +624,20 @@ **验收标准** - 失败后不会留下脏状态导致后续无法重试 +**已完成内容** +- Server runtime 已为 `pair_confirm` 补齐 `identifier_not_allowed`、`invalid_code`、`expired`、`internal_error` 的 `pair_failed` 返回 +- Server runtime 已在配对通知失败时下发 `pair_failed(admin_notification_failed)` 并清理 pending pairing 状态,避免留下脏状态 +- Client runtime 已记录最近一次 pairing failure,并根据 `expired` / `admin_notification_failed` 自动回退到 `pair_required` +- 其他失败原因会保留 `waiting_pair_confirm`,允许客户端在同一 pairing 会话内重试输入 code + --- ## Phase 7 — Authentication 主流程 ### YNX-0701 固化 proof 构造与签名规范 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 明确签名输入,避免 Server / Client 不一致 @@ -437,9 +649,18 @@ **验收标准** - 同一输入在 Client 与 Server 上验证一致 +**已完成内容** +- 已新增 
`Yonexus.Protocol/src/auth.ts`,固化认证共享常量:nonce 长度、时间漂移窗口、attempt window、recent nonce window +- 已实现 `createAuthRequestSigningInput()` / `extractAuthRequestSigningInput()`,统一以稳定 JSON 序列化 `{ secret, nonce, timestamp }` 作为签名输入 +- 已补充 `isValidAuthNonce()` 与 `isTimestampFresh()`,把 nonce 与时间窗口校验规则从文档落到共享代码 +- 已从 `Yonexus.Protocol/src/index.ts` 导出认证辅助模块,供 Client / Server 复用 + --- ### YNX-0702 实现 Client auth_request +**状态** +- [x] 已完成(2026-04-08) + **目标** - Client 能基于本地 secret 与私钥发起认证 @@ -453,9 +674,18 @@ - 报文字段完整 - nonce 格式符合协议要求 +**已完成内容** +- Client runtime 已在收到 `hello_ack(auth_required)` 或 `pair_success` 后自动发起 `auth_request` +- 已复用本地 `secret` + Ed25519 私钥生成签名,并发送 `identifier` / `nonce` / `proofTimestamp` / `signature` / `publicKey` +- 已把 transport 状态补齐为 `authenticating -> authenticated`,并在 `auth_success` 后落盘 `authenticatedAt` +- 已处理 `auth_failed` / `re_pair_required` 的最小状态回退逻辑 + --- ### YNX-0703 实现 Server 认证校验 +**状态** +- [x] 已完成(2026-04-08) + **目标** - Server 能验证 auth_request 真伪 @@ -473,9 +703,19 @@ - 正确认证返回 `auth_success` - 各类失败返回对应 `auth_failed` +**已完成内容** +- Server runtime 已接入 `auth_request` builtin 处理并校验 allowlist / paired 状态 / publicKey 一致性 +- 已复用共享签名输入规范与 Ed25519 验签逻辑,验证客户端签名 +- 已实现 timestamp freshness、nonce 格式、recent nonce collision、10s attempt window 的最小校验 +- 正常认证会更新 `lastAuthenticatedAt` / `lastHeartbeatAt` / liveness 状态,并返回 `auth_success` +- 遇到 nonce collision 或 rate limit 时会触发 `re_pair_required`,清理旧 secret 与安全窗口 + --- ### YNX-0704 实现 re_pair_required 机制 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 在不安全条件下强制重新配对 @@ -489,11 +729,18 @@ **验收标准** - 不安全状态下不会继续接受旧信任材料 +**已完成内容** +- Server 端在 nonce collision / rate limit 时触发 `re_pair_required` 并清空 secret 与安全窗口 +- Client 收到 `re_pair_required` 或 `auth_failed(re_pair_required)` 后清除本地 secret 并回退到 `pair_required` + --- ## Phase 8 — Heartbeat 与在线状态 ### YNX-0801 实现 Client heartbeat loop +**状态** +- [x] 已完成(2026-04-08) + **目标** - Client 在认证后按周期发心跳 @@ -506,9 +753,16 @@ - 默认每 5 分钟发送一次 - 状态切换时 timer 无泄漏 +**已完成内容** +- Client transport 
在认证后启动 5 分钟 heartbeat 定时器 +- Runtime 处理 heartbeat tick,构造并发送 `heartbeat` builtin + --- ### YNX-0802 实现 Server heartbeat 接收与记录 +**状态** +- [x] 已完成(2026-04-08) + **目标** - Server 能更新客户端最近存活时间 @@ -521,9 +775,16 @@ **验收标准** - 收到心跳后客户端状态可维持在线 +**已完成内容** +- Server runtime 已接入 `heartbeat` 处理,校验 allowlist 与认证状态 +- 记录 `lastHeartbeatAt` 并回发 `heartbeat_ack`(online 状态) + --- ### YNX-0803 实现 Server liveness sweep +**状态** +- [x] 已完成(2026-04-08) + **目标** - Server 能周期性评估 online / unstable / offline @@ -536,11 +797,21 @@ **验收标准** - 状态转移符合文档定义 +**已完成内容** +- 已在 `Yonexus.Server/plugin/core/runtime.ts` 增加可配置的 liveness sweep timer(默认 30s) +- 已实现基于 `lastHeartbeatAt` 的 `online -> unstable -> offline` 状态判定 +- 已在进入 `unstable` 时下发 `status_update(heartbeat_timeout_7m)` +- 已在进入 `offline` 时下发 `disconnect_notice(heartbeat_timeout_11m)`、关闭连接并清理 session +- 状态变化后会持久化,避免 sweep 结果只停留在内存中 + --- ## Phase 9 — 规则消息与 API ### YNX-0901 实现 Client rule registry +**状态** +- [x] 已完成(2026-04-08) + **目标** - Client 侧支持应用层规则分发 @@ -554,9 +825,19 @@ - 命中规则时正确调用处理器 - 非法注册被拒绝 +**已完成内容** +- 已新增 `Yonexus.Client/plugin/core/rules.ts` +- 已实现 `registerRule()` / `hasRule()` / `dispatch()` / `getRules()` +- 已复用协议 codec 校验 rule identifier,拒绝空值、非法标识符与保留字 `builtin` +- 已通过 `ClientRuleRegistryError` 固化重复注册与非法注册错误语义 +- 已从 `Yonexus.Client/plugin/index.ts` 导出 rule registry 相关类型与工厂 + --- ### YNX-0902 实现 Server rule registry +**状态** +- [x] 已完成(2026-04-08) + **目标** - Server 侧支持应用层规则分发 @@ -569,53 +850,70 @@ **验收标准** - 规则注册与调用行为一致 +**已完成内容** +- 已新增 `Yonexus.Server/plugin/core/rules.ts` +- 已实现面向服务端重写消息格式的 `registerRule()` / `hasRule()` / `dispatch()` / `getRules()` +- 已通过 `parseRewrittenRuleMessage()` 强制 server dispatch 处理 `${rule}::${sender}::${content}` 形态 +- 已通过 `ServerRuleRegistryError` 固化保留字与重复注册的拒绝行为 +- 已从 `Yonexus.Server/plugin/index.ts` 导出 rule registry 相关类型与工厂 + --- ### YNX-0903 实现 `sendMessageToServer(message)` +**状态** +- [x] 已完成(2026-04-08) + **目标** - 暴露 Client 到 Server 的发送 API -**子任务** -- 校验连接/认证状态 -- 发送 
`${rule_identifier}::${message_content}` -- 未连接时返回明确错误 - -**验收标准** -- 上层插件可直接调用 +**已完成内容** +- 已在 `YonexusClientRuntime` 添加 `sendMessageToServer(message)` 方法 +- 已添加 `sendRuleMessage(ruleIdentifier, content)` 辅助方法 +- 已校验连接/认证状态,未连接时返回 false +- 已验证消息格式(拒绝 builtin:: 前缀,要求 :: 分隔符) +- 已复用 `encodeRuleMessage` 进行类型安全的消息编码 --- ### YNX-0904 实现 `sendMessageToClient(identifier, message)` +**状态** +- [x] 已完成(2026-04-08) + **目标** - 暴露 Server 到指定 Client 的发送 API -**子任务** -- 校验目标 client 已知且在线 -- 发送 `${rule_identifier}::${message_content}` -- 离线时返回明确错误 - -**验收标准** -- 上层插件可向指定客户端投递消息 +**已完成内容** +- 已在 `YonexusServerRuntime` 添加 `sendMessageToClient(identifier, message)` 方法 +- 已添加 `sendRuleMessageToClient(identifier, ruleIdentifier, content)` 辅助方法 +- 已校验目标 client 在线且已认证,离线时返回 false +- 已验证消息格式(拒绝 builtin:: 前缀,要求 :: 分隔符) +- 已复用 `encodeRewrittenRuleMessage` 进行类型安全的消息编码 --- ### YNX-0905 实现 Server 入站消息重写 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 把客户端来的消息重写成带 sender 标识的形式 -**子任务** -- 解析 `${rule_identifier}::${message_content}` -- 重写为 `${rule_identifier}::${sender_identifier}::${message_content}` -- 再进入 rule dispatch - -**验收标准** -- Server 侧处理器能可靠识别消息来源 +**已完成内容** +- 已在 `YonexusServerRuntime` 添加 `handleRuleMessage(connection, raw)` 私有方法 +- 已更新 `handleMessage` 入口,非 builtin 消息自动进入 rule message 处理流程 +- 已实现 sender identifier 识别(从 connection 或 session 中解析) +- 已使用 `parseRuleMessage` + `encodeRewrittenRuleMessage` 完成消息重写 +- 重写格式:`${rule}::${content}` -> `${rule}::${sender}::${content}` +- 已校验客户端认证状态,未认证客户端发送 rule message 会被断开连接 --- ## Phase 10 — 安全与鲁棒性 ### YNX-1001 实现敏感信息脱敏日志 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 避免 secret、私钥、proof 原文进入日志 @@ -627,9 +925,18 @@ **验收标准** - 关键敏感值不会明文打印 +**已完成内容** +- 已新增 `Yonexus.Server/plugin/core/logging.ts`,提供 `redactSecret()`、`redactPairingCode()`、`redactKey()` 与通用 `safeErrorMessage()` +- 已将 Server 侧 Discord pairing notification stub 改为输出结构化日志,并对 pairing code 做脱敏显示 +- 已将 Server transport 的 WebSocket error 日志统一改为经 `safeErrorMessage()` 处理后的安全错误信息 +- 当前仍未覆盖所有未来 
auth/persistence 日志点,但 v1 现有显式日志路径已不再直接打印配对码原文 + --- ### YNX-1002 实现 malformed / unsupported / unauthorized 防御 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 所有非法输入都能被可控拒绝 @@ -644,9 +951,18 @@ - 错误不会导致进程崩溃 - 客户端能收到明确错误反馈或断开 +**已完成内容** +- Server runtime 已为 builtin 解码增加 `CodecError` 捕获,malformed builtin message 会返回 `error(MALFORMED_MESSAGE)` 而不是直接抛出 +- Server runtime 已为未支持的 builtin `type` 返回显式错误响应,避免静默吞掉非法协议帧 +- Client runtime 已为 builtin 解码增加容错,不再因为坏帧直接抛出;当前会记录最近一次失败原因供上层状态观察 +- Client / Server 的 rule message 发送校验已收敛到共享 codec,而不再依赖脆弱的本地正则片段判断 + --- ### YNX-1003 实现单 identifier 单活跃连接策略 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 同一 client identifier 只允许一个活跃认证连接 @@ -658,9 +974,23 @@ **验收标准** - 任意时刻同一 identifier 只有一个有效 session +**已完成内容** +- 已重构 `YonexusServerTransport`: + - `tempConnections` 改为 Map 结构,跟踪未认证连接 + - 新增 `assignIdentifierToTemp()`:hello 时仅分配 identifier,不进入认证注册表 + - 新增 `promoteToAuthenticated()`:认证成功后晋升为正式连接,此时才关闭旧连接 + - 新增 `removeTempConnection()`:认证失败时清理临时连接 +- 已更新 `runtime.ts`: + - hello 处理使用 `assignIdentifierToTemp()` 代替 `registerConnection()` + - auth_success 后调用 `promoteToAuthenticated()` 完成连接晋升 +- **安全改进**:未认证连接无法踢掉已认证连接,防止连接劫持攻击 + --- ### YNX-1004 实现重启恢复策略 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 重启后行为可预期且文档一致 @@ -673,27 +1003,128 @@ **验收标准** - 重启后的认证/配对行为稳定可解释 +**已完成内容** +- **持久化恢复**:`YonexusServerRuntime.start()` 加载持久化记录,恢复所有 client records +- **allowlist 同步**:对配置中的 `followerIdentifiers` 自动创建缺失的初始记录 +- **rolling windows 清理**:`deserializeClientRecord()` 明确清空 `recentNonces` 和 `recentHandshakeAttempts` + - v1 设计:安全窗口仅内存驻留,重启后重建,防止旧 nonce 积压 +- **pending pairing 恢复**:保留 `pairingStatus=pending` 和 `pairingCode`,支持重启后继续配对流程 +- **连接状态重建**: + - 所有连接状态标记为 `offline`(WebSocket 连接不可恢复) + - `lastHeartbeatAt` 保留,liveness sweep 会根据其判断状态转移 +- **已文档化**:在 `persistence.ts` 和 `types.ts` 中以 JSDoc 注释明确重启语义 + --- ## Phase 11 — 测试与联调 ### YNX-1101 编写协议单元测试 +**状态** +- [x] 已完成(2026-04-09) + **目标** - 覆盖编解码、字段校验、错误码 +**已完成内容** +- 新增 `Yonexus.Protocol/tests/codec.test.ts` +- 覆盖 builtin 
编解码、rule message/rewritten message 解析、保留字与非法标识符校验 +- 覆盖 encode/parse 边界、malformed message 报错与 `isBuiltinMessage` 行为 + ### YNX-1102 编写 Server 单元测试 +**状态** +- [x] 已完成(2026-04-09) + **目标** - 覆盖 registry、pairing、auth、heartbeat sweep +**已完成内容** +- 已为 `Yonexus.Server` 接入 `vitest` 测试基础设施(`package.json` + `vitest.config.ts`) +- 已新增 `tests/pairing-and-rules.test.ts` +- 已覆盖 pairing request 创建、有效/无效/过期 code 校验、通知状态迁移 +- 已覆盖 server rule registry 的 exact-match dispatch、保留字拒绝、重复注册拒绝 +- 已新增 `tests/pairing-auth-liveness.test.ts` +- 已覆盖 auth 校验(public key/timestamp/nonce/rate limit/签名)与心跳 liveness 判定 +- 已新增 `tests/runtime-flow.test.ts` +- 已覆盖 hello -> pair_request、pair_confirm -> auth_request -> heartbeat、未认证 rule message 拒绝、liveness sweep 触发 `status_update` / `disconnect_notice` +- 补齐测试过程中暴露的服务端 publicKey 标准化问题:hello 阶段已统一 trim,避免 PEM 尾部换行导致 `auth_request` 误判 `invalid_signature` + ### YNX-1103 编写 Client 单元测试 +**状态** +- [x] 已完成(2026-04-09) + **目标** - 覆盖状态机、keypair、auth 构造、heartbeat timer +**已完成内容** +- 已为 `Yonexus.Client` 接入 `vitest` 测试基础设施(`package.json` + `vitest.config.ts`) +- 已新增 `tests/state-and-rules.test.ts` +- 已覆盖 client state 文件缺失初始化、状态保存/回读 +- 已覆盖 `ensureClientKeyPair()` 的首次生成/重复调用复用,以及签名/验签基本链路 +- 已覆盖 client rule registry 的 dispatch、保留字拒绝、重复注册拒绝 +- 已新增 `tests/state-auth-heartbeat.test.ts` +- 已覆盖 auth_request signing input、nonce/timestamp 校验、状态机迁移与 heartbeat scheduler 行为 +- 已新增 `tests/runtime-flow.test.ts` +- 已覆盖启动时加载 state + 自动补 keypair、`hello_ack`/`pair_request`/`pair_success`/`auth_success` 协作链路、`auth_failed`/`re_pair_required` 的 trust reset,以及认证后 heartbeat 发送门禁 + ### YNX-1104 编写 Server-Client 集成测试 +**状态** +- [x] 已完成(2026-04-09) + **目标** - 覆盖首次配对、正常重连、认证失败、心跳超时、re-pair +**已完成内容** +- 已创建 `tests/integration/framework.test.ts` 集成测试框架 +- 提供 `MockTransportPair` 模拟 Server-Client 网络通信 +- 提供 `createIntegrationTestContext()` 快速创建集成测试环境 +- 已修正集成测试框架中的时间推进问题,`advanceTime()` 现在会真实驱动 Server / Client runtime 的 `now()` +- 已实现以下集成测试用例: + - 首次配对完整流程(hello → pair_request → pair_confirm → 
auth → heartbeat) + - 带凭证的重连流程(跳过配对直接认证) + - 认证后的心跳交换验证 + - 心跳超时触发 `unstable` / `offline` 与 `disconnect_notice` + - nonce collision 触发 `re_pair_required` 后 client 回退到 `pair_required` + +**待完成** +- 并发连接等剩余边界场景 +- 真实 WebSocket 传输层集成测试(可选) + +--- +### YNX-1104a 细化:首次配对集成测试 +**状态** +- [x] 已完成(2026-04-09) + +**已完成内容** +- `First-Time Pairing Flow` 测试套件 +- 验证端到端的配对与认证状态迁移 + +--- +### YNX-1104b 细化:重连集成测试 +**状态** +- [x] 已完成(2026-04-09) + +**已完成内容** +- `Reconnection Flow` 测试套件 +- 验证已配对客户端跳过配对直接进入认证 + ### YNX-1105 编写失败路径测试矩阵 +**状态** +- [x] 已完成(2026-04-09) + +**进展补充(2026-04-09)** +- 基于本轮锁定的 v1 边界,`AF-04` 已明确按 v1 语义并入 `invalid_signature`,不再作为“未完成测试缺口”单独追踪;若后续要恢复 `invalid_secret`,需先同步调整协议与实现 +- `RP-03`(管理员主动撤销)与 `RP-04`(key rotation)继续保留为 v2+ 议题,与 `PLAN.md` / `FEAT.md` 的 v1 边界保持一致 +- 已补充 `CF-05`:`hello` 缺失 payload 时返回 `MALFORMED_MESSAGE` 且保持连接可继续诊断 +- 已补充 `SR-04`:Client 首次运行/无凭证状态会自动补 keypair,并在 `hello_ack(pair_required)` 后进入完整配对流,不需要手工预置 state +- 已同步将 `CF-07`(保留字 rule 注册拒绝)在失败路径矩阵里标记为已覆盖,和现有 Client/Server rule registry 测试保持一致 +- 本轮新增 `CF-01` / `CF-02`:补齐 client transport 在网络分区与首次建连失败时的指数退避重连测试 +- 本轮新增 `SR-02`:补齐 server restart 后 active session 不恢复、但 durable trust record 保留且客户端需要重新 hello/auth 的恢复测试 +- 已新增 umbrella 仓库一键回归入口 `scripts/validate-v1.sh`,把 Protocol / Server / Client 的最小 v1 验证串成单次执行 +- 本轮继续补齐 `Yonexus.Protocol` 本地依赖与 `check` 脚本,使 umbrella 校验链路覆盖协议层类型检查 +- 已将 `scripts/validate-v1.sh` 调整为缺依赖时自动执行 `npm ci` / `npm install`,减少首次回归阻塞 +- 已完成一次完整 umbrella 回归:`Yonexus.Protocol` / `Yonexus.Server` / `Yonexus.Client` 的 check + test 全绿 +- 已顺手修复本轮回归暴露的 TypeScript 收尾问题:Protocol payload 泛型约束、Server `pairedAt` 持久化字段、Client/Server config 严格模式报错、测试态 runtime 通知服务注入 + **目标** - 系统性覆盖 pairing/auth 失败路径 @@ -711,30 +1142,132 @@ **验收标准** - 核心安全路径都有自动化测试 +**已完成内容** +- 已创建 `tests/failure-path/MATRIX.md` 失败路径测试矩阵文档 + - 定义 PF-01~PF-10(Pairing Failures) + - 定义 AF-01~AF-11(Authentication Failures) + - 定义 RP-01~RP-04(Re-pairing Triggers) + - 定义 CF-01~CF-07(Connection Failures) + - 定义 
HF-01~HF-04(Heartbeat Failures) + - 定义 SR-01~SR-06(State Recovery) + - 标记优先级(🔴 Phase 1 关键安全路径) + +- 已创建 `tests/failure-path/pairing-failures.test.ts` + - PF-01: 无效配对码及重试机制 + - PF-02: 过期配对码清理 + - PF-03: 非 allowlist 标识符拒绝 + - PF-04: 管理员通知失败处理 + - PF-05: 空/空白配对码拒绝 + - PF-06: 畸形 pair_confirm 载荷处理 + - PF-07: 已配对客户端重复配对保护 + - Edge Cases: 并发配对、过期清理验证 + +- 已新增 `Yonexus.Server/tests/auth-failures.test.ts` + - AF-01 / AF-02:unknown identifier、not_paired + - AF-03 / AF-09 / AF-11:invalid signature、wrong public key、tampered proof + - AF-05 / AF-06:stale / future timestamp + - AF-07 / AF-08:nonce collision / rate limit 触发 re_pair_required + - AF-10:malformed auth_request payload + - 覆盖 re_pair 后 secret 清理与 pairingStatus=revoked +- 已同步更新 `tests/failure-path/MATRIX.md` 的 PF / AF / RP / HF 状态标记与当前备注 +- 已新增 `Yonexus.Server/tests/connection-heartbeat-failures.test.ts` + - CF-06:未认证 rule message 会关闭连接 + - HF-03:认证前心跳会返回 `AUTH_FAILED` + - HF-04:无会话心跳会返回 `AUTH_FAILED` +- 已新增 `Yonexus.Server/tests/state-recovery.test.ts` + - SR-01 / PF-09:server restart with pending pairing,验证 pending pairing 与 pairing code 在重启后保留,hello 重新进入 `waiting_pair_confirm` + - SR-05:损坏的 server store 会抛出 `YonexusServerStoreCorruptionError` +- 已扩展 `Yonexus.Client/tests/state-and-rules.test.ts` + - SR-06:损坏的 client state 会抛出 `YonexusClientStateCorruptionError` +- 已同步更新 `tests/failure-path/MATRIX.md`,标记 PF-09、SR-01、SR-05、SR-06 为已覆盖 +- 已新增 `Yonexus.Server/tests/connection-heartbeat-failures.test.ts` 补齐: + - CF-03:重复认证连接晋升时关闭旧连接并保留新连接 + - CF-04:协议版本不匹配时返回 `UNSUPPORTED_PROTOCOL_VERSION` 并主动断开 +- 已新增 `Yonexus.Client/tests/runtime-flow.test.ts` 恢复场景: + - SR-03:客户端带既有 secret + keypair 重启后直接进入 auth flow,不重新配对 +- 已同步更新 `tests/failure-path/MATRIX.md`,标记 RP-02、CF-03、CF-04、SR-03 为已覆盖 +- 已新增 `Yonexus.Server/tests/runtime-flow.test.ts`: + - CF-05:`hello` 缺失 payload 时返回 `error(MALFORMED_MESSAGE)` 且不主动断开连接 +- 已新增 `Yonexus.Client/tests/runtime-flow.test.ts`: + - SR-04:客户端在无 secret/首次运行状态下启动后可自动进入 `pair_required`,无需手工 bootstrap 本地 
state +- 已同步更新 `tests/failure-path/MATRIX.md`,标记 CF-05、CF-07、SR-04 为已覆盖 +- 已新增 `Yonexus.Client/tests/transport-reconnect.test.ts` + - CF-02:首次连接失败时按 1s → 2s 指数退避继续重试,成功后恢复 connected + - CF-01:已建立连接在异常 close(network partition)后会按退避策略发起重连 +- 已扩展 `Yonexus.Server/tests/state-recovery.test.ts` + - SR-02:server restart 后不恢复内存 session,保留 durable paired trust,并要求 client 重新 `hello` 后进入 `auth_required` +- 已新增 `Yonexus.Server/tests/notifications.test.ts` + - PF-04:覆盖 Discord DM 消息格式、成功发送、DM channel 创建失败、配置缺失 +- 已扩展 `tests/failure-path/pairing-failures.test.ts` + - PF-08:已配对且在线的 client 再次发起 `pair_confirm` 时会被拒绝,且旧 trust material 保持不变 +- 已扩展 `Yonexus.Client/tests/runtime-flow.test.ts` + - PF-10:client 在 waiting_pair_confirm 阶段重启后,可重新 hello 并恢复到等待 out-of-band pairing code 的流程 +- 已同步更新 `tests/failure-path/MATRIX.md`,标记 CF-01、CF-02、PF-08、PF-10、SR-02 为已覆盖 + +**剩余说明(不阻塞 v1)** +- RP-03 / RP-04:管理员主动撤销与 key rotation 语义仍未实现,继续按 v2+ 保留 +- PF-04 当前已覆盖运行时失败路径与通知服务单测,但真实 Discord 环境 smoke test 仍属于后续非阻塞验证项 + --- ## Phase 12 — 文档与交付 ### YNX-1201 补齐 Server README +**状态** +- [x] 已完成(2026-04-08) + **目标** - Server 仓库可独立被安装与使用 +**已完成内容** +- 已补齐 `Yonexus.Server/README.md` +- 已写明当前实现范围、配置字段、启动/连接流程、公开 API、持久化语义与开发方式 +- 已明确当前限制项(真实 Discord DM、测试覆盖、生命周期集成等),避免 README 过度承诺 + ### YNX-1202 补齐 Client README +**状态** +- [x] 已完成(2026-04-08) + **目标** - Client 仓库可独立被安装与使用 +**已完成内容** +- 已补齐 `Yonexus.Client/README.md` +- 已写明配置模型、启动/配对/认证流程、公开 API、本地 state 结构与开发方式 +- 已明确当前限制项(测试、配对输入 UX、生命周期集成等),方便后续交接和联调 + ### YNX-1203 输出部署文档 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 写清楚单主多从部署方式、配置示例、配对流程 +**已完成内容** +- 新增 `DEPLOYMENT.md`,覆盖拓扑、子模块同步、Server/Client 安装、配置示例与配对流程 + ### YNX-1204 输出运维排障文档 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 写清楚常见报错、状态含义、恢复步骤 +**已完成内容** +- 新增 `OPERATIONS.md`,覆盖状态说明、常见错误与恢复建议 + ### YNX-1205 输出协议测试与验收清单 +**状态** +- [x] 已完成(2026-04-08) + **目标** - 让后续改动有统一回归基线 +**已完成内容** +- 已新增 `ACCEPTANCE.md` +- 已按协议层、Server、Client、联调、失败路径回归矩阵拆分验收项 +- 已把 `YNX-1101`~`YNX-1105` 与具体验收/测试目标建立对应关系,方便后续补自动化测试时直接对照 + --- ## 
# Announce a validation step, then execute it.
#   $1   - human-readable title printed after a blank line as ">>> <title>"
#   $2.. - the command and its arguments, run verbatim; its exit status is
#          the function's exit status.
run_step() {
  local label="$1"
  shift

  printf '\n>>> %s\n' "${label}"
  "$@"
}
// Singleton guard — openclaw calls register() twice per process.
// It is latched only once the Yonexus client has been found, so an early call
// made before Yonexus.Client is on globalThis does not disable the later one.
let _registered = false;

/**
 * Entry point of the Yonexus.Client test plugin.
 *
 * Looks up the client exposed on `globalThis.__yonexusClient`, registers a
 * `test_pong` rule handler that logs the received content, and queues an
 * `onAuthenticated` callback that sends one `test_ping` and one `other_rule`
 * message to the server and logs both `sendRule` results.
 *
 * @param {unknown} _api - Plugin API object supplied by the host; unused here.
 * @returns {void}
 */
export default function register(_api) {
  if (_registered) return;

  const client = globalThis.__yonexusClient;
  if (!client) {
    console.error('[client-test] __yonexusClient not on globalThis — ensure Yonexus.Client loads first');
    return;
  }

  // Latch before registering so a repeated call never registers the rule twice.
  _registered = true;

  console.log('[client-test] __yonexusClient available, keys:', Object.keys(client));

  // Register test_pong rule
  // Received format (plain rule message from server): test_pong::<content>
  client.ruleRegistry.registerRule('test_pong', (raw) => {
    const sep = raw.indexOf('::');
    if (sep === -1) {
      // Without a separator there is no content to extract; do not log a bogus slice.
      console.error(`[client-test] malformed test_pong message (missing "::"): "${raw}"`);
      return;
    }
    // Only the first "::" delimits the rule name; content may itself contain "::".
    const content = raw.slice(sep + 2);
    console.log(`[client-test] MATCH test_pong content="${content}"`);
  });

  // When authenticated, send one matching and one non-matching rule message to server
  client.onAuthenticated.push(() => {
    console.log('[client-test] Authenticated — sending test_ping + other_rule to server');
    const s1 = client.sendRule('test_ping', 'hello-from-client');
    const s2 = client.sendRule('other_rule', 'other-from-client');
    console.log(`[client-test] sendRule results: test_ping=${s1} other_rule=${s2}`);
  });

  console.log('[client-test] registered test_pong rule and onAuthenticated callback');
}
tests/docker/client-test-plugin /app/client-test-plugin +COPY tests/docker/client/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/tests/docker/client/entrypoint.sh b/tests/docker/client/entrypoint.sh new file mode 100644 index 0000000..591985f --- /dev/null +++ b/tests/docker/client/entrypoint.sh @@ -0,0 +1,69 @@ +#!/bin/sh +set -e + +: "${IDENTIFIER:?IDENTIFIER is required}" +: "${NOTIFY_BOT_TOKEN:?NOTIFY_BOT_TOKEN is required}" +: "${ADMIN_USER_ID:?ADMIN_USER_ID is required}" +: "${YONEXUS_SERVER_URL:?YONEXUS_SERVER_URL is required}" + +STATE_DIR=/app/.openclaw-state +PLUGIN_DIR="$STATE_DIR/plugins/Yonexus.Client" +TEST_PLUGIN_DIR="$STATE_DIR/plugins/yonexus-client-test" + +# Install plugin dist + manifest into isolated state directory +node /app/scripts/install.mjs --install --openclaw-profile-path "$STATE_DIR" +# Symlink node_modules so bare-module imports (e.g. ws) resolve from plugin dir +ln -sf /app/node_modules "$PLUGIN_DIR/node_modules" + +# Install test plugin (plain .mjs, no compilation needed) +mkdir -p "$TEST_PLUGIN_DIR" +cp /app/client-test-plugin/index.mjs "$TEST_PLUGIN_DIR/" +cp /app/client-test-plugin/openclaw.plugin.json "$TEST_PLUGIN_DIR/" + +# Write openclaw config — plugin id is "yonexus-client" per openclaw.plugin.json +mkdir -p "$STATE_DIR" +cat > "$STATE_DIR/openclaw.json" << EOF +{ + "meta": { "lastTouchedVersion": "2026.4.9" }, + "gateway": { "bind": "loopback" }, + "agents": { "defaults": { "workspace": "$STATE_DIR/workspace" } }, + "plugins": { + "allow": ["yonexus-client", "yonexus-client-test"], + "load": { "paths": ["$PLUGIN_DIR", "$TEST_PLUGIN_DIR"] }, + "installs": { + "yonexus-client": { + "source": "path", + "sourcePath": "$PLUGIN_DIR", + "installPath": "$PLUGIN_DIR", + "version": "0.1.0", + "installedAt": "2026-04-10T00:00:00.000Z" + }, + "yonexus-client-test": { + "source": "path", + "sourcePath": "$TEST_PLUGIN_DIR", + "installPath": "$TEST_PLUGIN_DIR", + "version": 
"0.1.0", + "installedAt": "2026-04-10T00:00:00.000Z" + } + }, + "entries": { + "yonexus-client": { + "enabled": true, + "config": { + "mainHost": "$YONEXUS_SERVER_URL", + "identifier": "$IDENTIFIER", + "notifyBotToken": "$NOTIFY_BOT_TOKEN", + "adminUserId": "$ADMIN_USER_ID" + } + }, + "yonexus-client-test": { + "enabled": true, + "config": {} + } + } + } +} +EOF + +export OPENCLAW_STATE_DIR="$STATE_DIR" +exec openclaw gateway run --allow-unconfigured diff --git a/tests/docker/docker-compose.yml b/tests/docker/docker-compose.yml new file mode 100644 index 0000000..5124a63 --- /dev/null +++ b/tests/docker/docker-compose.yml @@ -0,0 +1,46 @@ +services: + yonexus-server: + build: + context: ../.. + dockerfile: tests/docker/server/Dockerfile + environment: + # Identifier the client will use — must match IDENTIFIER on the client side + CLIENT_IDENTIFIER: test-client + # Required: Discord bot token and admin user ID for pairing notifications + NOTIFY_BOT_TOKEN: ${NOTIFY_BOT_TOKEN} + ADMIN_USER_ID: ${ADMIN_USER_ID} + # Optional: override the publicWsUrl advertised to clients + # PUBLIC_WS_URL: ws://yonexus-server:8787 + networks: + - yonexus-net + healthcheck: + # Wait until the Yonexus WebSocket port is accepting connections + test: + - CMD + - node + - -e + - "require('net').createConnection({port:8787,host:'127.0.0.1'}).on('connect',()=>process.exit(0)).on('error',()=>process.exit(1))" + interval: 5s + timeout: 3s + retries: 12 + start_period: 15s + + yonexus-client: + build: + context: ../.. 
// Singleton guard — openclaw calls register() twice per process.
// It is latched only once the Yonexus server has been found, so an early call
// made before Yonexus.Server is on globalThis does not disable the later one.
let _registered = false;

/**
 * Entry point of the Yonexus.Server test plugin.
 *
 * Looks up the server exposed on `globalThis.__yonexusServer`, registers a
 * `test_ping` rule handler that echoes the content back to the sender as a
 * `test_pong` message, and queues an `onClientAuthenticated` callback that
 * sends one `test_pong` and one `other_rule` message to the newly
 * authenticated client and logs both `sendRule` results.
 *
 * @param {unknown} _api - Plugin API object supplied by the host; unused here.
 * @returns {void}
 */
export default function register(_api) {
  if (_registered) return;

  const server = globalThis.__yonexusServer;
  if (!server) {
    console.error('[server-test] __yonexusServer not on globalThis — ensure Yonexus.Server loads first');
    return;
  }

  // Latch before registering so a repeated call never registers the rule twice.
  _registered = true;

  console.log('[server-test] __yonexusServer available, keys:', Object.keys(server));

  // Register test_ping rule
  // Received format (rewritten by server): test_ping::<sender>::<content>
  server.ruleRegistry.registerRule('test_ping', (raw) => {
    const firstSep = raw.indexOf('::');
    const rest = firstSep === -1 ? '' : raw.slice(firstSep + 2);
    const secondSep = rest.indexOf('::');
    if (firstSep === -1 || secondSep === -1) {
      // A missing separator would otherwise yield a truncated sender and an
      // echo addressed to an identifier that never sent anything.
      console.error(`[server-test] malformed test_ping message (expected rule::sender::content): "${raw}"`);
      return;
    }
    const sender = rest.slice(0, secondSep);
    // Everything after the second "::" is content, including any further "::".
    const content = rest.slice(secondSep + 2);
    console.log(`[server-test] MATCH test_ping from="${sender}" content="${content}"`);
    // Echo back to sender via test_pong
    const sent = server.sendRule(sender, 'test_pong', `echo-${content}`);
    console.log(`[server-test] echo sent=${sent}`);
  });

  // When a client authenticates, send one matching and one non-matching rule message
  server.onClientAuthenticated.push((identifier) => {
    console.log(`[server-test] Client "${identifier}" authenticated — sending test_pong + other_rule`);
    const s1 = server.sendRule(identifier, 'test_pong', 'welcome-from-server');
    const s2 = server.sendRule(identifier, 'other_rule', 'other-from-server');
    console.log(`[server-test] sendRule results: test_pong=${s1} other_rule=${s2}`);
  });

  console.log('[server-test] registered test_ping rule and onClientAuthenticated callback');
}
/build/Yonexus.Server/node_modules ./node_modules +COPY Yonexus.Server/package.json ./package.json +COPY Yonexus.Server/plugin/openclaw.plugin.json ./plugin/openclaw.plugin.json +COPY Yonexus.Server/scripts/install.mjs ./scripts/install.mjs + +COPY tests/docker/server-test-plugin /app/server-test-plugin +COPY tests/docker/server/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +EXPOSE 8787 + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/tests/docker/server/entrypoint.sh b/tests/docker/server/entrypoint.sh new file mode 100644 index 0000000..e0a58a6 --- /dev/null +++ b/tests/docker/server/entrypoint.sh @@ -0,0 +1,71 @@ +#!/bin/sh +set -e + +: "${CLIENT_IDENTIFIER:?CLIENT_IDENTIFIER is required}" +: "${NOTIFY_BOT_TOKEN:?NOTIFY_BOT_TOKEN is required}" +: "${ADMIN_USER_ID:?ADMIN_USER_ID is required}" + +STATE_DIR=/app/.openclaw-state +PLUGIN_DIR="$STATE_DIR/plugins/Yonexus.Server" +TEST_PLUGIN_DIR="$STATE_DIR/plugins/yonexus-server-test" +SERVER_WS_URL="${PUBLIC_WS_URL:-ws://yonexus-server:8787}" + +# Install plugin dist + manifest into isolated state directory +node /app/scripts/install.mjs --install --openclaw-profile-path "$STATE_DIR" +# Symlink node_modules so bare-module imports (e.g. 
ws) resolve from plugin dir +ln -sf /app/node_modules "$PLUGIN_DIR/node_modules" + +# Install test plugin (plain .mjs, no compilation needed) +mkdir -p "$TEST_PLUGIN_DIR" +cp /app/server-test-plugin/index.mjs "$TEST_PLUGIN_DIR/" +cp /app/server-test-plugin/openclaw.plugin.json "$TEST_PLUGIN_DIR/" + +# Write openclaw config — plugin id is "yonexus-server" per openclaw.plugin.json +mkdir -p "$STATE_DIR" +cat > "$STATE_DIR/openclaw.json" << EOF +{ + "meta": { "lastTouchedVersion": "2026.4.9" }, + "gateway": { "bind": "loopback" }, + "agents": { "defaults": { "workspace": "$STATE_DIR/workspace" } }, + "plugins": { + "allow": ["yonexus-server", "yonexus-server-test"], + "load": { "paths": ["$PLUGIN_DIR", "$TEST_PLUGIN_DIR"] }, + "installs": { + "yonexus-server": { + "source": "path", + "sourcePath": "$PLUGIN_DIR", + "installPath": "$PLUGIN_DIR", + "version": "0.1.0", + "installedAt": "2026-04-10T00:00:00.000Z" + }, + "yonexus-server-test": { + "source": "path", + "sourcePath": "$TEST_PLUGIN_DIR", + "installPath": "$TEST_PLUGIN_DIR", + "version": "0.1.0", + "installedAt": "2026-04-10T00:00:00.000Z" + } + }, + "entries": { + "yonexus-server": { + "enabled": true, + "config": { + "followerIdentifiers": ["$CLIENT_IDENTIFIER"], + "notifyBotToken": "$NOTIFY_BOT_TOKEN", + "adminUserId": "$ADMIN_USER_ID", + "listenHost": "0.0.0.0", + "listenPort": 8787, + "publicWsUrl": "$SERVER_WS_URL" + } + }, + "yonexus-server-test": { + "enabled": true, + "config": {} + } + } + } +} +EOF + +export OPENCLAW_STATE_DIR="$STATE_DIR" +exec openclaw gateway run --allow-unconfigured diff --git a/tests/failure-path/MATRIX.md b/tests/failure-path/MATRIX.md new file mode 100644 index 0000000..355bd74 --- /dev/null +++ b/tests/failure-path/MATRIX.md @@ -0,0 +1,167 @@ +# Yonexus Failure Path Test Matrix + +This document defines the systematic test coverage for pairing and authentication failure scenarios. 
+ +## Test Matrix Legend + +- ✅ = Test implemented +- 🔄 = Test stub exists, needs implementation +- ⬜ = Not yet implemented +- ⏸ = Deferred / intentionally out of v1 scope +- 🔴 = Critical path, high priority + +--- + +## 1. Pairing Failure Paths + +| ID | Scenario | Trigger | Expected Behavior | Status | +|----|----------|---------|-------------------|--------| +| PF-01 | Invalid pairing code | Client submits wrong code | `pair_failed(invalid_code)`, allow retry | ✅ | +| PF-02 | Expired pairing code | Client submits after expiry | `pair_failed(expired)`, reset to `pair_required` | ✅ | +| PF-03 | Identifier not in allowlist | Unknown client tries to pair | `pair_failed(identifier_not_allowed)`, close connection | ✅ | +| PF-04 | Admin notification failed | Discord DM fails to send | `pair_failed(admin_notification_failed)`, abort pairing | ✅ | +| PF-05 | Empty pairing code | Client submits empty string | `pair_failed(invalid_code)` | ✅ | +| PF-06 | Malformed pair_confirm payload | Missing required fields | Protocol error, no state change | ✅ | +| PF-07 | Double pairing attempt | Client calls pair_confirm twice | Second attempt rejected if already paired | ✅ | +| PF-08 | Pairing during active session | Paired client tries to pair again | Reject, maintain existing trust | ✅ | +| PF-09 | Server restart during pairing | Server restarts before confirm | Pairing state preserved, code still valid | ✅ | +| PF-10 | Client restart during pairing | Client restarts before submit | Client must restart pairing flow | ✅ | + +--- + +## 2. 
Authentication Failure Paths + +| ID | Scenario | Trigger | Expected Behavior | Status | +|----|----------|---------|-------------------|--------| +| AF-01 | Unknown identifier | Auth from unpaired client | `auth_failed(unknown_identifier)` | ✅ | +| AF-02 | Not paired | Auth before pairing complete | `auth_failed(not_paired)` | ✅ | +| AF-03 | Invalid signature | Wrong private key used | `auth_failed(invalid_signature)` | ✅ | +| AF-04 | Wrong secret | Client has outdated secret | 当前实现将其并入 `auth_failed(invalid_signature)`;`invalid_secret` 语义保留待后续确认 | ⏸ | +| AF-05 | Stale timestamp | Proof timestamp >10s old | `auth_failed(stale_timestamp)` | ✅ | +| AF-06 | Future timestamp | Proof timestamp in future | `auth_failed(future_timestamp)` | ✅ | +| AF-07 | Nonce collision | Reused nonce within window | `auth_failed(nonce_collision)` → `re_pair_required` 🔴 | ✅ | +| AF-08 | Rate limited | >10 attempts in 10s | `auth_failed(rate_limited)` → `re_pair_required` 🔴 | ✅ | +| AF-09 | Wrong public key | Key doesn't match stored | `auth_failed(invalid_signature)` | ✅ | +| AF-10 | Malformed auth_request | Missing required fields | Protocol error | ✅ | +| AF-11 | Tampered proof | Modified signature | `auth_failed(invalid_signature)` | ✅ | + +--- + +## 3. Re-pairing Triggers + +| ID | Scenario | Cause | Server Action | Client Action | Status | +|----|----------|-------|---------------|---------------|--------| +| RP-01 | Nonce collision | Replay attack detected | Clear secret, reset state | Enter `pair_required` | ✅ | +| RP-02 | Rate limit exceeded | Brute force detected | Clear secret, reset state | Enter `pair_required` | ✅ | +| RP-03 | Admin-initiated | Manual revocation | Mark revoked, notify | Enter `pair_required` | ⏸ | +| RP-04 | Key rotation | Client sends new public key | Update key, keep secret | Continue with new key | ⏸ | + +--- + +## 4. 
Connection Failure Paths + +| ID | Scenario | Trigger | Expected Behavior | Status | +|----|----------|---------|-------------------|--------| +| CF-01 | Network partition | Connection drops mid-auth | Client retries with backoff | ✅ | +| CF-02 | Server unreachable | Initial connect fails | Exponential backoff retry | ✅ | +| CF-03 | Duplicate connection | Same ID connects twice | Old connection closed, new accepted | ✅ | +| CF-04 | Protocol version mismatch | Unsupported version | Connection rejected with error | ✅ | +| CF-05 | Malformed hello | Invalid payload / missing required hello fields | Error response, connection maintained | ✅ | +| CF-06 | Unauthenticated rule message | Client sends before auth | Connection closed | ✅ | +| CF-07 | Reserved rule registration | Plugin tries `registerRule("builtin")` | Registration rejected | ✅ | + +--- + +## 5. Heartbeat Failure Paths + +| ID | Scenario | Trigger | Expected Behavior | Status | +|----|----------|---------|-------------------|--------| +| HF-01 | 7-minute timeout | No heartbeat received | Status → `unstable`, notify | ✅ | +| HF-02 | 11-minute timeout | Still no heartbeat | Status → `offline`, disconnect | ✅ | +| HF-03 | Early heartbeat | Heartbeat before auth | Rejected/ignored | ✅ | +| HF-04 | Heartbeat from unauthenticated | Wrong state | Error, possible disconnect | ✅ | + +--- + +## 6. 
State Recovery Scenarios + +| ID | Scenario | Condition | Expected Recovery | Status | +|----|----------|-----------|-------------------|--------| +| SR-01 | Server restart with pending pairing | Pairing in progress | Preserve pairing state, code valid | ✅ | +| SR-02 | Server restart with active sessions | Online clients | All marked offline, reconnect required | ✅ | +| SR-03 | Client restart with credentials | Has secret + keys | Resume with auth, no re-pairing | ✅ | +| SR-04 | Client restart without credentials | First run | Full pairing flow required | ✅ | +| SR-05 | Corrupted server store | File unreadable | Surface corruption error clearly for operator handling | ✅ | +| SR-06 | Corrupted client state | File unreadable | Surface corruption error clearly for operator handling | ✅ | + +--- + +## Implementation Priority + +### Phase 1: Critical Security Paths (🔴) +1. AF-07 Nonce collision → re-pairing +2. AF-08 Rate limiting → re-pairing +3. PF-04 Admin notification failure +4. CF-06 Unauthenticated message handling + +### Phase 2: Core Functionality +5. PF-01/02 Invalid/expired pairing codes +6. AF-03/04 Signature and secret validation +7. AF-05/06 Timestamp validation +8. HF-01/02 Heartbeat timeout handling + +### Phase 3: Edge Cases +9. All connection failure paths +10. State recovery scenarios +11. 
Double-attempt scenarios + +--- + +## Test Implementation Notes + +### Running the Matrix + +```bash +# Run specific failure path category +npm test -- pairing-failures +npm test -- auth-failures +npm test -- connection-failures + +# Run all failure path tests +npm test -- failure-paths +``` + +### Current Notes + +- AF-04 (`invalid_secret`) 目前明确按 v1 语义并入 `invalid_signature`,不再单独视为未完成缺口;若后续要保留独立错误码,需要先同步更新协议与实现。 +- RP-03(管理员主动撤销)与 RP-04(key rotation)继续作为 v2+ 议题保留,不阻塞当前 v1 交付判断。 +- 本轮已补齐 AF-01/02/03/05/06/09/10/11、RP-01/02、CF-01/02/03/04/05/07、HF-01/02、PF-08/09/10、SR-01/02/03/04/05/06。 + +### Umbrella Validation Entry Point + +在 umbrella 仓库根目录可运行: + +```bash +./scripts/validate-v1.sh +``` + +它会顺序执行: +- `Yonexus.Protocol` 类型检查 + 测试 +- `Yonexus.Server` 类型检查 + 测试 +- `Yonexus.Client` 类型检查 + 测试 + +### Adding New Test Cases + +1. Add row to appropriate table above +2. Assign unique ID (PF-, AF-, RP-, CF-, HF-, SR- prefix) +3. Update status when implementing +4. Link to test file location + +--- + +## Cross-References + +- Protocol spec: `../PROTOCOL.md` +- Acceptance criteria: `../../ACCEPTANCE.md` +- Server tests: `../../Yonexus.Server/tests/` +- Client tests: `../../Yonexus.Client/tests/` diff --git a/tests/failure-path/pairing-failures.test.ts b/tests/failure-path/pairing-failures.test.ts new file mode 100644 index 0000000..b295fce --- /dev/null +++ b/tests/failure-path/pairing-failures.test.ts @@ -0,0 +1,666 @@ +import { describe, expect, it, vi, beforeEach, afterEach } from "vitest"; +import { + decodeBuiltin, + encodeBuiltin, + buildHello, + buildHelloAck, + buildPairRequest, + buildPairConfirm, + buildPairFailed, + buildPairSuccess, + type PairConfirmPayload, + type PairFailedPayload, + YONEXUS_PROTOCOL_VERSION, + ProtocolErrorCode +} from "../../Yonexus.Protocol/src/index.js"; +import { createYonexusServerRuntime } from "../../Yonexus.Server/plugin/core/runtime.js"; +import type { ClientRecord } from "../../Yonexus.Server/plugin/core/persistence.js"; +import type { 
// ============================================================================
// Test Utilities
// ============================================================================

/**
 * Builds a stand-in for the socket carried by a ClientConnection.
 * Only `close` is stubbed; the object is cast to the connection's `ws` type.
 */
function createMockSocket() {
  return { close: vi.fn() } as unknown as ClientConnection["ws"];
}

/**
 * Creates an unauthenticated connection with a fixed `connectedAt` timestamp.
 *
 * @param identifier - Identifier to pre-assign, or `null` for a connection
 *   that has not announced one yet.
 */
function createConnection(identifier: string | null = null): ClientConnection {
  return {
    identifier,
    ws: createMockSocket(),
    connectedAt: 1_710_000_000,
    isAuthenticated: false
  };
}

/**
 * Creates an in-memory YonexusServerStore.
 *
 * `load` resolves with a copy of the currently persisted records; `save`
 * replaces the persisted set with the records it is given. Both are `vi.fn`
 * mocks so tests can assert on calls.
 *
 * @param initialClients - Records present before the first `load`.
 */
function createMockStore(initialClients: ClientRecord[] = []): YonexusServerStore {
  // Explicit type arguments are required: without them the `[id, record]`
  // array literal is not inferred as a tuple entry.
  const persisted = new Map<string, ClientRecord>(initialClients.map((r) => [r.identifier, r]));
  return {
    filePath: "/tmp/test.json",
    load: vi.fn(async () => ({
      version: 1,
      persistedAt: 1_710_000_000,
      // Hand out a copy so callers cannot mutate the persisted map directly.
      clients: new Map(persisted)
    })),
    save: vi.fn(async (clients: Iterable<ClientRecord>) => {
      persisted.clear();
      for (const c of clients) persisted.set(c.identifier, c);
    })
  };
}

/**
 * Creates a ServerTransport mock that records traffic instead of sending it.
 *
 * @returns The transport plus two capture arrays: `sent` (every message passed
 *   to `send` / `sendToConnection`) and `closed` (every `closeConnection` call).
 */
function createMockTransport() {
  const sent: Array<{ connection: ClientConnection; message: string }> = [];
  const closed: Array<{ identifier: string; code?: number; reason?: string }> = [];

  const transport: ServerTransport = {
    isRunning: false,
    connections: new Map(),
    start: vi.fn(),
    stop: vi.fn(),
    // `send` only knows the identifier, so a partial connection is recorded.
    send: vi.fn((id: string, msg: string) => { sent.push({ connection: { identifier: id } as ClientConnection, message: msg }); return true; }),
    sendToConnection: vi.fn((conn: ClientConnection, msg: string) => { sent.push({ connection: conn, message: msg }); return true; }),
    broadcast: vi.fn(),
    closeConnection: vi.fn((id: string, code?: number, reason?: string) => { closed.push({ identifier: id, code, reason }); return true; }),
    promoteToAuthenticated: vi.fn(),
    removeTempConnection: vi.fn(),
    assignIdentifierToTemp: vi.fn()
  };

  return { transport, sent, closed };
}
PairFailedPayload).reason).toBe("invalid_code"); + + // Client remains in pending state, can retry + expect(runtime.state.registry.clients.get("client-a")?.pairingStatus).toBe("pending"); + }); + + it("allows retry after invalid code failure", async () => { + const store = createMockStore(); + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: ["client-a"], + notifyBotToken: "test-token", + adminUserId: "admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + const conn = createConnection(); + await runtime.handleMessage(conn, encodeBuiltin(buildHello( + { identifier: "client-a", hasSecret: false, hasKeyPair: true, protocolVersion: YONEXUS_PROTOCOL_VERSION }, + { timestamp: now } + ))); + + const correctCode = runtime.state.registry.clients.get("client-a")?.pairingCode; + + // First attempt: wrong code + await runtime.handleMessage(conn, encodeBuiltin(buildPairConfirm( + { identifier: "client-a", pairingCode: "WRONG" }, + { timestamp: now + 10 } + ))); + expect(decodeBuiltin(sent.at(-1)!.message).type).toBe("pair_failed"); + + // Second attempt: correct code + await runtime.handleMessage(conn, encodeBuiltin(buildPairConfirm( + { identifier: "client-a", pairingCode: correctCode! 
}, + { timestamp: now + 20 } + ))); + expect(decodeBuiltin(sent.at(-1)!.message).type).toBe("pair_success"); + }); + }); + + describe("PF-02: Expired pairing code", () => { + it("returns pair_failed(expired) when code submitted after expiry", async () => { + const store = createMockStore(); + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: ["client-a"], + notifyBotToken: "test-token", + adminUserId: "admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + const conn = createConnection(); + await runtime.handleMessage(conn, encodeBuiltin(buildHello( + { identifier: "client-a", hasSecret: false, hasKeyPair: true, protocolVersion: YONEXUS_PROTOCOL_VERSION }, + { timestamp: now } + ))); + + const pairingCode = runtime.state.registry.clients.get("client-a")?.pairingCode; + const expiresAt = runtime.state.registry.clients.get("client-a")?.pairingExpiresAt; + expect(expiresAt).toBeDefined(); + + // Advance time past expiry + now = expiresAt! + 1; + + await runtime.handleMessage(conn, encodeBuiltin(buildPairConfirm( + { identifier: "client-a", pairingCode: pairingCode! 
}, + { timestamp: now } + ))); + + const lastMessage = decodeBuiltin(sent.at(-1)!.message); + expect(lastMessage.type).toBe("pair_failed"); + expect((lastMessage.payload as PairFailedPayload).reason).toBe("expired"); + + // Pairing state reset to allow new pairing + expect(runtime.state.registry.clients.get("client-a")?.pairingStatus).toBe("unpaired"); + expect(runtime.state.registry.clients.get("client-a")?.pairingCode).toBeUndefined(); + }); + }); + + describe("PF-03: Identifier not in allowlist", () => { + it("rejects hello from unknown identifier", async () => { + const store = createMockStore(); + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: ["allowed-client"], // Only this one is allowed + notifyBotToken: "test-token", + adminUserId: "admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + const conn = createConnection(); + await runtime.handleMessage(conn, encodeBuiltin(buildHello( + { identifier: "unknown-client", hasSecret: false, hasKeyPair: true, protocolVersion: YONEXUS_PROTOCOL_VERSION }, + { timestamp: now } + ))); + + // Should receive hello_ack with rejected or an error + const lastMessage = decodeBuiltin(sent.at(-1)!.message); + expect(lastMessage.type).toBe("error"); + // Identifier should not be registered + expect(runtime.state.registry.clients.has("unknown-client")).toBe(false); + }); + + it("rejects pair_confirm from unknown identifier even if somehow received", async () => { + const store = createMockStore(); + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: ["allowed-client"], + notifyBotToken: "test-token", + adminUserId: "admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + // Try to send pair_confirm for 
unknown client + const conn = createConnection(); + await runtime.handleMessage(conn, encodeBuiltin(buildPairConfirm( + { identifier: "unknown-client", pairingCode: "SOME-CODE" }, + { timestamp: now } + ))); + + const lastMessage = decodeBuiltin(sent.at(-1)!.message); + expect(lastMessage.type).toBe("pair_failed"); + expect((lastMessage.payload as PairFailedPayload).reason).toBe("identifier_not_allowed"); + }); + }); + + describe("PF-04: Admin notification failure", () => { + it("fails pairing when notification cannot be sent", async () => { + const store = createMockStore(); + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: ["client-a"], + notifyBotToken: "", // Empty token should cause notification failure + adminUserId: "admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + const conn = createConnection(); + await runtime.handleMessage(conn, encodeBuiltin(buildHello( + { identifier: "client-a", hasSecret: false, hasKeyPair: true, protocolVersion: YONEXUS_PROTOCOL_VERSION }, + { timestamp: now } + ))); + + // Check the pair_request indicates notification failure + const pairRequest = sent.find(m => decodeBuiltin(m.message).type === "pair_request"); + expect(pairRequest).toBeDefined(); + + // Should not have created a valid pending pairing + const record = runtime.state.registry.clients.get("client-a"); + if (record?.pairingStatus === "pending") { + // If notification failed, pairing should indicate this + expect(record.pairingNotifyStatus).toBe("failed"); + } + }); + }); + + describe("PF-05: Empty pairing code", () => { + it("rejects empty pairing code", async () => { + const store = createMockStore(); + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: ["client-a"], + notifyBotToken: "test-token", + adminUserId: 
"admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + const conn = createConnection(); + await runtime.handleMessage(conn, encodeBuiltin(buildHello( + { identifier: "client-a", hasSecret: false, hasKeyPair: true, protocolVersion: YONEXUS_PROTOCOL_VERSION }, + { timestamp: now } + ))); + + // Submit empty code + await runtime.handleMessage(conn, encodeBuiltin(buildPairConfirm( + { identifier: "client-a", pairingCode: "" }, + { timestamp: now + 10 } + ))); + + const lastMessage = decodeBuiltin(sent.at(-1)!.message); + expect(lastMessage.type).toBe("pair_failed"); + expect((lastMessage.payload as PairFailedPayload).reason).toBe("invalid_code"); + }); + + it("rejects whitespace-only pairing code", async () => { + const store = createMockStore(); + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: ["client-a"], + notifyBotToken: "test-token", + adminUserId: "admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + const conn = createConnection(); + await runtime.handleMessage(conn, encodeBuiltin(buildHello( + { identifier: "client-a", hasSecret: false, hasKeyPair: true, protocolVersion: YONEXUS_PROTOCOL_VERSION }, + { timestamp: now } + ))); + + // Submit whitespace code + await runtime.handleMessage(conn, encodeBuiltin(buildPairConfirm( + { identifier: "client-a", pairingCode: " \t\n " }, + { timestamp: now + 10 } + ))); + + const lastMessage = decodeBuiltin(sent.at(-1)!.message); + expect(lastMessage.type).toBe("pair_failed"); + }); + }); + + describe("PF-06: Malformed pair_confirm payload", () => { + it("handles missing identifier in pair_confirm", async () => { + const store = createMockStore(); + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: 
["client-a"], + notifyBotToken: "test-token", + adminUserId: "admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + const conn = createConnection(); + await runtime.handleMessage(conn, encodeBuiltin(buildHello( + { identifier: "client-a", hasSecret: false, hasKeyPair: true, protocolVersion: YONEXUS_PROTOCOL_VERSION }, + { timestamp: now } + ))); + + // Send malformed payload (missing fields) + await runtime.handleMessage(conn, encodeBuiltin({ + type: "pair_confirm", + timestamp: now, + payload: { pairingCode: "SOME-CODE" } // Missing identifier + })); + + // Should receive an error response + const lastMessage = decodeBuiltin(sent.at(-1)!.message); + expect(lastMessage.type).toBe("error"); + }); + + it("handles missing pairingCode in pair_confirm", async () => { + const store = createMockStore(); + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: ["client-a"], + notifyBotToken: "test-token", + adminUserId: "admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + const conn = createConnection(); + await runtime.handleMessage(conn, encodeBuiltin(buildHello( + { identifier: "client-a", hasSecret: false, hasKeyPair: true, protocolVersion: YONEXUS_PROTOCOL_VERSION }, + { timestamp: now } + ))); + + // Send malformed payload (missing pairingCode) + await runtime.handleMessage(conn, encodeBuiltin({ + type: "pair_confirm", + timestamp: now, + payload: { identifier: "client-a" } // Missing pairingCode + })); + + const lastMessage = decodeBuiltin(sent.at(-1)!.message); + expect(lastMessage.type).toBe("pair_failed"); + }); + }); + + describe("PF-07: Double pairing attempt", () => { + it("rejects pair_confirm for already paired client", async () => { + const store = createMockStore([{ + identifier: "client-a", + pairingStatus: "paired", 
+ publicKey: "existing-key", + secret: "existing-secret", + status: "offline", + recentNonces: [], + recentHandshakeAttempts: [], + createdAt: now - 1000, + updatedAt: now - 500, + pairedAt: now - 500 + }]); + + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: ["client-a"], + notifyBotToken: "test-token", + adminUserId: "admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + // Try to pair an already paired client + const conn = createConnection(); + await runtime.handleMessage(conn, encodeBuiltin(buildPairConfirm( + { identifier: "client-a", pairingCode: "SOME-CODE" }, + { timestamp: now } + ))); + + // Should reject since already paired + const lastMessage = decodeBuiltin(sent.at(-1)!.message); + expect(lastMessage.type).toBe("pair_failed"); + + // Existing trust material preserved + const record = runtime.state.registry.clients.get("client-a"); + expect(record?.pairingStatus).toBe("paired"); + expect(record?.secret).toBe("existing-secret"); + }); + }); + + describe("Edge Cases", () => { + it("PF-08: pairing attempt during an active paired session is rejected without losing trust", async () => { + const store = createMockStore([{ + identifier: "client-a", + pairingStatus: "paired", + publicKey: "existing-key", + secret: "existing-secret", + status: "online", + recentNonces: [], + recentHandshakeAttempts: [], + createdAt: now - 1000, + updatedAt: now - 10, + pairedAt: now - 500, + lastAuthenticatedAt: now - 5, + lastHeartbeatAt: now - 5 + }]); + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: ["client-a"], + notifyBotToken: "test-token", + adminUserId: "admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + const conn = 
createConnection("client-a"); + await runtime.handleMessage(conn, encodeBuiltin(buildPairConfirm( + { identifier: "client-a", pairingCode: "NEW-PAIR-CODE" }, + { timestamp: now } + ))); + + const lastMessage = decodeBuiltin(sent.at(-1)!.message); + expect(lastMessage.type).toBe("pair_failed"); + expect((lastMessage.payload as PairFailedPayload).reason).toBe("internal_error"); + + const record = runtime.state.registry.clients.get("client-a"); + expect(record).toMatchObject({ + pairingStatus: "paired", + secret: "existing-secret", + publicKey: "existing-key", + status: "online" + }); + }); + + it("handles concurrent pair_confirm from different connections with same identifier", async () => { + const store = createMockStore(); + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: ["client-a"], + notifyBotToken: "test-token", + adminUserId: "admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + // First connection starts pairing + const conn1 = createConnection(); + await runtime.handleMessage(conn1, encodeBuiltin(buildHello( + { identifier: "client-a", hasSecret: false, hasKeyPair: true, protocolVersion: YONEXUS_PROTOCOL_VERSION }, + { timestamp: now } + ))); + + const pairingCode = runtime.state.registry.clients.get("client-a")?.pairingCode; + + // Second connection tries to pair with same identifier + const conn2 = createConnection(); + await runtime.handleMessage(conn2, encodeBuiltin(buildPairConfirm( + { identifier: "client-a", pairingCode: pairingCode! 
}, + { timestamp: now + 10 } + ))); + + // Should succeed - pairing is identifier-based, not connection-based + const lastMessage = decodeBuiltin(sent.at(-1)!.message); + expect(lastMessage.type).toBe("pair_success"); + }); + + it("cleans up pending pairing state on expiry", async () => { + const store = createMockStore(); + const { transport, sent } = createMockTransport(); + const runtime = createYonexusServerRuntime({ + config: { + followerIdentifiers: ["client-a"], + notifyBotToken: "test-token", + adminUserId: "admin", + listenHost: "127.0.0.1", + listenPort: 8787 + }, + store, + transport, + now: () => now + }); + + await runtime.start(); + + const conn = createConnection(); + await runtime.handleMessage(conn, encodeBuiltin(buildHello( + { identifier: "client-a", hasSecret: false, hasKeyPair: true, protocolVersion: YONEXUS_PROTOCOL_VERSION }, + { timestamp: now } + ))); + + // Verify pending state exists + const recordBefore = runtime.state.registry.clients.get("client-a"); + expect(recordBefore?.pairingStatus).toBe("pending"); + expect(recordBefore?.pairingCode).toBeDefined(); + + // Expire and try to use old code + now += 400; // Past default TTL + await runtime.handleMessage(conn, encodeBuiltin(buildPairConfirm( + { identifier: "client-a", pairingCode: recordBefore?.pairingCode! 
}, + { timestamp: now } + ))); + + const lastMessage = decodeBuiltin(sent.at(-1)!.message); + expect(lastMessage.type).toBe("pair_failed"); + expect((lastMessage.payload as PairFailedPayload).reason).toBe("expired"); + + // State cleaned up + const recordAfter = runtime.state.registry.clients.get("client-a"); + expect(recordAfter?.pairingStatus).toBe("unpaired"); + expect(recordAfter?.pairingCode).toBeUndefined(); + }); + }); +}); diff --git a/tests/integration/framework.test.ts b/tests/integration/framework.test.ts new file mode 100644 index 0000000..35f2834 --- /dev/null +++ b/tests/integration/framework.test.ts @@ -0,0 +1,630 @@ +import { describe, expect, it, vi, beforeEach, afterEach } from "vitest"; +import type { ClientConnection, ServerTransport } from "../../Yonexus.Server/plugin/core/transport.js"; +import type { ClientTransport } from "../../Yonexus.Client/plugin/core/transport.js"; +import type { YonexusServerStore } from "../../Yonexus.Server/plugin/core/store.js"; +import type { YonexusClientStateStore } from "../../Yonexus.Client/plugin/core/state.js"; +import { createYonexusServerRuntime } from "../../Yonexus.Server/plugin/core/runtime.js"; +import { createYonexusClientRuntime } from "../../Yonexus.Client/plugin/core/runtime.js"; +import { + decodeBuiltin, + encodeBuiltin, + buildHello, + buildHelloAck, + buildPairRequest, + buildPairConfirm, + buildPairSuccess, + buildAuthRequest, + buildAuthSuccess, + buildHeartbeat, + buildHeartbeatAck, + createAuthRequestSigningInput, + YONEXUS_PROTOCOL_VERSION +} from "../../Yonexus.Protocol/src/index.js"; +import { generateKeyPair, signMessage } from "../../Yonexus.Client/plugin/crypto/keypair.js"; +import type { ClientRecord } from "../../Yonexus.Server/plugin/core/persistence.js"; +import type { YonexusClientState } from "../../Yonexus.Client/plugin/core/state.js"; + +/** + * Yonexus Server-Client Integration Test Framework + * + * This module provides utilities for testing Server and Client interactions + * 
* without requiring real network sockets.
*/

// ============================================================================
// Mock Transport Pair - Simulates network connection between Server and Client
// ============================================================================

/** In-memory queues standing in for the two directions of a socket. */
export interface MockMessageChannel {
  /** Messages pushed by the server transport mock, oldest first. */
  serverToClient: string[];
  /** Messages pushed by the client transport mock, oldest first. */
  clientToServer: string[];
}

/** A server transport mock and a client transport mock sharing one channel. */
export interface MockTransportPair {
  serverTransport: ServerTransport;
  clientTransport: ClientTransport;
  channel: MockMessageChannel;
  /** Copy of everything the client has sent and that has not been cleared. */
  getServerReceived: () => string[];
  /** Copy of everything the server has sent and that has not been cleared. */
  getClientReceived: () => string[];
  /** Empties both directions of the channel. */
  clearMessages: () => void;
}

/**
 * Builds a linked pair of transport mocks.
 *
 * Nothing is delivered automatically: sends only append to `channel`, and the
 * test decides when to hand the queued messages to the opposite runtime.
 *
 * @returns The two mocks, the shared channel and helpers to inspect/reset it.
 */
export function createMockTransportPair(): MockTransportPair {
  const channel: MockMessageChannel = {
    serverToClient: [],
    clientToServer: []
  };

  // Promoted (authenticated) server-side connections, keyed by identifier.
  // Explicit type arguments keep this from degrading to Map<any, any>.
  const serverConnections = new Map<string, ClientConnection>();
  // The single not-yet-promoted connection; created by clientTransport.connect().
  let tempConnection: ClientConnection | null = null;

  // Server Transport Mock
  const serverTransport: ServerTransport = {
    isRunning: false,
    connections: serverConnections,

    start: vi.fn(async () => {
      serverTransport.isRunning = true;
    }),

    stop: vi.fn(async () => {
      serverTransport.isRunning = false;
      serverConnections.clear();
    }),

    // Only promoted connections are addressable by identifier.
    send: vi.fn((identifier: string, message: string) => {
      if (serverConnections.has(identifier)) {
        channel.serverToClient.push(message);
        return true;
      }
      return false;
    }),

    // The connection argument is ignored: there is only one channel.
    sendToConnection: vi.fn((connection: ClientConnection, message: string) => {
      channel.serverToClient.push(message);
      return true;
    }),

    // Broadcasts are tagged so tests can tell them apart from direct sends.
    broadcast: vi.fn((message: string) => {
      channel.serverToClient.push(`[broadcast]:${message}`);
    }),

    // Reports true even when no connection was registered for the identifier.
    closeConnection: vi.fn((identifier: string, code?: number, reason?: string) => {
      const conn = serverConnections.get(identifier);
      if (conn) {
        conn.isAuthenticated = false;
        serverConnections.delete(identifier);
      }
      return true;
    }),

    assignIdentifierToTemp: vi.fn((ws, identifier: string) => {
      if (tempConnection) {
        tempConnection.identifier = identifier;
      }
    }),

    // Moves the temp connection into the identifier map once it has been
    // assigned the same identifier; otherwise does nothing.
    promoteToAuthenticated: vi.fn((identifier: string, ws) => {
      if (tempConnection && tempConnection.identifier === identifier) {
        tempConnection.isAuthenticated = true;
        serverConnections.set(identifier, tempConnection);
        tempConnection = null;
      }
    }),

    removeTempConnection: vi.fn(() => {
      tempConnection = null;
    })
  };

  // Client Transport Mock
  let clientState: import("../../Yonexus.Client/plugin/core/transport.js").ClientConnectionState = "idle";

  const clientTransport: ClientTransport = {
    get state() {
      return clientState;
    },

    get isConnected() {
      return clientState !== "idle" && clientState !== "disconnected" && clientState !== "error";
    },

    get isAuthenticated() {
      return clientState === "authenticated";
    },

    connect: vi.fn(async () => {
      clientState = "connected";
      // Simulate connection - create temp connection on server side
      tempConnection = {
        identifier: null,
        ws: { close: vi.fn() } as unknown as WebSocket,
        connectedAt: Date.now(),
        isAuthenticated: false
      };
    }),

    // Drops only the temp connection; an already promoted entry stays in
    // serverConnections until the server side closes it.
    disconnect: vi.fn(() => {
      clientState = "disconnected";
      tempConnection = null;
    }),

    // Sending is refused unless the client is in one of the live states.
    send: vi.fn((message: string) => {
      if (clientState === "connected" || clientState === "authenticated" || clientState === "authenticating") {
        channel.clientToServer.push(message);
        return true;
      }
      return false;
    }),

    markAuthenticated: vi.fn(() => {
      clientState = "authenticated";
    }),

    markAuthenticating: vi.fn(() => {
      clientState = "authenticating";
    })
  };

  return {
    serverTransport,
    clientTransport,
    channel,
    getServerReceived: () => [...channel.clientToServer],
    getClientReceived: () => [...channel.serverToClient],
    clearMessages: () => {
      channel.serverToClient.length = 0;
      channel.clientToServer.length = 0;
    }
  };
}

// ============================================================================
// Mock Store Factories
// ============================================================================

/**
 * Creates an in-memory server store.
 *
 * @param initialClients Records that the first `load()` should return.
 * @returns A store whose `load`/`save` are vitest mocks; `save` replaces the
 *   whole persisted set with the records it is given.
 */
export function createMockServerStore(initialClients: ClientRecord[] = []): YonexusServerStore {
  const persisted = new Map(initialClients.map((record) => [record.identifier, record]));

  return {
    filePath: "/tmp/yonexus-server-test.json",
    // A fresh Map is handed out on every load so callers cannot mutate the
    // persisted key set directly (the records themselves are shared).
    load: vi.fn(async () => ({
      version: 1,
      persistedAt: Date.now(),
      clients: new Map(persisted)
    })),
    save: vi.fn(async (clients: Iterable<ClientRecord>) => {
      persisted.clear();
      for (const client of clients) {
        persisted.set(client.identifier, client);
      }
    })
  };
}

/**
 * Creates an in-memory client state store.
 *
 * @param initialState Optional seed values; `identifier` falls back to
 *   "test-client" and `updatedAt` to `Date.now()`.
 * @returns A store whose `load` resolves to a shallow copy of the current
 *   state and whose `save` replaces it with a shallow copy of the argument.
 */
export function createMockClientStore(initialState?: Partial<YonexusClientState>): YonexusClientStateStore {
  let state: YonexusClientState = {
    identifier: initialState?.identifier ?? "test-client",
    publicKey: initialState?.publicKey,
    privateKey: initialState?.privateKey,
    secret: initialState?.secret,
    pairedAt: initialState?.pairedAt,
    authenticatedAt: initialState?.authenticatedAt,
    updatedAt: initialState?.updatedAt ?? Date.now()
  };

  return {
    filePath: "/tmp/yonexus-client-test.json",
    load: vi.fn(async () => ({ ...state })),
    save: vi.fn(async (next) => {
      state = { ...next };
    })
  };
}

// ============================================================================
// Test Runtime Factory
// ============================================================================

/** Everything a test needs to drive one server runtime against one client runtime. */
export interface IntegrationTestContext {
  serverRuntime: ReturnType<typeof createYonexusServerRuntime>;
  clientRuntime: ReturnType<typeof createYonexusClientRuntime>;
  transports: MockTransportPair;
  serverStore: YonexusServerStore;
  clientStore: YonexusClientStateStore;
  /** Adds `seconds` to the clock value returned by both runtimes' `now()`. */
  advanceTime: (seconds: number) => void;
  /** Feeds every queued server -> client message to the client runtime. */
  processServerToClient: () => Promise<void>;
  /** Feeds every queued client -> server message to the server runtime. */
  processClientToServer: () => Promise<void>;
  /** Runs `processClientToServer` and then `processServerToClient`. */
  processAllMessages: () => Promise<void>;
}

/**
 * Wires a server runtime and a client runtime together over mock transports
 * and stores, and starts the server runtime.
 *
 * @param options.clientIdentifier Identifier used for the client and as the
 *   only allowlisted follower (default "test-client").
 * @param options.paired Accepted for call-site readability; not read here.
 * @param options.authenticated Value of `isAuthenticated` on the connection
 *   object passed to the server when relaying client messages (default false).
 * @param options.serverTime Initial value of the shared clock
 *   (default 1_710_000_000).
 * @param options.initialClientState Seed for the client state store.
 * @param options.initialServerClients Seed for the server store.
 * @returns The assembled context; the client runtime is NOT started.
 */
export async function createIntegrationTestContext(
  options: {
    clientIdentifier?: string;
    paired?: boolean;
    authenticated?: boolean;
    serverTime?: number;
    initialClientState?: Partial<YonexusClientState>;
    initialServerClients?: ClientRecord[];
  } = {}
): Promise<IntegrationTestContext> {
  const initialNow = options.serverTime ?? 1_710_000_000;
  const identifier = options.clientIdentifier ?? "test-client";

  const transports = createMockTransportPair();
  const serverStore = createMockServerStore(options.initialServerClients ?? []);
  const clientStore = createMockClientStore({ identifier, ...options.initialClientState });

  // Single clock shared by both runtimes so they always agree on "now".
  let currentTime = initialNow;

  const serverRuntime = createYonexusServerRuntime({
    config: {
      followerIdentifiers: [identifier],
      notifyBotToken: "test-token",
      adminUserId: "admin-user",
      listenHost: "127.0.0.1",
      listenPort: 8787
    },
    store: serverStore,
    transport: transports.serverTransport,
    now: () => currentTime
  });

  const clientRuntime = createYonexusClientRuntime({
    config: {
      mainHost: "ws://localhost:8787",
      identifier,
      notifyBotToken: "test-token",
      adminUserId: "admin-user"
    },
    transport: transports.clientTransport,
    stateStore: clientStore,
    now: () => currentTime
  });

  await serverRuntime.start();

  const advanceTime = (seconds: number) => {
    currentTime += seconds;
  };

  // Message processing helpers.
  // Each helper drains ONLY its own direction. Clearing both queues here would
  // silently discard messages that are still waiting to travel the other way.
  const processServerToClient = async () => {
    const messages = transports.channel.serverToClient.splice(0);
    for (const msg of messages) {
      await clientRuntime.handleMessage(msg);
    }
  };

  const processClientToServer = async () => {
    const messages = transports.channel.clientToServer.splice(0);

    // A fresh connection object is built for each batch; it is not the mock
    // transport's own temp connection.
    const connection = {
      identifier: identifier,
      ws: { close: vi.fn() } as unknown as WebSocket,
      connectedAt: currentTime,
      isAuthenticated: options.authenticated ?? false
    };

    for (const msg of messages) {
      await serverRuntime.handleMessage(connection, msg);
    }
  };

  const processAllMessages = async () => {
    await processClientToServer();
    await processServerToClient();
  };

  return {
    serverRuntime,
    clientRuntime,
    transports,
    serverStore,
    clientStore,
    advanceTime,
    processServerToClient,
    processClientToServer,
    processAllMessages
  };
}

// ============================================================================
// Integration Test Suite
// ============================================================================

describe("Yonexus Server-Client Integration", () => {
  beforeEach(() => {
    vi.useFakeTimers();
  });

  afterEach(() => {
    vi.useRealTimers();
  });

  describe("First-Time Pairing Flow", () => {
    it("completes full pairing and authentication cycle", async () => {
      const ctx = await createIntegrationTestContext({
        clientIdentifier: "new-client"
      });

      // Step 1: Client connects and sends hello
      await ctx.clientRuntime.start();
      ctx.clientRuntime.handleTransportStateChange("connected");
      await vi.advanceTimersByTimeAsync(100);

      // Process hello -> hello_ack + pair_request
      await ctx.processClientToServer();
      await ctx.processServerToClient();

      // Verify client received pair_request
      expect(ctx.clientRuntime.state.phase).toBe("waiting_pair_confirm");
      expect(ctx.clientRuntime.state.pendingPairing).toBeDefined();

      // Step 2: Client submits pairing code
      const pairingCode = ctx.serverRuntime.state.registry.clients.get("new-client")?.pairingCode;
      expect(pairingCode).toBeDefined();

      ctx.clientRuntime.submitPairingCode(pairingCode!, "req-pair-confirm");
      await vi.advanceTimersByTimeAsync(100);

      // Process pair_confirm -> pair_success
      await ctx.processClientToServer();
      await ctx.processServerToClient();

      // Verify client received secret
      expect(ctx.clientRuntime.state.clientState.secret).toBeDefined();
      expect(ctx.clientRuntime.state.phase).toBe("auth_required");

      // Step 3: Client sends auth request
      await vi.advanceTimersByTimeAsync(100);
      await ctx.processClientToServer();
      await ctx.processServerToClient();

      // Verify authentication success
      expect(ctx.clientRuntime.state.phase).toBe("authenticated");
      expect(ctx.serverRuntime.state.registry.sessions.get("new-client")?.isAuthenticated).toBe(true);
    });
  });

  describe("Reconnection Flow", () => {
    it("reconnects with existing credentials without re-pairing", async () => {
      const now = 1_710_000_000;
      const keyPair = await generateKeyPair();

      const ctx = await createIntegrationTestContext({
        clientIdentifier: "reconnect-client",
        paired: true,
        authenticated: false,
        // Same value as the factory default; stated so the fixture timestamps
        // below are visibly relative to the runtime clock.
        serverTime: now,
        initialClientState: {
          secret: "existing-secret",
          publicKey: keyPair.publicKey.trim(),
          privateKey: keyPair.privateKey,
          pairedAt: now - 1000,
          updatedAt: now - 1000
        },
        initialServerClients: [
          {
            identifier: "reconnect-client",
            pairingStatus: "paired",
            publicKey: keyPair.publicKey.trim(),
            secret: "existing-secret",
            status: "offline",
            recentNonces: [],
            recentHandshakeAttempts: [],
            createdAt: now - 2000,
            updatedAt: now - 1000
          }
        ]
      });

      // Connect and send hello
      await ctx.clientRuntime.start();
      ctx.clientRuntime.handleTransportStateChange("connected");
      await vi.advanceTimersByTimeAsync(100);

      await ctx.processClientToServer();
      await ctx.processServerToClient();

      // Should go directly to auth_required, skipping pairing
      expect(ctx.clientRuntime.state.phase).toBe("auth_required");

      // Complete authentication
      await vi.advanceTimersByTimeAsync(100);
      await ctx.processClientToServer();
      await ctx.processServerToClient();

      expect(ctx.clientRuntime.state.phase).toBe("authenticated");
    });
  });

  describe("Heartbeat Flow", () => {
    it("exchanges heartbeats after authentication", async () => {
      const now = 1_710_000_000;
      const keyPair = await generateKeyPair();
      const ctx = await createIntegrationTestContext({
        clientIdentifier: "heartbeat-client",
        serverTime: now,
        initialClientState: {
          secret: "existing-secret",
          publicKey: keyPair.publicKey.trim(),
          privateKey: keyPair.privateKey,
          pairedAt: now - 1000,
          updatedAt: now - 1000
        },
        initialServerClients: [
          {
            identifier: "heartbeat-client",
            pairingStatus: "paired",
            publicKey: keyPair.publicKey.trim(),
            secret: "existing-secret",
            status: "offline",
            recentNonces: [],
            recentHandshakeAttempts: [],
            createdAt: now - 2000,
            updatedAt: now - 1000
          }
        ]
      });

      // First round trip: hello -> hello_ack; second: auth_request -> auth_success.
      await ctx.clientRuntime.start();
      ctx.clientRuntime.handleTransportStateChange("connected");
      await vi.advanceTimersByTimeAsync(100);
      await ctx.processClientToServer();
      await ctx.processServerToClient();
      await vi.advanceTimersByTimeAsync(100);
      await ctx.processClientToServer();
      await ctx.processServerToClient();

      expect(ctx.clientRuntime.state.phase).toBe("authenticated");

      await ctx.clientRuntime.handleMessage("heartbeat_tick");
      await vi.advanceTimersByTimeAsync(100);
      await ctx.processClientToServer();

      const record = ctx.serverRuntime.state.registry.clients.get("heartbeat-client");
      expect(record?.lastHeartbeatAt).toBeDefined();
    });

    it("marks client unstable then offline after heartbeat timeout", async () => {
      const now = 1_710_000_000;
      const keyPair = await generateKeyPair();
      const ctx = await createIntegrationTestContext({
        clientIdentifier: "timed-out-client",
        serverTime: now
      });

      // Install an already-authenticated client directly in the registry and
      // the transport mock instead of replaying the whole handshake.
      ctx.serverRuntime.state.registry.clients.set("timed-out-client", {
        identifier: "timed-out-client",
        pairingStatus: "paired",
        publicKey: keyPair.publicKey.trim(),
        secret: "existing-secret",
        status: "online",
        recentNonces: [],
        recentHandshakeAttempts: [],
        lastAuthenticatedAt: now,
        lastHeartbeatAt: now,
        createdAt: now - 100,
        updatedAt: now
      });
      ctx.serverRuntime.state.registry.sessions.set("timed-out-client", {
        identifier: "timed-out-client",
        socket: { close: vi.fn() } as unknown as WebSocket,
        isAuthenticated: true,
        connectedAt: now,
        lastActivityAt: now,
        publicKey: keyPair.publicKey.trim()
      });
      ctx.transports.serverTransport.connections.set("timed-out-client", {
        identifier: "timed-out-client",
        ws: { close: vi.fn() } as unknown as WebSocket,
        connectedAt: now,
        isAuthenticated: true
      });

      // Move the runtime clock forward, then let the fake timers fire so the
      // server gets a chance to re-evaluate liveness.
      ctx.advanceTime(7 * 60);
      await vi.advanceTimersByTimeAsync(30_100);

      const unstableRecord = ctx.serverRuntime.state.registry.clients.get("timed-out-client");
      expect(unstableRecord?.status).toBe("unstable");
      expect(ctx.transports.channel.serverToClient.some((message) => {
        const envelope = decodeBuiltin(message);
        return envelope.type === "status_update";
      })).toBe(true);

      ctx.advanceTime(4 * 60);
      await vi.advanceTimersByTimeAsync(30_100);

      const offlineRecord = ctx.serverRuntime.state.registry.clients.get("timed-out-client");
      expect(offlineRecord?.status).toBe("offline");
      expect(ctx.serverRuntime.state.registry.sessions.has("timed-out-client")).toBe(false);
      expect(ctx.transports.channel.serverToClient.some((message) => {
        const envelope = decodeBuiltin(message);
        return envelope.type === "disconnect_notice";
      })).toBe(true);
    });
  });

  describe("Re-pair Flow", () => {
    it("forces client back to pair_required after nonce collision", async () => {
      const now = 1_710_000_000;
      const keyPair = await generateKeyPair();
      const collisionNonce = "NONCE1234567890123456789";
      const ctx = await createIntegrationTestContext({
        clientIdentifier: "collision-client",
        serverTime: now,
        initialClientState: {
          secret: "existing-secret",
          publicKey: keyPair.publicKey.trim(),
          privateKey: keyPair.privateKey,
          pairedAt: now - 100,
          updatedAt: now - 100
        },
        initialServerClients: [
          {
            identifier: "collision-client",
            pairingStatus: "paired",
            publicKey: keyPair.publicKey.trim(),
            secret: "existing-secret",
            status: "offline",
            // The nonce the request below reuses is already on record.
            recentNonces: [{ nonce: collisionNonce, timestamp: now - 1 }],
            recentHandshakeAttempts: [],
            createdAt: now - 200,
            updatedAt: now - 100
          }
        ]
      });

      ctx.serverRuntime.state.registry.sessions.set("collision-client", {
        identifier: "collision-client",
        socket: { close: vi.fn() } as unknown as WebSocket,
        isAuthenticated: false,
        connectedAt: now,
        lastActivityAt: now,
        publicKey: keyPair.publicKey.trim()
      });

      const authRequest = buildAuthRequest(
        {
          identifier: "collision-client",
          nonce: collisionNonce,
          proofTimestamp: now,
          signature: await signMessage(
            keyPair.privateKey,
            createAuthRequestSigningInput({
              secret: "existing-secret",
              nonce: collisionNonce,
              proofTimestamp: now
            })
          ),
          publicKey: keyPair.publicKey.trim()
        },
        { requestId: "req-collision", timestamp: now }
      );

      await ctx.serverRuntime.handleMessage(
        {
          identifier: "collision-client",
          ws: { close: vi.fn() } as unknown as WebSocket,
          connectedAt: now,
          isAuthenticated: false
        },
        encodeBuiltin(authRequest)
      );

      const serverEnvelope = decodeBuiltin(ctx.transports.channel.serverToClient.at(-1) ?? "");
      expect(serverEnvelope.type).toBe("re_pair_required");

      await ctx.clientRuntime.handleMessage(ctx.transports.channel.serverToClient.at(-1)!);
      expect(ctx.clientRuntime.state.phase).toBe("pair_required");
      expect(ctx.clientRuntime.state.clientState.secret).toBeUndefined();
      expect(ctx.clientRuntime.state.lastPairingFailure).toBe("re_pair_required");
    });
  });
});