diff --git a/NEW_FEAT.md b/NEW_FEAT.md new file mode 100644 index 0000000..04133d7 --- /dev/null +++ b/NEW_FEAT.md @@ -0,0 +1,185 @@ +# NEW_FEAT — Dirigent 设计草案(channel 成员缓存驱动 turn 初始化) + +## 背景 + +当前 turn manager 初始化依赖 `recordChannelAccount`(被动观察频道里出现过哪些 bot account)。 +在新频道/低活跃频道中,可能出现 turn state 未及时建立,导致 `currentSpeaker` 为空(`undefined/null`)并引发调度异常。 + +同时,`discord-control-api` 以独立 sidecar 运行增加了部署与运维复杂度。 + +--- + +## 目标 + +1. 将 Discord control 能力从独立 API server 收敛为插件内模块(类似 turn-manager.ts 的内部模块化)。 +2. 引入 **channel 成员列表缓存(内存 + 本地文件持久化)**,作为 turn 初始化的权威输入。 +3. 成员列表采用 **事件/工具触发更新**,不轮询。 + +--- + +## 方案总览 + +### A. 架构调整 + +- 新增模块:`plugin/discord-control.ts` +- 迁移当前 `discord-control-api/server.mjs` 的核心能力为模块函数: + - `channelPrivateCreate` + - `channelPrivateUpdate` + - `guildMemberList` + - `channelMemberList`(新增) + +> 说明:`channelMemberList` 通过 Discord 权限计算(guild 成员 + channel overwrite)得到可见成员。 + +--- + +### B. channel 成员缓存(内存 + 本地持久化) + +建议新增缓存文件(示例): +- `~/.openclaw/dirigent-channel-members.json` + +结构示例: + +```json +{ + "1479928646830391346": { + "guildId": "1368531017534537779", + "memberUserIds": ["..."], + "botAccountIds": ["neon", "nav", "lyn"], + "updatedAt": "2026-03-07T21:40:00Z", + "source": "tool|bootstrap|channel-private-create|channel-private-update" + } +} +``` + +插件启动时加载到内存;更新时先改内存再原子写盘。 + +--- + +### C. 更新策略(不轮询) + +只在以下时机刷新某 channel 成员: + +1. 通过指定工具显式更新(手动触发) +2. `channel-private-create` 成功后刷新该 channel +3. `channel-private-update` 成功后刷新该 channel +4. (建议兜底)首次遇到无缓存 channel 时允许一次 bootstrap 拉取并落盘 + +> 除上述触发外,不做轮询保活。 + +--- + +### D. turn manager 初始化改造 + +当前: +- `ensureTurnOrder()` 依赖 `recordChannelAccount` 的被动观察结果 + +改造后: +- `ensureTurnOrder(channelId)` 直接读取 channel 成员缓存中的 `botAccountIds` +- 有可用 bot 列表即 `initTurnOrder(channelId, botAccountIds)` +- 不再把 `recordChannelAccount` 作为主初始化来源(可保留为辅助观测) + +预期效果: +- 新频道首次消息即可建立 turn state +- 避免 `currentSpeaker` 长时间空值导致的并发生成异常 + +--- + +### E. 
统一 channelId 解析(`deriveDecisionInputFromPrompt` 对齐 `extractDiscordChannelId`) + +问题点: +- `deriveDecisionInputFromPrompt(prompt, messageProvider, ctx.channelId)` 目前可能优先使用 `ctx.channelId`。 +- 在 OpenClaw 的 Discord 场景中,`ctx.channelId` 常是平台名(`"discord"`),不是 snowflake。 + +改造建议: +- 在 `before_model_resolve / before_prompt_build` 路径中,先走与消息钩子一致的解析逻辑: + 1. `extractDiscordChannelId(ctx, event)`(优先) + 2. `sessionKey` 正则兜底(`discord:(channel|group):` 或 `:channel:`) + 3. prompt 中 untrusted metadata(`chat_id` / `conversation_label` / `channel_id`)作为最后兜底 +- `ctx.channelId` 仅作为末级 fallback,不再作为高优先级输入。 + +目标: +- 让 `message_received / message_sent / before_model_resolve / before_prompt_build` 使用一致的 channelId 来源,避免出现 `channel=discord` 导致 turn/policy 失配。 + +--- + +### F. 清理未使用函数(减小维护噪音) + +当前检查到 `plugin/index.ts` 中存在未被引用的函数: +- `normalizeSender(...)` +- `normalizeChannel(...)` + +计划: +- 在完成 channelId 解析统一改造后,移除上述未使用函数与相关无效注释。 +- 如后续确实需要其语义,改为在统一解析模块内落地实际调用,不保留“仅定义不使用”的中间状态。 + +目标: +- 降低代码噪音,避免误导排障(看起来像在用,实际没走到)。 +- 保持关键路径(sender/channel 解析)只有一套可追踪实现。 + +--- + +## Discord 权限计算要点(channelMemberList) + +按 Discord 规则计算 `VIEW_CHANNEL` 可见性: + +1. 基于 guild 基础权限(@everyone + 角色) +2. 叠加 channel `permission_overwrites`: + - @everyone overwrite + - role overwrites(合并) + - member overwrite(最终) +3. 最终判定是否可见频道 + +> 私密线程与普通私密文本频道语义不同,需分路径处理。 + +--- + +## 风险与注意 + +- 权限位计算需严格按 Discord bitfield 规则实现,否则成员集会偏差。 +- 缓存文件读写要原子化,防止并发写损坏。 +- 建议记录 `updatedAt/source` 便于排查“成员集为何变化”。 + +--- + +### G. 
// Channel-id resolution helpers (added by the patch as plugin/channel-resolver.ts)
// followed by the prompt-derived decision input (plugin/decision-input.ts).

/** Prefixes that may precede a Discord channel id in a conversation/"to" value. */
const CHANNEL_ID_PREFIXES = ["channel:", "discord:channel:"] as const;

/** Matches a string made of digits only. */
const DIGITS_ONLY = /^\d+$/;

/** A bare value is only trusted as a channel id when it has at least 15 digits. */
const RAW_SNOWFLAKE = /^\d{15,}$/;

/** Heading that introduces the fenced JSON metadata block inside a prompt. */
const CONVERSATION_INFO_MARKER = "Conversation info (untrusted metadata):";

/**
 * Resolve the Discord channel id from hook context / event fields.
 *
 * Candidates are inspected in order: `ctx.conversationId`, `ctx.OriginatingTo`,
 * `event.to`, `event.metadata.to`. A candidate is accepted when it is
 * `channel:<digits>`, `discord:channel:<digits>`, or a bare run of 15+ digits.
 *
 * @param ctx - Hook context object; only the fields listed above are read.
 * @param event - Optional hook event; only `to` and `metadata.to` are read.
 * @returns The channel id, or `undefined` when no candidate matches.
 */
export function extractDiscordChannelId(
  ctx: Record<string, unknown>,
  event?: Record<string, unknown>,
): string | undefined {
  const candidates: unknown[] = [
    ctx.conversationId,
    ctx.OriginatingTo,
    event?.to,
    (event?.metadata as Record<string, unknown> | null | undefined)?.to,
  ];

  for (const candidate of candidates) {
    if (typeof candidate !== "string") continue;
    const value = candidate.trim();
    if (!value) continue;

    for (const prefix of CHANNEL_ID_PREFIXES) {
      if (!value.startsWith(prefix)) continue;
      const id = value.slice(prefix.length);
      if (DIGITS_ONLY.test(id)) return id;
    }

    if (RAW_SNOWFLAKE.test(value)) return value;
  }

  return undefined;
}

/**
 * Resolve the Discord channel id embedded in a session key.
 *
 * Tries `discord:channel:<digits>` / `discord:group:<digits>` anywhere in the
 * key first, then a trailing `:channel:<digits>`.
 *
 * @param sessionKey - Session key to inspect; may be absent.
 * @returns The channel id, or `undefined` when the key is empty or has no match.
 */
export function extractDiscordChannelIdFromSessionKey(sessionKey?: string): string | undefined {
  if (!sessionKey) return undefined;

  const canonical = sessionKey.match(/discord:(?:channel|group):(\d+)/);
  if (canonical?.[1]) return canonical[1];

  const suffix = sessionKey.match(/:channel:(\d+)$/);
  if (suffix?.[1]) return suffix[1];

  return undefined;
}

/**
 * Parse the first fenced JSON block that follows the
 * "Conversation info (untrusted metadata):" marker in a prompt.
 *
 * @param text - Full prompt text.
 * @returns The parsed object, or `undefined` when the marker or fence is
 *   missing, the JSON is invalid, or the payload is not a plain key/value
 *   object (primitives, `null` and arrays are rejected).
 */
export function extractUntrustedConversationInfo(text: string): Record<string, unknown> | undefined {
  const markerIndex = text.indexOf(CONVERSATION_INFO_MARKER);
  if (markerIndex < 0) return undefined;

  const tail = text.slice(markerIndex + CONVERSATION_INFO_MARKER.length);
  const fenced = tail.match(/```json\s*([\s\S]*?)\s*```/i);
  const raw = fenced?.[1];
  if (raw === undefined) return undefined;

  try {
    const parsed: unknown = JSON.parse(raw);
    // Arrays are objects too, but callers read named fields, so reject them.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return undefined;
    return parsed as Record<string, unknown>;
  } catch {
    // Metadata is untrusted and best-effort: malformed JSON means "no metadata".
    return undefined;
  }
}

/**
 * Resolve the Discord channel id from parsed conversation metadata.
 *
 * Checked in order: `chat_id` as `channel:<digits>`, `conversation_label`
 * containing `channel id:<digits>`, then an all-digit string `channel_id`.
 *
 * @param conv - Metadata object, typically from `extractUntrustedConversationInfo`.
 * @returns The channel id, or `undefined` when no field matches.
 */
export function extractDiscordChannelIdFromConversationMetadata(
  conv: Record<string, unknown>,
): string | undefined {
  const chatId = conv.chat_id;
  if (typeof chatId === "string" && chatId.startsWith("channel:")) {
    const id = chatId.slice("channel:".length);
    if (DIGITS_ONLY.test(id)) return id;
  }

  const label = conv.conversation_label;
  if (typeof label === "string") {
    const labelMatch = label.match(/channel id:(\d+)/);
    if (labelMatch?.[1]) return labelMatch[1];
  }

  const channelId = conv.channel_id;
  if (typeof channelId === "string" && DIGITS_ONLY.test(channelId)) {
    return channelId;
  }

  return undefined;
}

/** Decision input derived from a prompt plus its hook context. */
export type DerivedDecisionInput = {
  /** Lower-cased message provider; empty string when none was given. */
  channel: string;
  /** Resolved Discord channel id, when one could be determined. */
  channelId?: string;
  /** Sender taken from the prompt metadata (`sender_id`, then `sender`). */
  senderId?: string;
  /** The prompt, unchanged. */
  content: string;
  /** Parsed prompt metadata; `{}` when absent or unparsable. */
  conv: Record<string, unknown>;
};

/** Return `value` when it is a non-empty string, otherwise `undefined`. */
function nonEmptyString(value: unknown): string | undefined {
  return typeof value === "string" && value ? value : undefined;
}

/**
 * Build the decision input for a prompt.
 *
 * Channel id resolution order:
 *   1. `extractDiscordChannelId(ctx, event)`
 *   2. `extractDiscordChannelIdFromSessionKey(sessionKey)`
 *   3. untrusted prompt metadata (`chat_id` / `conversation_label` / `channel_id`)
 *   4. `ctx.channelId`, only when it is a bare 15+ digit value. Any other
 *      value (e.g. the platform name "discord") is ignored.
 *
 * @param params - Prompt text plus optional provider, session key, ctx and event.
 * @returns The derived channel, channel id, sender id, content and metadata.
 */
export function deriveDecisionInputFromPrompt(params: {
  prompt: string;
  messageProvider?: string;
  sessionKey?: string;
  ctx?: Record<string, unknown>;
  event?: Record<string, unknown>;
}): DerivedDecisionInput {
  const { prompt, messageProvider, sessionKey, ctx, event } = params;
  const conv = extractUntrustedConversationInfo(prompt) || {};
  const channel = (messageProvider || "").toLowerCase();

  let channelId = extractDiscordChannelId(ctx || {}, event);
  if (!channelId) channelId = extractDiscordChannelIdFromSessionKey(sessionKey);
  if (!channelId) channelId = extractDiscordChannelIdFromConversationMetadata(conv);
  if (!channelId) {
    // Last-level fallback: ctx.channelId is only usable when it is a real id.
    const fromCtx = typeof ctx?.channelId === "string" ? ctx.channelId.trim() : "";
    if (RAW_SNOWFLAKE.test(fromCtx)) channelId = fromCtx;
  }

  const senderId = nonEmptyString(conv.sender_id) ?? nonEmptyString(conv.sender);

  return { channel, channelId, senderId, content: prompt, conv };
}
- * OpenClaw uses format "channel:" for Discord conversations. - * Also tries event.to and event.metadata.to as fallbacks. - */ -function extractDiscordChannelId(ctx: Record, event?: Record): string | undefined { - const candidates: unknown[] = [ - ctx.conversationId, - event?.to, - (event?.metadata as Record)?.to, - ]; - for (const c of candidates) { - if (typeof c === "string" && c.trim()) { - const s = c.trim(); - // Handle "channel:123456" format - if (s.startsWith("channel:")) { - const id = s.slice("channel:".length); - if (/^\d+$/.test(id)) return id; - } - // Handle "discord:channel:123456" format - if (s.startsWith("discord:channel:")) { - const id = s.slice("discord:channel:".length); - if (/^\d+$/.test(id)) return id; - } - // If it's a raw snowflake (all digits), use directly - if (/^\d{15,}$/.test(s)) return s; - } - } - return undefined; -} - -function normalizeSender(event: Record, ctx: Record): string | undefined { - const direct = [ctx.senderId, ctx.from, event.from]; - for (const v of direct) { - if (typeof v === "string" && v.trim()) return v.trim(); - } - - const meta = (event.metadata || ctx.metadata) as Record | undefined; - if (!meta) return undefined; - const metaCandidates = [meta.senderId, meta.sender_id, meta.userId, meta.user_id]; - for (const v of metaCandidates) { - if (typeof v === "string" && v.trim()) return v.trim(); - } - - return undefined; -} - -function extractUntrustedConversationInfo(text: string): Record | undefined { - const marker = "Conversation info (untrusted metadata):"; - const idx = text.indexOf(marker); - if (idx < 0) return undefined; - const tail = text.slice(idx + marker.length); - const m = tail.match(/```json\s*([\s\S]*?)\s*```/i); - if (!m) return undefined; - try { - const parsed = JSON.parse(m[1]); - return parsed && typeof parsed === "object" ? 
(parsed as Record) : undefined; - } catch { - return undefined; - } -} - -function deriveDecisionInputFromPrompt( - prompt: string, - messageProvider?: string, - channelIdFromCtx?: string, -): { - channel: string; - channelId?: string; - senderId?: string; - content: string; - conv: Record; -} { - const conv = extractUntrustedConversationInfo(prompt) || {}; - const channel = (messageProvider || "").toLowerCase(); - - // Priority: ctx.channelId > conv.chat_id > conversation_label > conv.channel_id - let channelId = channelIdFromCtx; - if (!channelId) { - // Try chat_id field (format "channel:123456") - if (typeof conv.chat_id === "string" && conv.chat_id.startsWith("channel:")) { - channelId = conv.chat_id.slice("channel:".length); - } - // Try conversation_label (format "Guild #name channel id:123456") - if (!channelId && typeof conv.conversation_label === "string") { - const labelMatch = conv.conversation_label.match(/channel id:(\d+)/); - if (labelMatch) channelId = labelMatch[1]; - } - // Try channel_id field directly - if (!channelId && typeof conv.channel_id === "string" && conv.channel_id) { - channelId = conv.channel_id; - } - } - - const senderId = - (typeof conv.sender_id === "string" && conv.sender_id) || - (typeof conv.sender === "string" && conv.sender) || - undefined; - - return { channel, channelId, senderId, content: prompt, conv }; -} - function pruneDecisionMap(now = Date.now()) { for (const [k, v] of sessionDecision.entries()) { if (now - v.createdAt > DECISION_TTL_MS) sessionDecision.delete(k); @@ -916,12 +808,13 @@ export default { ); } - const derived = deriveDecisionInputFromPrompt(prompt, ctx.messageProvider, ctx.channelId); - // Fallback: extract channelId from sessionKey (format "agent::discord:channel:") - if (!derived.channelId && key) { - const skMatch = key.match(/:channel:(\d+)$/); - if (skMatch) derived.channelId = skMatch[1]; - } + const derived = deriveDecisionInputFromPrompt({ + prompt, + messageProvider: ctx.messageProvider, + 
sessionKey: key, + ctx: ctx as Record, + event: event as Record, + }); // Only proceed if: discord channel AND prompt contains untrusted metadata const hasConvMarker = prompt.includes("Conversation info (untrusted metadata):"); if (live.discordOnly !== false && (!hasConvMarker || derived.channel !== "discord")) return; @@ -1034,7 +927,13 @@ export default { if (rec) sessionDecision.delete(key); const prompt = ((event as Record).prompt as string) || ""; - const derived = deriveDecisionInputFromPrompt(prompt, ctx.messageProvider, ctx.channelId); + const derived = deriveDecisionInputFromPrompt({ + prompt, + messageProvider: ctx.messageProvider, + sessionKey: key, + ctx: ctx as Record, + event: event as Record, + }); const decision = evaluateDecision({ config: live, @@ -1080,7 +979,13 @@ export default { // Resolve end symbols from config/policy for dynamic instruction const prompt = ((event as Record).prompt as string) || ""; - const derived = deriveDecisionInputFromPrompt(prompt, ctx.messageProvider, ctx.channelId); + const derived = deriveDecisionInputFromPrompt({ + prompt, + messageProvider: ctx.messageProvider, + sessionKey: key, + ctx: ctx as Record, + event: event as Record, + }); const policy = resolvePolicy(live, derived.channelId, policyState.channelPolicies); const isGroupChat = derived.conv.is_group_chat === true || derived.conv.is_group_chat === "true"; const schedulingId = live.schedulingIdentifier || "➡️"; @@ -1319,8 +1224,7 @@ export default { channelId = sessionChannelId.get(key); } if (!channelId && key) { - const skMatch = key.match(/:channel:(\d+)$/); - if (skMatch) channelId = skMatch[1]; + channelId = extractDiscordChannelIdFromSessionKey(key); } const accountId = (ctx.accountId as string | undefined) || (key ? sessionAccountId.get(key) : undefined); const content = (event.content as string) || "";