fix: implement turn gate and handoff improvements #8

Merged
hzhang merged 6 commits from fix/turn-gate-and-handoff into feat/turn-based-speaking 2026-03-02 10:22:12 +00:00
3 changed files with 1394 additions and 54 deletions

1105
dist/whispergate/index.ts vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -22,8 +22,7 @@
"debugLogChannelIds": [],
"discordControlApiBaseUrl": "http://127.0.0.1:8790",
"discordControlApiToken": "<DISCORD_CONTROL_AUTH_TOKEN>",
"discordControlCallerId": "agent-main",
"moderatorBotToken": "<MODERATOR_BOT_TOKEN>"
"discordControlCallerId": "agent-main"
}
}
}

View File

@@ -24,7 +24,10 @@ type DebugConfig = {
};
const sessionDecision = new Map<string, DecisionRecord>();
const sessionAllowed = new Map<string, boolean>(); // Track if session was allowed to speak (true) or forced no-reply (false)
const sessionInjected = new Set<string>(); // Track which sessions have already injected the end marker
const sessionChannelId = new Map<string, string>(); // Track sessionKey -> channelId mapping
const sessionAccountId = new Map<string, string>(); // Track sessionKey -> accountId mapping
const MAX_SESSION_DECISIONS = 2000;
const DECISION_TTL_MS = 5 * 60 * 1000;
function buildEndMarkerInstruction(endSymbols: string[], isGroupChat: boolean): string {
@@ -49,6 +52,37 @@ function normalizeChannel(ctx: Record<string, unknown>): string {
return "";
}
/**
* Extract the actual Discord channel ID from a conversationId or "to" field.
* OpenClaw uses format "channel:<snowflake>" for Discord conversations.
* Also tries event.to and event.metadata.to as fallbacks.
*/
function extractDiscordChannelId(ctx: Record<string, unknown>, event?: Record<string, unknown>): string | undefined {
const candidates: unknown[] = [
ctx.conversationId,
event?.to,
(event?.metadata as Record<string, unknown>)?.to,
];
for (const c of candidates) {
if (typeof c === "string" && c.trim()) {
const s = c.trim();
// Handle "channel:123456" format
if (s.startsWith("channel:")) {
const id = s.slice("channel:".length);
if (/^\d+$/.test(id)) return id;
}
// Handle "discord:channel:123456" format
if (s.startsWith("discord:channel:")) {
const id = s.slice("discord:channel:".length);
if (/^\d+$/.test(id)) return id;
}
// If it's a raw snowflake (all digits), use directly
if (/^\d{15,}$/.test(s)) return s;
}
}
return undefined;
}
function normalizeSender(event: Record<string, unknown>, ctx: Record<string, unknown>): string | undefined {
const direct = [ctx.senderId, ctx.from, event.from];
for (const v of direct) {
@@ -83,6 +117,7 @@ function extractUntrustedConversationInfo(text: string): Record<string, unknown>
function deriveDecisionInputFromPrompt(
prompt: string,
messageProvider?: string,
channelIdFromCtx?: string,
): {
channel: string;
channelId?: string;
@@ -92,11 +127,25 @@ function deriveDecisionInputFromPrompt(
} {
const conv = extractUntrustedConversationInfo(prompt) || {};
const channel = (messageProvider || "").toLowerCase();
const channelId =
(typeof conv.channel_id === "string" && conv.channel_id) ||
(typeof conv.chat_id === "string" && conv.chat_id.startsWith("channel:")
? conv.chat_id.slice("channel:".length)
: undefined);
// Priority: ctx.channelId > conv.chat_id > conversation_label > conv.channel_id
let channelId = channelIdFromCtx;
if (!channelId) {
// Try chat_id field (format "channel:123456")
if (typeof conv.chat_id === "string" && conv.chat_id.startsWith("channel:")) {
channelId = conv.chat_id.slice("channel:".length);
}
// Try conversation_label (format "Guild #name channel id:123456")
if (!channelId && typeof conv.conversation_label === "string") {
const labelMatch = conv.conversation_label.match(/channel id:(\d+)/);
if (labelMatch) channelId = labelMatch[1];
}
// Try channel_id field directly
if (!channelId && typeof conv.channel_id === "string" && conv.channel_id) {
channelId = conv.channel_id;
}
}
const senderId =
(typeof conv.sender_id === "string" && conv.sender_id) ||
(typeof conv.sender === "string" && conv.sender) ||
@@ -183,7 +232,7 @@ function resolveAccountId(api: OpenClawPluginApi, agentId: string): string | und
}
/**
* Get all Discord bot accountIds from config bindings (excluding humanList-bound agents).
* Get all Discord bot accountIds from config bindings.
*/
function getAllBotAccountIds(api: OpenClawPluginApi): string[] {
const root = (api.config as Record<string, unknown>) || {};
@@ -199,12 +248,44 @@ function getAllBotAccountIds(api: OpenClawPluginApi): string[] {
return ids;
}
/**
* Track which bot accountIds have been seen in each channel via message_received.
* Key: channelId, Value: Set of accountIds seen.
*/
const channelSeenAccounts = new Map<string, Set<string>>();
/**
* Record a bot accountId seen in a channel.
* Returns true if this is a new account for this channel (turn order should be updated).
*/
function recordChannelAccount(channelId: string, accountId: string): boolean {
let seen = channelSeenAccounts.get(channelId);
if (!seen) {
seen = new Set();
channelSeenAccounts.set(channelId, seen);
}
if (seen.has(accountId)) return false;
seen.add(accountId);
return true;
}
/**
* Get the list of bot accountIds seen in a channel.
* Only returns accounts that are also in the global bindings (actual bots).
*/
function getChannelBotAccountIds(api: OpenClawPluginApi, channelId: string): string[] {
const allBots = new Set(getAllBotAccountIds(api));
const seen = channelSeenAccounts.get(channelId);
if (!seen) return [];
return [...seen].filter(id => allBots.has(id));
}
/**
* Ensure turn order is initialized for a channel.
* Uses all bot accounts from bindings as the turn order.
* Uses only bot accounts that have been seen in this channel.
*/
function ensureTurnOrder(api: OpenClawPluginApi, channelId: string): void {
const botAccounts = getAllBotAccountIds(api);
const botAccounts = getChannelBotAccountIds(api, channelId);
if (botAccounts.length > 0) {
initTurnOrder(channelId, botAccounts);
}
@@ -262,20 +343,8 @@ function resolveDiscordUserId(api: OpenClawPluginApi, accountId: string): string
const discord = (channels.discord as Record<string, unknown>) || {};
const accounts = (discord.accounts as Record<string, Record<string, unknown>>) || {};
const acct = accounts[accountId];
if (!acct?.token || typeof acct.token !== "string") {
api.logger.warn(`whispergate: resolveDiscordUserId failed for accountId=${accountId}: no token found in config`);
return undefined;
}
const userId = userIdFromToken(acct.token);
if (!userId) {
api.logger.warn(`whispergate: resolveDiscordUserId failed for accountId=${accountId}: could not parse userId from token`);
return undefined;
}
api.logger.info(`whispergate: resolveDiscordUserId success accountId=${accountId} userId=${userId}`);
return userId;
if (!acct?.token || typeof acct.token !== "string") return undefined;
return userIdFromToken(acct.token);
}
/** Get the moderator bot's Discord user ID from its token */
@@ -540,7 +609,9 @@ export default {
try {
const c = (ctx || {}) as Record<string, unknown>;
const e = (event || {}) as Record<string, unknown>;
const preChannelId = typeof c.channelId === "string" ? c.channelId : undefined;
// ctx.channelId is the platform name (e.g. "discord"), NOT the Discord channel snowflake.
// Extract the real Discord channel ID from conversationId or event.to.
const preChannelId = extractDiscordChannelId(c, e);
const livePre = getLivePluginConfig(api, baseConfig as WhisperGateConfig) as WhisperGateConfig & DebugConfig;
if (shouldDebugLog(livePre, preChannelId)) {
api.logger.info(`whispergate: debug message_received preflight ctx=${JSON.stringify(debugCtxSummary(c, e))}`);
@@ -549,7 +620,11 @@ export default {
// Turn management on message received
if (preChannelId) {
ensureTurnOrder(api, preChannelId);
const from = typeof (e as Record<string, unknown>).from === "string" ? (e as Record<string, unknown>).from as string : "";
// event.from is often the channel target (e.g. "discord:channel:xxx"), NOT the sender.
// The actual sender ID is in event.metadata.senderId.
const metadata = (e as Record<string, unknown>).metadata as Record<string, unknown> | undefined;
const from = (typeof metadata?.senderId === "string" && metadata.senderId)
|| (typeof (e as Record<string, unknown>).from === "string" ? (e as Record<string, unknown>).from as string : "");
// Ignore moderator bot messages — they don't affect turn state
const moderatorUserId = getModeratorUserId(livePre);
@@ -562,6 +637,17 @@ export default {
const humanList = livePre.humanList || livePre.bypassUserIds || [];
const isHuman = humanList.includes(from);
const senderAccountId = typeof c.accountId === "string" ? c.accountId : undefined;
// Track which bot accounts are present in this channel
if (senderAccountId && senderAccountId !== "default") {
const isNew = recordChannelAccount(preChannelId, senderAccountId);
if (isNew) {
// Re-initialize turn order with updated channel membership
ensureTurnOrder(api, preChannelId);
api.logger.info(`whispergate: new account ${senderAccountId} seen in channel=${preChannelId}, turn order updated`);
}
}
onNewMessage(preChannelId, senderAccountId, isHuman);
if (shouldDebugLog(livePre, preChannelId)) {
api.logger.info(`whispergate: turn onNewMessage channel=${preChannelId} from=${from} isHuman=${isHuman} accountId=${senderAccountId ?? "unknown"}`);
@@ -589,11 +675,25 @@ export default {
);
}
const derived = deriveDecisionInputFromPrompt(prompt, ctx.messageProvider);
const derived = deriveDecisionInputFromPrompt(prompt, ctx.messageProvider, ctx.channelId);
// Fallback: extract channelId from sessionKey (format "agent:<id>:discord:channel:<channelId>")
if (!derived.channelId && key) {
const skMatch = key.match(/:channel:(\d+)$/);
if (skMatch) derived.channelId = skMatch[1];
}
// Only proceed if: discord channel AND prompt contains untrusted metadata
const hasConvMarker = prompt.includes("Conversation info (untrusted metadata):");
if (live.discordOnly !== false && (!hasConvMarker || derived.channel !== "discord")) return;
// Always save channelId and accountId mappings for use in later hooks
if (derived.channelId) {
sessionChannelId.set(key, derived.channelId);
}
const resolvedAccountId = resolveAccountId(api, ctx.agentId || "");
if (resolvedAccountId) {
sessionAccountId.set(key, resolvedAccountId);
}
let rec = sessionDecision.get(key);
if (!rec || Date.now() - rec.createdAt > DECISION_TTL_MS) {
if (rec) sessionDecision.delete(key);
@@ -624,30 +724,12 @@ export default {
// This ensures only the current speaker can respond even for human messages.
if (derived.channelId) {
ensureTurnOrder(api, derived.channelId);
// Try resolveAccountId first, fall back to ctx.accountId if not found
let accountId = resolveAccountId(api, ctx.agentId || "");
// Debug log for turn check
if (shouldDebugLog(live, derived.channelId)) {
const turnDebug = getTurnDebugInfo(derived.channelId);
api.logger.info(
`whispergate: turn check preflight agentId=${ctx.agentId ?? "undefined"} ` +
`resolvedAccountId=${accountId ?? "undefined"} ` +
`ctxAccountId=${ctx.accountId ?? "undefined"} ` +
`turnOrderLen=${turnDebug.turnOrder?.length ?? 0} ` +
`currentSpeaker=${turnDebug.currentSpeaker ?? "null"}`,
);
}
// Fallback to ctx.accountId if resolveAccountId failed
if (!accountId && ctx.accountId) {
accountId = String(ctx.accountId);
}
const accountId = resolveAccountId(api, ctx.agentId || "");
if (accountId) {
const turnCheck = checkTurn(derived.channelId, accountId);
if (!turnCheck.allowed) {
// Forced no-reply - record this session as not allowed to speak
sessionAllowed.set(key, false);
api.logger.info(
`whispergate: turn gate blocked session=${key} accountId=${accountId} currentSpeaker=${turnCheck.currentSpeaker} reason=${turnCheck.reason}`,
);
@@ -656,6 +738,8 @@ export default {
modelOverride: live.noReplyModel,
};
}
// Allowed to speak - record this session as allowed
sessionAllowed.set(key, true);
}
}
@@ -712,7 +796,7 @@ export default {
if (rec) sessionDecision.delete(key);
const prompt = ((event as Record<string, unknown>).prompt as string) || "";
const derived = deriveDecisionInputFromPrompt(prompt, ctx.messageProvider);
const derived = deriveDecisionInputFromPrompt(prompt, ctx.messageProvider, ctx.channelId);
const decision = evaluateDecision({
config: live,
@@ -758,7 +842,7 @@ export default {
// Resolve end symbols from config/policy for dynamic instruction
const prompt = ((event as Record<string, unknown>).prompt as string) || "";
const derived = deriveDecisionInputFromPrompt(prompt, ctx.messageProvider);
const derived = deriveDecisionInputFromPrompt(prompt, ctx.messageProvider, ctx.channelId);
const policy = resolvePolicy(live, derived.channelId, policyState.channelPolicies);
const isGroupChat = derived.conv.is_group_chat === true || derived.conv.is_group_chat === "true";
const instruction = buildEndMarkerInstruction(policy.endSymbols, isGroupChat);
@@ -773,7 +857,7 @@ export default {
// Mark session as injected (one-time injection)
sessionInjected.add(key);
api.logger.info(`whispergate: one-time inject end marker for session=${key}, reason=${rec.decision.reason} isGroupChat=${isGroupChat}`);
api.logger.info(`whispergate: prepend end marker instruction for session=${key}, reason=${rec.decision.reason} isGroupChat=${isGroupChat}`);
return { prependContext: identity + instruction };
});
@@ -823,12 +907,164 @@ export default {
},
});
// Handle NO_REPLY detection before message write
// This is where we detect if agent output is NO_REPLY and handle turn advancement
// NOTE: This hook is synchronous, do not use async/await
api.on("before_message_write", (event, ctx) => {
try {
// Debug: print all available keys in event and ctx
api.logger.info(
`whispergate: DEBUG before_message_write eventKeys=${JSON.stringify(Object.keys(event ?? {}))} ctxKeys=${JSON.stringify(Object.keys(ctx ?? {}))}`,
);
// before_message_write ctx only has { agentId, sessionKey }.
// Use session mappings populated during before_model_resolve for channelId/accountId.
// Content comes from event.message (AgentMessage).
let key = ctx.sessionKey;
let channelId: string | undefined;
let accountId: string | undefined;
// Get from session mapping (set in before_model_resolve)
if (key) {
channelId = sessionChannelId.get(key);
accountId = sessionAccountId.get(key);
}
// Extract content from event.message (AgentMessage)
let content = "";
const msg = (event as Record<string, unknown>).message as Record<string, unknown> | undefined;
if (msg) {
// AgentMessage may have content as string or nested
if (typeof msg.content === "string") {
content = msg.content;
} else if (Array.isArray(msg.content)) {
// content might be an array of parts (Anthropic format)
for (const part of msg.content) {
if (typeof part === "string") content += part;
else if (part && typeof part === "object" && typeof (part as Record<string, unknown>).text === "string") {
content += (part as Record<string, unknown>).text;
}
}
}
}
// Fallback to event.content
if (!content) {
content = ((event as Record<string, unknown>).content as string) || "";
}
// Always log for debugging - show all available info
api.logger.info(
`whispergate: DEBUG before_message_write session=${key ?? "undefined"} channel=${channelId ?? "undefined"} accountId=${accountId ?? "undefined"} contentType=${typeof content} content=${String(content).slice(0, 200)}`,
);
if (!key || !channelId || !accountId) return;
const live = getLivePluginConfig(api, baseConfig as WhisperGateConfig) as WhisperGateConfig & DebugConfig;
const trimmed = content.trim();
const isNoReply = /^NO_REPLY$/i.test(trimmed);
// Log turn state for debugging
const turnDebug = getTurnDebugInfo(channelId);
api.logger.info(
`whispergate: DEBUG turn state channel=${channelId} turnOrder=${JSON.stringify(turnDebug.turnOrder)} currentSpeaker=${turnDebug.currentSpeaker} noRepliedThisCycle=${JSON.stringify([...turnDebug.noRepliedThisCycle])}`,
);
if (!isNoReply) {
api.logger.info(
`whispergate: before_message_write content is not NO_REPLY, skipping channel=${channelId}`,
);
return;
}
// Check if this session was forced no-reply or allowed to speak
const wasAllowed = sessionAllowed.get(key);
api.logger.info(
`whispergate: DEBUG NO_REPLY detected session=${key} wasAllowed=${wasAllowed}`,
);
if (wasAllowed === undefined) return; // No record, skip
if (wasAllowed === false) {
// Forced no-reply - do not advance turn
sessionAllowed.delete(key);
api.logger.info(
`whispergate: before_message_write forced no-reply session=${key} channel=${channelId} - not advancing turn`,
);
return;
}
// Allowed to speak (current speaker) but chose NO_REPLY - advance turn
ensureTurnOrder(api, channelId);
const nextSpeaker = onSpeakerDone(channelId, accountId, true);
sessionAllowed.delete(key);
api.logger.info(
`whispergate: before_message_write real no-reply session=${key} channel=${channelId} nextSpeaker=${nextSpeaker ?? "dormant"}`,
);
// If all agents NO_REPLY'd (dormant), don't trigger handoff
if (!nextSpeaker) {
if (shouldDebugLog(live, channelId)) {
api.logger.info(
`whispergate: before_message_write all agents no-reply, going dormant - no handoff`,
);
}
return;
}
// Trigger moderator handoff message (fire-and-forget, don't await)
if (live.moderatorBotToken) {
const nextUserId = resolveDiscordUserId(api, nextSpeaker);
if (nextUserId) {
const handoffMsg = `轮到(<@${nextUserId}>如果没有想说的请直接回复NO_REPLY`;
void sendModeratorMessage(live.moderatorBotToken, channelId, handoffMsg, api.logger).catch((err) => {
api.logger.warn(`whispergate: before_message_write handoff failed: ${String(err)}`);
});
} else {
api.logger.warn(`whispergate: cannot resolve Discord userId for next speaker accountId=${nextSpeaker}`);
}
}
} catch (err) {
api.logger.warn(`whispergate: before_message_write hook failed: ${String(err)}`);
}
});
// Turn advance: when an agent sends a message, check if it signals end of turn
api.on("message_sent", async (event, ctx) => {
try {
const channelId = ctx.channelId;
const accountId = ctx.accountId;
const content = event.content || "";
const key = ctx.sessionKey;
const c = (ctx || {}) as Record<string, unknown>;
const e = (event || {}) as Record<string, unknown>;
// Always log raw context first for debugging
api.logger.info(
`whispergate: DEBUG message_sent RAW ctxKeys=${JSON.stringify(Object.keys(c))} eventKeys=${JSON.stringify(Object.keys(e))} ` +
`ctx.channelId=${String(c.channelId ?? "undefined")} ctx.conversationId=${String(c.conversationId ?? "undefined")} ` +
`ctx.accountId=${String(c.accountId ?? "undefined")} event.to=${String(e.to ?? "undefined")} ` +
`session=${key ?? "undefined"}`,
);
// ctx.channelId is the platform name (e.g. "discord"), NOT the Discord channel snowflake.
// Extract real Discord channel ID from conversationId or event.to.
let channelId = extractDiscordChannelId(c, e);
// Fallback: sessionKey mapping
if (!channelId && key) {
channelId = sessionChannelId.get(key);
}
// Fallback: parse from sessionKey
if (!channelId && key) {
const skMatch = key.match(/:channel:(\d+)$/);
if (skMatch) channelId = skMatch[1];
}
const accountId = (ctx.accountId as string | undefined) || (key ? sessionAccountId.get(key) : undefined);
const content = (event.content as string) || "";
// Debug log
api.logger.info(
`whispergate: DEBUG message_sent RESOLVED session=${key ?? "undefined"} channelId=${channelId ?? "undefined"} accountId=${accountId ?? "undefined"} content=${content.slice(0, 100)}`,
);
if (!channelId || !accountId) return;