fix(bridge): scope CLI sessions per OpenClaw session and reset on /new
The bridge was keying claudeSessionId by agentId alone, so every Discord
channel, DM, and cron run for a single agent shared one Claude CLI
session. Two consequences in the wild:
- Cross-channel context bleed: an 8.7MB session for the `developer` agent
mixed references from channels 1474327736242798612 and 1498579994044010566
plus the operator DM, all in one `--resume` thread.
- `/new` had no effect on the CLI side. OpenClaw rotated its session
file, but the bridge kept `--resume`-ing the same long-lived
claudeSessionId, eventually exceeding the model's 1M-token context limit
(debug log showed `prompt is too long: 1179616 tokens > 1000000 maximum`).
Changes:
* input-filter: extract `chat_id` from the Conversation-info
untrusted-metadata block (scanning all messages, since runtimeOnly
turns put it in the system prompt) and detect bare `/new`/`/reset`
via the BARE_SESSION_RESET_PROMPT_BASE marker. Add buildSessionKey
(`${agentId}::${chatId}`) and a resolveDispatchPrompt fallback for the
empty user message that OpenClaw sends on bare resets.
* server: use the composite session key for getSession/putSession;
on bareSessionReset, removeSession before dispatching so the CLI
starts a fresh session; on a CLI result_error (typically
prompt_too_long) drop the entry too so the next turn doesn't
re-resume into the poisoned context.
* claude/sdk-adapter: surface CLI terminal errors via a new
`result_error` event (carries reason + sessionId) so the bridge
can react instead of just streaming the synthetic
"Prompt is too long" assistant text and silently re-using the
same session.
* index: convert register() to synchronous (OpenClaw rejects async
register with "plugin register must be synchronous"); replace the
pre-bind port probe with a server-level EADDRINUSE handler.
* .gitignore: ignore node_modules/ and dist/.
This commit is contained in:
@@ -8,7 +8,14 @@ import { fileURLToPath } from "node:url";
|
||||
export type ClaudeMessage =
|
||||
| { type: "text"; text: string }
|
||||
| { type: "done"; sessionId: string }
|
||||
| { type: "error"; message: string };
|
||||
| { type: "error"; message: string }
|
||||
/**
|
||||
* Terminal error from the CLI's `result` event (e.g. `is_error: true` with
|
||||
* `terminal_reason: "prompt_too_long"`). The bridge uses this signal to
|
||||
* drop the session-map entry so the next turn starts a fresh CLI session
|
||||
* instead of `--resume`-ing into the same poisoned context.
|
||||
*/
|
||||
| { type: "result_error"; sessionId: string; reason: string; message: string };
|
||||
|
||||
export type OpenAITool = {
|
||||
type: "function";
|
||||
@@ -155,7 +162,9 @@ export async function* dispatchToClaude(
|
||||
|
||||
const rl = createInterface({ input: child.stdout!, crlfDelay: Infinity });
|
||||
|
||||
type CapturedResultError = { reason: string; message: string };
|
||||
let capturedSessionId = "";
|
||||
let capturedResultError = null as CapturedResultError | null;
|
||||
|
||||
const events: ClaudeMessage[] = [];
|
||||
let done = false;
|
||||
@@ -216,6 +225,15 @@ export async function* dispatchToClaude(
|
||||
if (type === "result") {
|
||||
const sessionId = (event.session_id as string) ?? "";
|
||||
if (sessionId) capturedSessionId = sessionId;
|
||||
// CLI signals fatal-but-graceful errors (context overflow, refusal,
|
||||
// billing, etc.) via `is_error: true` on the result event. Capture the
|
||||
// reason so the bridge layer can decide whether to invalidate the
|
||||
// session-map entry (e.g. context overflow → drop, retry next turn).
|
||||
if (event.is_error === true) {
|
||||
const reason = (event.terminal_reason as string) ?? (event.subtype as string) ?? "error";
|
||||
const message = (event.result as string) ?? `claude result error (${reason})`;
|
||||
capturedResultError = { reason, message };
|
||||
}
|
||||
// `result` is the terminal stream-json event; commit the turn without
|
||||
// waiting for claude's process tree to fully exit (leaked Bash grandchildren
|
||||
// can otherwise hold stdout open indefinitely).
|
||||
@@ -250,7 +268,18 @@ export async function* dispatchToClaude(
|
||||
yield events.shift()!;
|
||||
}
|
||||
|
||||
if (capturedSessionId) {
|
||||
// Pull into a local with explicit type so TS doesn't infer the inner field
|
||||
// accesses as `never` (the field is only ever assigned inside the readline
|
||||
// callback above, so closure-based narrowing can't see it from this scope).
|
||||
const resultErr: CapturedResultError | null = capturedResultError;
|
||||
if (resultErr && capturedSessionId) {
|
||||
yield {
|
||||
type: "result_error",
|
||||
sessionId: capturedSessionId,
|
||||
reason: resultErr.reason,
|
||||
message: resultErr.message,
|
||||
};
|
||||
} else if (capturedSessionId) {
|
||||
yield { type: "done", sessionId: capturedSessionId };
|
||||
} else {
|
||||
const stderrSummary = stderrLines.join(" ").slice(0, 200);
|
||||
|
||||
Reference in New Issue
Block a user