From 0b24330787f9454720f082b348ee65041a3ae608 Mon Sep 17 00:00:00 2001 From: zhi Date: Thu, 14 May 2026 08:53:22 +0000 Subject: [PATCH] fix(bridge): emit empty content delta as heartbeat; preserve user provider fields on reinstall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenClaw's LLM idle watchdog (default 120s) fires on lack of *model progress*, not lack of bytes — an SSE comment frame (": keepalive\n\n") keeps the TCP socket alive but isn't recognized as progress, so a long quiet tool-call phase still idles out. When that happens OpenClaw falls back to re-sending the prior turn's assistant text (pi-embedded:1308 fallbackAnswerText), producing duplicate-Discord-message symptoms. Heartbeat now emits a real chat.completion.chunk with an empty content delta every 30s. Clients drop empty deltas; the upstream idle watchdog should count it as model progress because it's a real event on the canonical streaming channel. scripts/install.mjs now spreads the existing provider entry before overriding script-managed fields, so user-added fields like timeoutSeconds survive reinstall. Co-Authored-By: Claude Opus 4.7 --- plugin/web/server.ts | 41 ++++++++++++++++++++++++----------------- scripts/install.mjs | 7 ++++++- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/plugin/web/server.ts b/plugin/web/server.ts index efceb67..45f4776 100644 --- a/plugin/web/server.ts +++ b/plugin/web/server.ts @@ -101,12 +101,13 @@ function parseBody(req: http.IncomingMessage): Promise { const queueBySession = new Map>(); /** - * SSE keepalive cadence. Bridge writes `: keepalive\n\n` (an SSE comment - * frame, no-op for the OpenAI stream parser) on this interval while a turn - * is in flight or queued. 
This keeps OpenClaw's LLM idle watchdog (default - * 120s of stream silence → attempt failure → retry) from firing when the - * underlying claude subprocess is in a long quiet tool-call phase or while - * we're waiting our turn in the per-session queue. + * SSE heartbeat cadence. Bridge writes an empty-content `chat.completion.chunk` + * (a no-op for OpenAI stream parsers, but a real model-progress event on the + * canonical streaming channel) on this interval while a turn is in flight or + * queued. This keeps OpenClaw's LLM idle watchdog (default 120s) from firing + * during long quiet tool-call phases or while we're waiting our turn in the + * per-session queue. See the heartbeat block in handleChatCompletions for + * details on why an SSE comment frame is insufficient. */ const HEARTBEAT_MS = 30_000; @@ -210,19 +211,27 @@ export function createBridgeServer(config: BridgeServerConfig): http.Server { "Transfer-Encoding": "chunked", }); + const completionId = `chatcmpl-bridge-${randomUUID().slice(0, 8)}`; + // ── SSE heartbeat ──────────────────────────────────────────────────────── - // OpenClaw's idle timeout fires after 120s of total stream silence with - // no model progress (`LLM idle timeout (120s): no response from model`). - // claude -p can easily produce 120s+ of zero assistant-text output during - // long Bash / file / MCP tool sequences, since this bridge only forwards - // assistant text deltas as SSE chunks (tool_use blocks are not surfaced). - // A periodic SSE comment frame counts as bytes on the wire and resets - // the upstream idle timer, while being a no-op for the OpenAI stream - // parser. + // OpenClaw's LLM idle watchdog (default 120s) fires on lack of *model + // progress*, not lack of bytes — concretely "no content delta through + // SSE for 120s". An SSE comment frame (`: keepalive\n\n`) keeps the TCP + // socket alive but does NOT register as model progress, so a long quiet + // tool-call phase still idles out. 
When that happens OpenClaw falls back + // to re-sending the prior turn's assistant text (see + // pi-embedded-Bcz04p2i.js:1308 `fallbackAnswerText`), producing the + // duplicate-Discord-message symptom observed 2026-05-14. + // + // We emit a real `chat.completion.chunk` with an empty content delta + // every HEARTBEAT_MS. Clients drop empty deltas, but the upstream idle + // watchdog should count it as model progress because it's a real event + // on the canonical streaming channel. If empty content turns out to be + // filtered, the next step is a zero-width space ("\u200B", U+200B). const heartbeat = setInterval(() => { if (clientDisconnected || res.writableEnded) return; try { - res.write(": keepalive\n\n"); + sseWrite(res, buildChunk(completionId, "")); } catch { /* socket dead, ignore */ } @@ -247,8 +256,6 @@ export function createBridgeServer(config: BridgeServerConfig): http.Server { // doesn't poison the chain forever. const myChainTail = prev.then(() => mySlot, () => mySlot); if (sessionKey) queueBySession.set(sessionKey, myChainTail); - - const completionId = `chatcmpl-bridge-${randomUUID().slice(0, 8)}`; let newSessionId = ""; let hasError = false; let resultErrorReason: string | null = null; diff --git a/scripts/install.mjs b/scripts/install.mjs index 5e7c1ab..77785a2 100644 --- a/scripts/install.mjs +++ b/scripts/install.mjs @@ -67,10 +67,15 @@ function install() { // 3. Update openclaw.json const cfg = readConfig(); - // Add provider + // Add provider — spread existing first so user-added fields + // (e.g. timeoutSeconds, extraHeaders) survive reinstall. Script-managed + // fields (baseUrl/apiKey/api/models) are then overridden authoritatively + // since they're tied to the constants and model catalog above. cfg.models = cfg.models ?? {}; cfg.models.providers = cfg.models.providers ?? {}; + const existingProvider = cfg.models.providers[PLUGIN_ID] ?? 
{}; cfg.models.providers[PLUGIN_ID] = { + ...existingProvider, baseUrl: `http://127.0.0.1:${BRIDGE_PORT}/v1`, apiKey: BRIDGE_API_KEY, api: "openai-completions",