fix(bridge): emit empty content delta as heartbeat; preserve user provider fields on reinstall

OpenClaw's LLM idle watchdog (default 120s) fires on lack of *model
progress*, not lack of bytes — an SSE comment frame (": keepalive\n\n")
keeps the TCP socket alive but isn't recognized as progress, so a long
quiet tool-call phase still idles out. When that happens OpenClaw falls
back to re-sending the prior turn's assistant text (see `fallbackAnswerText`
at pi-embedded-Bcz04p2i.js:1308), producing duplicate Discord messages.

Heartbeat now emits a real chat.completion.chunk with an empty content
delta every 30s. Clients drop empty deltas; the upstream idle watchdog
should count it as model progress because it's a real event on the
canonical streaming channel.

scripts/install.mjs now spreads the existing provider entry before
overriding script-managed fields, so user-added fields like
timeoutSeconds survive reinstall.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
zhi
2026-05-14 08:53:22 +00:00
parent 1b7cd6b215
commit 0b24330787
2 changed files with 30 additions and 18 deletions

View File

@@ -101,12 +101,13 @@ function parseBody(req: http.IncomingMessage): Promise<BridgeInboundRequest> {
const queueBySession = new Map<string, Promise<void>>(); const queueBySession = new Map<string, Promise<void>>();
/** /**
* SSE keepalive cadence. Bridge writes `: keepalive\n\n` (an SSE comment * SSE heartbeat cadence. Bridge writes an empty-content `chat.completion.chunk`
* frame, no-op for the OpenAI stream parser) on this interval while a turn * (a no-op for OpenAI stream parsers, but a real model-progress event on the
* is in flight or queued. This keeps OpenClaw's LLM idle watchdog (default * canonical streaming channel) on this interval while a turn is in flight or
* 120s of stream silence → attempt failure → retry) from firing when the * queued. This keeps OpenClaw's LLM idle watchdog (default 120s) from firing
* underlying claude subprocess is in a long quiet tool-call phase or while * during long quiet tool-call phases or while we're waiting our turn in the
* we're waiting our turn in the per-session queue. * per-session queue. See the heartbeat block in handleChatCompletions for
* details on why an SSE comment frame is insufficient.
*/ */
const HEARTBEAT_MS = 30_000; const HEARTBEAT_MS = 30_000;
@@ -210,19 +211,27 @@ export function createBridgeServer(config: BridgeServerConfig): http.Server {
"Transfer-Encoding": "chunked", "Transfer-Encoding": "chunked",
}); });
const completionId = `chatcmpl-bridge-${randomUUID().slice(0, 8)}`;
// ── SSE heartbeat ──────────────────────────────────────────────────────── // ── SSE heartbeat ────────────────────────────────────────────────────────
// OpenClaw's idle timeout fires after 120s of total stream silence with // OpenClaw's LLM idle watchdog (default 120s) fires on lack of *model
// no model progress (`LLM idle timeout (120s): no response from model`). // progress*, not lack of bytes — concretely "no content delta through
// claude -p can easily produce 120s+ of zero assistant-text output during // SSE for 120s". An SSE comment frame (`: keepalive\n\n`) keeps the TCP
// long Bash / file / MCP tool sequences, since this bridge only forwards // socket alive but does NOT register as model progress, so a long quiet
// assistant text deltas as SSE chunks (tool_use blocks are not surfaced). // tool-call phase still idles out. When that happens OpenClaw falls back
// A periodic SSE comment frame counts as bytes on the wire and resets // to re-sending the prior turn's assistant text (see
// the upstream idle timer, while being a no-op for the OpenAI stream // pi-embedded-Bcz04p2i.js:1308 `fallbackAnswerText`), producing the
// parser. // duplicate-Discord-message symptom observed 2026-05-14.
//
// We emit a real `chat.completion.chunk` with an empty content delta
// every HEARTBEAT_MS. Clients drop empty deltas, but the upstream idle
// watchdog should count it as model progress because it's a real event
// on the canonical streaming channel. If empty content turns out to be
// filtered, the next step is a zero-width space ("\u200B").
const heartbeat = setInterval(() => { const heartbeat = setInterval(() => {
if (clientDisconnected || res.writableEnded) return; if (clientDisconnected || res.writableEnded) return;
try { try {
res.write(": keepalive\n\n"); sseWrite(res, buildChunk(completionId, ""));
} catch { } catch {
/* socket dead, ignore */ /* socket dead, ignore */
} }
@@ -247,8 +256,6 @@ export function createBridgeServer(config: BridgeServerConfig): http.Server {
// doesn't poison the chain forever. // doesn't poison the chain forever.
const myChainTail = prev.then(() => mySlot, () => mySlot); const myChainTail = prev.then(() => mySlot, () => mySlot);
if (sessionKey) queueBySession.set(sessionKey, myChainTail); if (sessionKey) queueBySession.set(sessionKey, myChainTail);
const completionId = `chatcmpl-bridge-${randomUUID().slice(0, 8)}`;
let newSessionId = ""; let newSessionId = "";
let hasError = false; let hasError = false;
let resultErrorReason: string | null = null; let resultErrorReason: string | null = null;

View File

@@ -67,10 +67,15 @@ function install() {
// 3. Update openclaw.json // 3. Update openclaw.json
const cfg = readConfig(); const cfg = readConfig();
// Add provider // Add provider — spread existing first so user-added fields
// (e.g. timeoutSeconds, extraHeaders) survive reinstall. Script-managed
// fields (baseUrl/apiKey/api/models) are then overridden authoritatively
// since they're tied to the constants and model catalog above.
cfg.models = cfg.models ?? {}; cfg.models = cfg.models ?? {};
cfg.models.providers = cfg.models.providers ?? {}; cfg.models.providers = cfg.models.providers ?? {};
const existingProvider = cfg.models.providers[PLUGIN_ID] ?? {};
cfg.models.providers[PLUGIN_ID] = { cfg.models.providers[PLUGIN_ID] = {
...existingProvider,
baseUrl: `http://127.0.0.1:${BRIDGE_PORT}/v1`, baseUrl: `http://127.0.0.1:${BRIDGE_PORT}/v1`,
apiKey: BRIDGE_API_KEY, apiKey: BRIDGE_API_KEY,
api: "openai-completions", api: "openai-completions",