Files
ContractorAgent/plugin/index.ts
hzhang 453dab3271 fix(bridge): resolve real bridge key past OpenClaw redaction; sane provider timeout
Three install/bridge bugs that made every OpenClaw model call to the
bridge fail when driven by a non-bundled channel plugin (e.g. Fabric):

1. OpenClaw redacts secret-like keys before exposing pluginConfig to a
   plugin, so config.bridgeApiKey was the literal __OPENCLAW_REDACTED__
   sentinel. The bridge then validated Authorization against the
   sentinel while the model provider sent the real key -> permanent
   HTTP 401. Resolve the real shared secret from the raw on-disk config
   (same pattern resolveAgent already uses); if still missing/redacted,
   treat as no-auth on the loopback-only bridge instead of 401-locking.

2. install.mjs set the provider apiKey authoritatively but wrote the
   plugin bridgeApiKey only via setIfMissing, so a stale prior value
   desynced the pair. Make bridgeApiKey authoritative too (they must match).

3. The provider had no timeoutSeconds; a full bridged agent turn far
   exceeds OpenClaw's default model-fetch timeout, so OpenClaw aborted
   mid-turn and no reply was ever delivered. Default timeoutSeconds=600
   (preserves a user override).

Verified live: bridge now returns 200 for the real key and a valid
OpenAI SSE completion; the fetch-timeout abort is gone.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 09:57:32 +01:00

149 lines
6.6 KiB
TypeScript

import fs from "node:fs";
import type http from "node:http";
import os from "node:os";
import path from "node:path";
import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
import type { OpenClawPluginApi } from "openclaw/plugin-sdk/core";
import { normalizePluginConfig } from "./core/types/contractor.js";
import { resolveContractorAgentMetadata } from "./core/contractor/metadata-resolver.js";
import { createBridgeServer } from "./web/server.js";
import { registerCli } from "./commands/register-cli.js";
// ── GlobalThis state ─────────────────────────────────────────────────────────
// All persistent state lives on globalThis to survive OpenClaw hot-reloads.
// See LESSONS_LEARNED.md items 1, 3, 11.
/** Loosely-typed view of `globalThis`, used as a string-keyed store for the plugin's process-wide state. */
const _G = globalThis as Record<string, unknown>;
/**
 * Key of the flag that register() sets to `true` the first time it wires the
 * gateway lifecycle handlers, so later register() calls skip wiring them again.
 */
const LIFECYCLE_KEY = "_contractorAgentLifecycleRegistered";
/**
 * Key under which the bridge server created in the `gateway_start` handler is
 * stored, so the `gateway_stop` handler can look it up and close it.
 */
const SERVER_KEY = "_contractorAgentBridgeServer";
/**
 * Key for the live OpenClaw config accessor (getter fn) shared via globalThis.
 * register() stores `api.config` under this key on every call.
 * NOTE(review): whether `api.config` is a getter function or a plain object is
 * not visible in this file — confirm against the plugin SDK.
 */
const OPENCLAW_CONFIG_KEY = "_contractorOpenClawConfig";
/**
 * OpenClaw replaces secret-like config values with this placeholder before
 * exposing `pluginConfig` to plugins.
 */
const OPENCLAW_REDACTION_SENTINEL = "__OPENCLAW_REDACTED__";

/**
 * Resolve the shared secret the bridge server should validate requests against.
 *
 * OpenClaw redacts any secret-like key (anything containing "apiKey") before
 * handing `api.pluginConfig` to the plugin, so `pluginConfig.bridgeApiKey` is
 * the redaction sentinel — never the real value. The bridge server validates
 * inbound requests against this key, while the OpenClaw model provider sends
 * the real `models.providers.contractor-agent.apiKey`; using the redacted
 * value therefore guarantees a permanent HTTP 401.
 *
 * Resolution order:
 *   1. `plugins.entries["contractor-agent"].config.bridgeApiKey` read from the
 *      raw on-disk `<home>/.openclaw/openclaw.json`;
 *   2. `fallback`, when it is a non-empty string other than the sentinel;
 *   3. `""`, so the loopback-only bridge skips auth rather than hard-locking.
 *
 * `<home>` is `$HOME` when set, otherwise the OS-reported home directory
 * (instead of a hard-coded `/root`, which is wrong for non-root users and on
 * Windows).
 *
 * NOTE(review): `JSON.parse` is strict — if the on-disk config may contain
 * comments or trailing commas, parsing throws and resolution silently falls
 * through to `fallback`. Confirm the file is always plain JSON.
 * NOTE(review): the path does not honour any OpenClaw config-path override;
 * verify whether such overrides need to be supported here.
 *
 * @param fallback Value taken from the (possibly redacted) plugin config.
 * @returns The usable key, or `""` when none could be resolved. Never throws.
 */
function resolveBridgeApiKey(fallback: string): string {
  // One predicate for both sources: a usable key is a non-empty string that
  // is not the redaction placeholder.
  const isUsable = (value: unknown): value is string =>
    typeof value === "string" &&
    value !== "" &&
    value !== OPENCLAW_REDACTION_SENTINEL;

  try {
    const configPath = path.join(
      process.env.HOME ?? os.homedir(),
      ".openclaw",
      "openclaw.json",
    );
    const raw = JSON.parse(fs.readFileSync(configPath, "utf8")) as {
      plugins?: {
        entries?: Record<string, { config?: { bridgeApiKey?: unknown } }>;
      };
    };
    const onDisk =
      raw.plugins?.entries?.["contractor-agent"]?.config?.bridgeApiKey;
    if (isUsable(onDisk)) {
      return onDisk;
    }
  } catch {
    // Best effort: a missing, unreadable, or malformed config file (or a
    // non-object JSON root) is not fatal — fall through to the fallback.
  }

  // Loopback-only bridge: prefer "no auth" over a guaranteed 401 lock-out.
  return isUsable(fallback) ? fallback : "";
}
// ── Plugin entry ─────────────────────────────────────────────────────────────
export default definePluginEntry({
  id: "contractor-agent",
  name: "Contractor Agent",
  description: "Turns Claude Code into an OpenClaw-managed contractor agent",

  /**
   * Plugin registration hook.
   *
   * OpenClaw requires register() to be synchronous — returning a Promise
   * surfaces as `Error: plugin register must be synchronous` and the plugin
   * ends up in `error` state. We avoid `await` here and instead let the
   * bridge server bind asynchronously, handling EADDRINUSE via the server's
   * `error` event when another gateway/CLI process already owns the port.
   *
   * On every call it normalizes the plugin config, publishes `api.config` on
   * globalThis, registers the CLI, and logs the bridge port. The gateway
   * lifecycle handlers are wired only on the first call per process.
   *
   * @param api Plugin API handle supplied by OpenClaw.
   */
  register(api: OpenClawPluginApi): void {
    const config = normalizePluginConfig(api.pluginConfig);

    /**
     * Resolve agent metadata for the bridge server's resolveAgent callback.
     * We do this by reading openclaw.json — the bridge server calls this on
     * every turn, so the file is re-read each time.
     *
     * @param agentId Id to look up in `agents.list` of the on-disk config.
     * @param _sessionKey Unused; part of the callback signature.
     * @returns `{ workspace }` for a known agent, or `null` when the agent is
     *   not listed, its metadata cannot be resolved, or the config file is
     *   missing/unreadable/malformed.
     */
    function resolveAgent(agentId: string, _sessionKey: string) {
      try {
        const configPath = path.join(
          // Fall back to the OS-reported home rather than a hard-coded
          // "/root", which is wrong for non-root users and on Windows.
          process.env.HOME ?? os.homedir(),
          ".openclaw",
          "openclaw.json",
        );
        const raw = JSON.parse(fs.readFileSync(configPath, "utf8")) as {
          agents?: { list?: Array<{ id: string; workspace?: string; model?: string }> };
        };
        const agent = raw.agents?.list?.find((a) => a.id === agentId);
        if (!agent) return null;
        const meta = resolveContractorAgentMetadata(agent, config.permissionMode);
        if (!meta) return null;
        return { workspace: meta.workspace };
      } catch {
        // Best effort: any read/parse failure means "agent not resolvable".
        return null;
      }
    }

    // ── Gateway lifecycle (start bridge server once per gateway process) ──────
    // Always update the config accessor so hot-reloads get fresh config.
    // server.ts reads this via globalThis to build tool execution context.
    _G[OPENCLAW_CONFIG_KEY] = api.config;

    if (!_G[LIFECYCLE_KEY]) {
      _G[LIFECYCLE_KEY] = true;

      // Bind the bridge server only when the gateway boots, NOT eagerly at
      // register-time. register() also runs in one-shot CLI subprocesses
      // (e.g. `openclaw completion`, `openclaw doctor`); spawning a long-
      // lived listener there would prevent those commands from exiting.
      api.on("gateway_start", () => {
        const apiKey = resolveBridgeApiKey(config.bridgeApiKey);
        if (!apiKey) {
          // An empty key means the bridge skips auth; make that visible
          // instead of disabling authentication silently.
          api.logger.warn(
            "[contractor-agent] no usable bridgeApiKey resolved (missing or redacted); bridge request auth is disabled",
          );
        }

        const server = createBridgeServer({
          port: config.bridgePort,
          apiKey,
          permissionMode: config.permissionMode,
          resolveAgent,
          logger: api.logger,
        });

        // EADDRINUSE → another gateway already owns the port; fine, skip bind.
        server.on("error", (err: NodeJS.ErrnoException) => {
          if (err.code === "EADDRINUSE") {
            api.logger.info(
              `[contractor-agent] bridge already running on port ${config.bridgePort}, skipping bind`,
            );
            // This instance never bound, so forget it: gateway_stop must not
            // report stopping a bridge that this process does not own.
            if (_G[SERVER_KEY] === server) delete _G[SERVER_KEY];
            return;
          }
          api.logger.warn(`[contractor-agent] bridge server error: ${err.message ?? String(err)}`);
        });

        // Defense in depth: even if this code path is somehow reached outside
        // the gateway, .unref() prevents the listener from pinning the host's
        // event loop and blocking process exit.
        server.unref();
        _G[SERVER_KEY] = server;
      });

      api.on("gateway_stop", () => {
        const s = _G[SERVER_KEY] as http.Server | undefined;
        // Nothing recorded → nothing to stop (and nothing to log).
        if (!s) return;
        // Drop the handle first so a repeated stop does not re-close it and
        // the closed server is not retained on globalThis.
        delete _G[SERVER_KEY];
        s.close();
        api.logger.info("[contractor-agent] bridge server stopped");
      });
    }

    // ── CLI ───────────────────────────────────────────────────────────────────
    registerCli(api);
    api.logger.info(`[contractor-agent] plugin registered (bridge port: ${config.bridgePort})`);
  },
});