From a87de27cff9e9c02f4d23e2a7003ec2264eff7d0 Mon Sep 17 00:00:00 2001 From: hzhang Date: Mon, 25 May 2026 23:54:38 +0100 Subject: [PATCH] fix(presence-sync): use /api prefix + Bearer guildAccessToken (not x-api-key) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two layered bugs in the presence-sync loop, both causing every PUT to fail forever in prod: 1. **Missing /api prefix.** URL was `${guildBaseUrl}/agents/<userId>/presence` but the guild backend sets a global prefix 'api' in main.ts `setGlobalPrefix('api')`. Every other REST call in this plugin (channel.ts channels list, fabric-client.ts postMessage, canvas) already prepends /api/ — only presence-sync missed it. The backend returned 404 "Cannot PUT /agents/...". 2. **Wrong auth scheme.** Plugin sent `x-api-key: <fabricApiKey>`, but the endpoint sits behind the global APP_GUARD = ApiKeyGuard, which actually expects `Authorization: Bearer <guildAccessToken>` (despite its name — confusing naming on the backend side). With /api added, the error became 401 "missing bearer token". Confirmed by `docker exec fabric-backend-guild grep APP_GUARD /app/dist/app.module.js` and manual curl: Bearer guild token → 200 OK. **Fix** - presence-sync.ts: do agent-login on demand to obtain a fresh guildAccessToken, cache it per-agent for 13 min (under the 15-min JWT TTL), use it as Bearer for the PUT. 401 response invalidates the cache so the next tick re-logs-in. Pushes are gated on status changes (rare), so the login overhead is negligible. - inbound.ts: firstGuildEndpointByAgent → firstGuildByAgent storing both endpoint and nodeId (presence-sync needs nodeId to pick the right token out of guildAccessTokens[]). - index.ts: pass FabricClient to PresenceSync constructor. **Verified in sim** After restart, gateway log shows `fabric: presence-sync recruiter → idle` (200 OK), zero failed PUTs, where previously it would log a 404 every ~5s per agent. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- dist/fabric/index.js | 2 +- dist/fabric/src/inbound.js | 18 +++++--- dist/fabric/src/presence-sync.js | 69 +++++++++++++++++++++--------- index.ts | 2 +- src/inbound.ts | 27 ++++++++---- src/presence-sync.ts | 73 +++++++++++++++++++++++++++++--- 6 files changed, 149 insertions(+), 42 deletions(-) diff --git a/dist/fabric/index.js b/dist/fabric/index.js index b9be039..2e120b9 100644 --- a/dist/fabric/index.js +++ b/dist/fabric/index.js @@ -94,7 +94,7 @@ export default defineChannelPluginEntry({ void inbound.start().then(() => { if (!inbound) return; - presence = new PresenceSync(api.logger); + presence = new PresenceSync(api.logger, client); presence.setAccounts(inbound.getPresenceAccounts()); presence.start(); api.logger.info(`fabric: presence-sync started for ${inbound.getPresenceAccounts().length} account(s)`); diff --git a/dist/fabric/src/inbound.js b/dist/fabric/src/inbound.js index 06ebb35..f81d871 100644 --- a/dist/fabric/src/inbound.js +++ b/dist/fabric/src/inbound.js @@ -228,30 +228,34 @@ export class FabricInbound { for (const entry of this.identity.list()) { if (!entry.fabricUserId) continue; - const presenceGuildUrl = this.firstGuildEndpointByAgent.get(entry.agentId); - if (!presenceGuildUrl) + const presenceGuild = this.firstGuildByAgent.get(entry.agentId); + if (!presenceGuild) continue; out.push({ agentId: entry.agentId, fabricUserId: entry.fabricUserId, - guildBaseUrl: presenceGuildUrl, + guildBaseUrl: presenceGuild.endpoint, + guildNodeId: presenceGuild.nodeId, fabricApiKey: entry.fabricApiKey, }); } return out; } // Filled by connectAgent for each (agent, guild). Tracks ONLY the first - // guild per agent (used as the presence-push target). - firstGuildEndpointByAgent = new Map(); + // guild per agent (used as the presence-push target). 
Stores both + // endpoint and nodeId — presence-sync needs both: endpoint to build + // the URL, nodeId to pick the matching guildAccessToken from a fresh + // agent-login response. + firstGuildByAgent = new Map(); async connectAgent(agentId, session) { const selfUserId = session.user.id; // First-guild capture for presence-sync push target. session.guilds is // already in priority order from Center; we take the first one with a // valid endpoint and stop. Multi-guild presence is a future concern. - if (!this.firstGuildEndpointByAgent.has(agentId)) { + if (!this.firstGuildByAgent.has(agentId)) { const firstGuild = session.guilds.find((g) => typeof g.endpoint === 'string' && g.endpoint.length > 0); if (firstGuild) - this.firstGuildEndpointByAgent.set(agentId, firstGuild.endpoint); + this.firstGuildByAgent.set(agentId, { endpoint: firstGuild.endpoint, nodeId: firstGuild.nodeId }); } for (const g of session.guilds) { const tok = session.guildAccessTokens.find((t) => t.guildNodeId === g.nodeId)?.token; diff --git a/dist/fabric/src/presence-sync.js b/dist/fabric/src/presence-sync.js index c7a6921..18e95c9 100644 --- a/dist/fabric/src/presence-sync.js +++ b/dist/fabric/src/presence-sync.js @@ -1,26 +1,16 @@ -/** - * presence-sync — read each connected agent's HF status (via the - * cross-plugin `globalThis.__hfAgentStatus.get(agentId)` exposed by - * HarborForge.OpenclawPlugin) and push diffs to Fabric.Backend.Guild - * `PUT /agents/:userId/presence` so the backend can apply busy-discard - * on `announce`-type channel deliveries. - * - * Push model: we only PUT when an agent's status actually changes - * (since the last push). The HF-side accessor has its own TTL cache - * to absorb the every-30s polling. - * - * If HF plugin isn't loaded (`__hfAgentStatus` undefined), the loop - * is a no-op — Fabric backend defaults presence to 'unknown' which is - * treated as not-busy. Announce-channel delivery still works; busy - * filtering simply doesn't kick in. 
- */ +// Guild access JWTs expire every 900s. Refresh ~2 min early to stay +// safely inside the window even if a tick runs late. +const TOKEN_TTL_MS = (15 - 2) * 60 * 1000; export class PresenceSync { logger; + client; timer = null; lastStatus = new Map(); // by agentId accounts = new Map(); - constructor(logger) { + tokenCache = new Map(); // by agentId + constructor(logger, client) { this.logger = logger; + this.client = client; } setAccounts(accounts) { this.accounts.clear(); @@ -42,6 +32,33 @@ export class PresenceSync { this.timer = null; } } + /** + * Fetch a fresh guildAccessToken for `acct`, caching it under the + * agentId until just before its JWT expiry. Returns null on login + * failure or if the session has no matching guild — caller logs + + * skips the PUT. + */ + async ensureGuildToken(acct) { + const now = Date.now(); + const cached = this.tokenCache.get(acct.agentId); + if (cached && cached.expiresAt > now) + return cached.token; + let session; + try { + session = await this.client.agentLogin(acct.fabricApiKey); + } + catch (err) { + this.logger.warn(`fabric: presence-sync agent-login failed for ${acct.agentId}: ${String(err)}`); + return null; + } + const entry = session.guildAccessTokens.find((g) => g.guildNodeId === acct.guildNodeId); + if (!entry?.token) { + this.logger.warn(`fabric: presence-sync no guild token for ${acct.agentId} guild=${acct.guildNodeId}`); + return null; + } + this.tokenCache.set(acct.agentId, { token: entry.token, expiresAt: now + TOKEN_TTL_MS }); + return entry.token; + } async tick() { const bridge = globalThis['__hfAgentStatus']; if (!bridge || typeof bridge.get !== 'function') @@ -58,13 +75,22 @@ export class PresenceSync { continue; if (this.lastStatus.get(agentId) === status) continue; // no change → no PUT + const guildToken = await this.ensureGuildToken(acct); + if (!guildToken) + continue; try { - const url = `${acct.guildBaseUrl.replace(/\/$/, '')}/agents/${encodeURIComponent(acct.fabricUserId)}/presence`; + // 
Endpoint: PUT /api/agents/:userId/presence. ApiKeyGuard (global + // APP_GUARD) requires `Authorization: Bearer ` + // — NOT the agent's raw fabricApiKey. Pre-v1: this loop sent + // x-api-key and got 401 "missing bearer token" forever. The /api + // prefix is required because the guild backend sets a global + // 'api' prefix in main.ts setGlobalPrefix('api'). + const url = `${acct.guildBaseUrl.replace(/\/$/, '')}/api/agents/${encodeURIComponent(acct.fabricUserId)}/presence`; const res = await fetch(url, { method: 'PUT', headers: { 'content-type': 'application/json', - 'x-api-key': acct.fabricApiKey, + authorization: `Bearer ${guildToken}`, }, body: JSON.stringify({ status, source: 'hf-plugin' }), }); @@ -73,6 +99,11 @@ export class PresenceSync { this.logger.info(`fabric: presence-sync ${agentId} → ${status}`); } else { + // 401 here usually means the cached token went stale unexpectedly + // (server-side rotation or clock skew) — drop the cache so the + // next tick re-logs-in. + if (res.status === 401) + this.tokenCache.delete(agentId); this.logger.warn(`fabric: presence-sync PUT ${agentId} failed: ${res.status}`); } } diff --git a/index.ts b/index.ts index 06bd5ce..7185e16 100644 --- a/index.ts +++ b/index.ts @@ -116,7 +116,7 @@ export default defineChannelPluginEntry({ // their fabricUserId + first guild endpoint populated). 
void inbound.start().then(() => { if (!inbound) return; - presence = new PresenceSync(api.logger); + presence = new PresenceSync(api.logger, client); presence.setAccounts(inbound.getPresenceAccounts()); presence.start(); api.logger.info(`fabric: presence-sync started for ${inbound.getPresenceAccounts().length} account(s)`); diff --git a/src/inbound.ts b/src/inbound.ts index 4b15a3e..01e151b 100644 --- a/src/inbound.ts +++ b/src/inbound.ts @@ -281,17 +281,25 @@ export class FabricInbound { agentId: string; fabricUserId: string; guildBaseUrl: string; + guildNodeId: string; fabricApiKey: string; }> { - const out: Array<{ agentId: string; fabricUserId: string; guildBaseUrl: string; fabricApiKey: string }> = []; + const out: Array<{ + agentId: string; + fabricUserId: string; + guildBaseUrl: string; + guildNodeId: string; + fabricApiKey: string; + }> = []; for (const entry of this.identity.list()) { if (!entry.fabricUserId) continue; - const presenceGuildUrl = this.firstGuildEndpointByAgent.get(entry.agentId); - if (!presenceGuildUrl) continue; + const presenceGuild = this.firstGuildByAgent.get(entry.agentId); + if (!presenceGuild) continue; out.push({ agentId: entry.agentId, fabricUserId: entry.fabricUserId, - guildBaseUrl: presenceGuildUrl, + guildBaseUrl: presenceGuild.endpoint, + guildNodeId: presenceGuild.nodeId, fabricApiKey: entry.fabricApiKey, }); } @@ -299,17 +307,20 @@ export class FabricInbound { } // Filled by connectAgent for each (agent, guild). Tracks ONLY the first - // guild per agent (used as the presence-push target). - private firstGuildEndpointByAgent = new Map(); + // guild per agent (used as the presence-push target). Stores both + // endpoint and nodeId — presence-sync needs both: endpoint to build + // the URL, nodeId to pick the matching guildAccessToken from a fresh + // agent-login response. 
+ private firstGuildByAgent = new Map(); private async connectAgent(agentId: string, session: FabricSession): Promise { const selfUserId = session.user.id; // First-guild capture for presence-sync push target. session.guilds is // already in priority order from Center; we take the first one with a // valid endpoint and stop. Multi-guild presence is a future concern. - if (!this.firstGuildEndpointByAgent.has(agentId)) { + if (!this.firstGuildByAgent.has(agentId)) { const firstGuild = session.guilds.find((g) => typeof g.endpoint === 'string' && g.endpoint.length > 0); - if (firstGuild) this.firstGuildEndpointByAgent.set(agentId, firstGuild.endpoint); + if (firstGuild) this.firstGuildByAgent.set(agentId, { endpoint: firstGuild.endpoint, nodeId: firstGuild.nodeId }); } for (const g of session.guilds) { const tok = session.guildAccessTokens.find((t) => t.guildNodeId === g.nodeId)?.token; diff --git a/src/presence-sync.ts b/src/presence-sync.ts index aae0138..8b35e73 100644 --- a/src/presence-sync.ts +++ b/src/presence-sync.ts @@ -2,18 +2,26 @@ * presence-sync — read each connected agent's HF status (via the * cross-plugin `globalThis.__hfAgentStatus.get(agentId)` exposed by * HarborForge.OpenclawPlugin) and push diffs to Fabric.Backend.Guild - * `PUT /agents/:userId/presence` so the backend can apply busy-discard - * on `announce`-type channel deliveries. + * `PUT /api/agents/:userId/presence` so the backend can apply + * busy-discard on `announce`-type channel deliveries. * * Push model: we only PUT when an agent's status actually changes * (since the last push). The HF-side accessor has its own TTL cache * to absorb the every-30s polling. * + * Auth: the endpoint sits behind ApiKeyGuard (global APP_GUARD per + * app.module.js) which expects `Authorization: Bearer ` + * — NOT the agent's fabricApiKey directly. 
So before each PUT we do + * a fresh agent-login (or reuse a cached token if still within its + * 15-min JWT TTL) and pull the guildAccessToken matching the target + * guild. Status changes are rare enough that login overhead is fine. + * * If HF plugin isn't loaded (`__hfAgentStatus` undefined), the loop * is a no-op — Fabric backend defaults presence to 'unknown' which is * treated as not-busy. Announce-channel delivery still works; busy * filtering simply doesn't kick in. */ +import type { FabricClient } from './fabric-client.js'; type HfStatus = 'idle' | 'on_call' | 'busy' | 'exhausted' | 'offline'; type Bridge = { get(agentId: string): Promise }; @@ -23,15 +31,26 @@ export interface PresenceSyncAccount { agentId: string; fabricUserId: string; // the agent's Fabric Center user id (UUID) guildBaseUrl: string; // e.g. https://fabric.hangman-lab.top/guild/ - fabricApiKey: string; // existing per-account key + guildNodeId: string; // which guildAccessTokens[].guildNodeId to pick + fabricApiKey: string; // existing per-account key (used for agent-login) +} + +// Guild access JWTs expire every 900s. Refresh ~2 min early to stay +// safely inside the window even if a tick runs late. +const TOKEN_TTL_MS = (15 - 2) * 60 * 1000; + +interface CachedToken { + token: string; + expiresAt: number; // epoch ms } export class PresenceSync { private timer: ReturnType | null = null; private readonly lastStatus = new Map(); // by agentId private readonly accounts = new Map(); + private readonly tokenCache = new Map(); // by agentId - constructor(private readonly logger: Logger) {} + constructor(private readonly logger: Logger, private readonly client: FabricClient) {} setAccounts(accounts: PresenceSyncAccount[]): void { this.accounts.clear(); @@ -54,6 +73,35 @@ export class PresenceSync { } } + /** + * Fetch a fresh guildAccessToken for `acct`, caching it under the + * agentId until just before its JWT expiry. 
Returns null on login + * failure or if the session has no matching guild — caller logs + + * skips the PUT. + */ + private async ensureGuildToken(acct: PresenceSyncAccount): Promise { + const now = Date.now(); + const cached = this.tokenCache.get(acct.agentId); + if (cached && cached.expiresAt > now) return cached.token; + + let session; + try { + session = await this.client.agentLogin(acct.fabricApiKey); + } catch (err) { + this.logger.warn(`fabric: presence-sync agent-login failed for ${acct.agentId}: ${String(err)}`); + return null; + } + const entry = session.guildAccessTokens.find((g) => g.guildNodeId === acct.guildNodeId); + if (!entry?.token) { + this.logger.warn( + `fabric: presence-sync no guild token for ${acct.agentId} guild=${acct.guildNodeId}`, + ); + return null; + } + this.tokenCache.set(acct.agentId, { token: entry.token, expiresAt: now + TOKEN_TTL_MS }); + return entry.token; + } + private async tick(): Promise { const bridge = (globalThis as Record)['__hfAgentStatus'] as Bridge | undefined; if (!bridge || typeof bridge.get !== 'function') return; // HF plugin not loaded — skip @@ -68,13 +116,22 @@ export class PresenceSync { if (!status) continue; if (this.lastStatus.get(agentId) === status) continue; // no change → no PUT + const guildToken = await this.ensureGuildToken(acct); + if (!guildToken) continue; + try { - const url = `${acct.guildBaseUrl.replace(/\/$/, '')}/agents/${encodeURIComponent(acct.fabricUserId)}/presence`; + // Endpoint: PUT /api/agents/:userId/presence. ApiKeyGuard (global + // APP_GUARD) requires `Authorization: Bearer ` + // — NOT the agent's raw fabricApiKey. Pre-v1: this loop sent + // x-api-key and got 401 "missing bearer token" forever. The /api + // prefix is required because the guild backend sets a global + // 'api' prefix in main.ts setGlobalPrefix('api'). 
+ const url = `${acct.guildBaseUrl.replace(/\/$/, '')}/api/agents/${encodeURIComponent(acct.fabricUserId)}/presence`; const res = await fetch(url, { method: 'PUT', headers: { 'content-type': 'application/json', - 'x-api-key': acct.fabricApiKey, + authorization: `Bearer ${guildToken}`, }, body: JSON.stringify({ status, source: 'hf-plugin' }), }); @@ -82,6 +139,10 @@ export class PresenceSync { this.lastStatus.set(agentId, status); this.logger.info(`fabric: presence-sync ${agentId} → ${status}`); } else { + // 401 here usually means the cached token went stale unexpectedly + // (server-side rotation or clock skew) — drop the cache so the + // next tick re-logs-in. + if (res.status === 401) this.tokenCache.delete(agentId); this.logger.warn(`fabric: presence-sync PUT ${agentId} failed: ${res.status}`); } } catch (err) {