2 Commits

Author SHA1 Message Date
h z
79b29db26c fix(presence-sync): /api prefix + Bearer guildAccessToken (#7) 2026-05-25 23:17:45 +00:00
a87de27cff fix(presence-sync): use /api prefix + Bearer guildAccessToken (not x-api-key)
Two layered bugs in the presence-sync loop, both causing every PUT to
fail forever in prod:

1. **Missing /api prefix.** URL was `${guildBaseUrl}/agents/<id>/presence`
   but the guild backend sets a global prefix 'api' in main.ts
   `setGlobalPrefix('api')`. Every other REST call in this plugin
   (channel.ts channels list, fabric-client.ts postMessage, canvas)
   already prepends /api/ — only presence-sync missed it. Returned 404
   "Cannot PUT /agents/...".

2. **Wrong auth scheme.** Plugin sent `x-api-key: <fabricApiKey>`, but
   the endpoint sits behind the global APP_GUARD = ApiKeyGuard, which
   actually expects `Authorization: Bearer <guildAccessToken>` (despite
   its name — confusing naming on the backend side). With /api added,
   error became 401 "missing bearer token". Confirmed by `docker exec
   fabric-backend-guild grep APP_GUARD /app/dist/app.module.js` and
   manual curl: Bearer guild token → 200 OK.

**Fix**

- presence-sync.ts: do agent-login on demand to obtain a fresh
  guildAccessToken, cache it per-agent for 13 min (under the 15-min
  JWT TTL), use it as Bearer for the PUT. 401 response invalidates
  the cache so the next tick re-logs-in. Pushes are gated on status
  changes (rare), so the login overhead is negligible.

- inbound.ts: firstGuildEndpointByAgent → firstGuildByAgent storing
  both endpoint and nodeId (presence-sync needs nodeId to pick the
  right token out of guildAccessTokens[]).

- index.ts: pass FabricClient to PresenceSync constructor.

**Verified in sim**

After restart, gateway log shows `fabric: presence-sync recruiter →
idle` (200 OK), zero failed PUTs, where previously it would log a 404
every ~5s per agent.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 23:54:38 +01:00
6 changed files with 149 additions and 42 deletions

View File

@@ -94,7 +94,7 @@ export default defineChannelPluginEntry({
void inbound.start().then(() => {
if (!inbound)
return;
presence = new PresenceSync(api.logger);
presence = new PresenceSync(api.logger, client);
presence.setAccounts(inbound.getPresenceAccounts());
presence.start();
api.logger.info(`fabric: presence-sync started for ${inbound.getPresenceAccounts().length} account(s)`);

View File

@@ -228,30 +228,34 @@ export class FabricInbound {
for (const entry of this.identity.list()) {
if (!entry.fabricUserId)
continue;
const presenceGuildUrl = this.firstGuildEndpointByAgent.get(entry.agentId);
if (!presenceGuildUrl)
const presenceGuild = this.firstGuildByAgent.get(entry.agentId);
if (!presenceGuild)
continue;
out.push({
agentId: entry.agentId,
fabricUserId: entry.fabricUserId,
guildBaseUrl: presenceGuildUrl,
guildBaseUrl: presenceGuild.endpoint,
guildNodeId: presenceGuild.nodeId,
fabricApiKey: entry.fabricApiKey,
});
}
return out;
}
// Filled by connectAgent for each (agent, guild). Tracks ONLY the first
// guild per agent (used as the presence-push target).
firstGuildEndpointByAgent = new Map();
// guild per agent (used as the presence-push target). Stores both
// endpoint and nodeId — presence-sync needs both: endpoint to build
// the URL, nodeId to pick the matching guildAccessToken from a fresh
// agent-login response.
firstGuildByAgent = new Map();
async connectAgent(agentId, session) {
const selfUserId = session.user.id;
// First-guild capture for presence-sync push target. session.guilds is
// already in priority order from Center; we take the first one with a
// valid endpoint and stop. Multi-guild presence is a future concern.
if (!this.firstGuildEndpointByAgent.has(agentId)) {
if (!this.firstGuildByAgent.has(agentId)) {
const firstGuild = session.guilds.find((g) => typeof g.endpoint === 'string' && g.endpoint.length > 0);
if (firstGuild)
this.firstGuildEndpointByAgent.set(agentId, firstGuild.endpoint);
this.firstGuildByAgent.set(agentId, { endpoint: firstGuild.endpoint, nodeId: firstGuild.nodeId });
}
for (const g of session.guilds) {
const tok = session.guildAccessTokens.find((t) => t.guildNodeId === g.nodeId)?.token;

View File

@@ -1,26 +1,16 @@
/**
* presence-sync — read each connected agent's HF status (via the
* cross-plugin `globalThis.__hfAgentStatus.get(agentId)` exposed by
* HarborForge.OpenclawPlugin) and push diffs to Fabric.Backend.Guild
* `PUT /agents/:userId/presence` so the backend can apply busy-discard
* on `announce`-type channel deliveries.
*
* Push model: we only PUT when an agent's status actually changes
* (since the last push). The HF-side accessor has its own TTL cache
* to absorb the every-30s polling.
*
* If HF plugin isn't loaded (`__hfAgentStatus` undefined), the loop
* is a no-op — Fabric backend defaults presence to 'unknown' which is
* treated as not-busy. Announce-channel delivery still works; busy
* filtering simply doesn't kick in.
*/
// Guild access JWTs expire every 900s. Refresh ~2 min early to stay
// safely inside the window even if a tick runs late.
const TOKEN_TTL_MS = (15 - 2) * 60 * 1000;
export class PresenceSync {
logger;
client;
timer = null;
lastStatus = new Map(); // by agentId
accounts = new Map();
constructor(logger) {
tokenCache = new Map(); // by agentId
constructor(logger, client) {
this.logger = logger;
this.client = client;
}
setAccounts(accounts) {
this.accounts.clear();
@@ -42,6 +32,33 @@ export class PresenceSync {
this.timer = null;
}
}
/**
* Fetch a fresh guildAccessToken for `acct`, caching it under the
* agentId until just before its JWT expiry. Returns null on login
* failure or if the session has no matching guild — caller logs +
* skips the PUT.
*/
async ensureGuildToken(acct) {
const now = Date.now();
const cached = this.tokenCache.get(acct.agentId);
if (cached && cached.expiresAt > now)
return cached.token;
let session;
try {
session = await this.client.agentLogin(acct.fabricApiKey);
}
catch (err) {
this.logger.warn(`fabric: presence-sync agent-login failed for ${acct.agentId}: ${String(err)}`);
return null;
}
const entry = session.guildAccessTokens.find((g) => g.guildNodeId === acct.guildNodeId);
if (!entry?.token) {
this.logger.warn(`fabric: presence-sync no guild token for ${acct.agentId} guild=${acct.guildNodeId}`);
return null;
}
this.tokenCache.set(acct.agentId, { token: entry.token, expiresAt: now + TOKEN_TTL_MS });
return entry.token;
}
async tick() {
const bridge = globalThis['__hfAgentStatus'];
if (!bridge || typeof bridge.get !== 'function')
@@ -58,13 +75,22 @@ export class PresenceSync {
continue;
if (this.lastStatus.get(agentId) === status)
continue; // no change → no PUT
const guildToken = await this.ensureGuildToken(acct);
if (!guildToken)
continue;
try {
const url = `${acct.guildBaseUrl.replace(/\/$/, '')}/agents/${encodeURIComponent(acct.fabricUserId)}/presence`;
// Endpoint: PUT /api/agents/:userId/presence. ApiKeyGuard (global
// APP_GUARD) requires `Authorization: Bearer <guildAccessToken>`
// — NOT the agent's raw fabricApiKey. Pre-v1: this loop sent
// x-api-key and got 401 "missing bearer token" forever. The /api
// prefix is required because the guild backend sets a global
// 'api' prefix in main.ts setGlobalPrefix('api').
const url = `${acct.guildBaseUrl.replace(/\/$/, '')}/api/agents/${encodeURIComponent(acct.fabricUserId)}/presence`;
const res = await fetch(url, {
method: 'PUT',
headers: {
'content-type': 'application/json',
'x-api-key': acct.fabricApiKey,
authorization: `Bearer ${guildToken}`,
},
body: JSON.stringify({ status, source: 'hf-plugin' }),
});
@@ -73,6 +99,11 @@ export class PresenceSync {
this.logger.info(`fabric: presence-sync ${agentId}${status}`);
}
else {
// 401 here usually means the cached token went stale unexpectedly
// (server-side rotation or clock skew) — drop the cache so the
// next tick re-logs-in.
if (res.status === 401)
this.tokenCache.delete(agentId);
this.logger.warn(`fabric: presence-sync PUT ${agentId} failed: ${res.status}`);
}
}

View File

@@ -116,7 +116,7 @@ export default defineChannelPluginEntry({
// their fabricUserId + first guild endpoint populated).
void inbound.start().then(() => {
if (!inbound) return;
presence = new PresenceSync(api.logger);
presence = new PresenceSync(api.logger, client);
presence.setAccounts(inbound.getPresenceAccounts());
presence.start();
api.logger.info(`fabric: presence-sync started for ${inbound.getPresenceAccounts().length} account(s)`);

View File

@@ -281,17 +281,25 @@ export class FabricInbound {
agentId: string;
fabricUserId: string;
guildBaseUrl: string;
guildNodeId: string;
fabricApiKey: string;
}> {
const out: Array<{ agentId: string; fabricUserId: string; guildBaseUrl: string; fabricApiKey: string }> = [];
const out: Array<{
agentId: string;
fabricUserId: string;
guildBaseUrl: string;
guildNodeId: string;
fabricApiKey: string;
}> = [];
for (const entry of this.identity.list()) {
if (!entry.fabricUserId) continue;
const presenceGuildUrl = this.firstGuildEndpointByAgent.get(entry.agentId);
if (!presenceGuildUrl) continue;
const presenceGuild = this.firstGuildByAgent.get(entry.agentId);
if (!presenceGuild) continue;
out.push({
agentId: entry.agentId,
fabricUserId: entry.fabricUserId,
guildBaseUrl: presenceGuildUrl,
guildBaseUrl: presenceGuild.endpoint,
guildNodeId: presenceGuild.nodeId,
fabricApiKey: entry.fabricApiKey,
});
}
@@ -299,17 +307,20 @@ export class FabricInbound {
}
// Filled by connectAgent for each (agent, guild). Tracks ONLY the first
// guild per agent (used as the presence-push target).
private firstGuildEndpointByAgent = new Map<string, string>();
// guild per agent (used as the presence-push target). Stores both
// endpoint and nodeId — presence-sync needs both: endpoint to build
// the URL, nodeId to pick the matching guildAccessToken from a fresh
// agent-login response.
private firstGuildByAgent = new Map<string, { endpoint: string; nodeId: string }>();
private async connectAgent(agentId: string, session: FabricSession): Promise<void> {
const selfUserId = session.user.id;
// First-guild capture for presence-sync push target. session.guilds is
// already in priority order from Center; we take the first one with a
// valid endpoint and stop. Multi-guild presence is a future concern.
if (!this.firstGuildEndpointByAgent.has(agentId)) {
if (!this.firstGuildByAgent.has(agentId)) {
const firstGuild = session.guilds.find((g) => typeof g.endpoint === 'string' && g.endpoint.length > 0);
if (firstGuild) this.firstGuildEndpointByAgent.set(agentId, firstGuild.endpoint);
if (firstGuild) this.firstGuildByAgent.set(agentId, { endpoint: firstGuild.endpoint, nodeId: firstGuild.nodeId });
}
for (const g of session.guilds) {
const tok = session.guildAccessTokens.find((t) => t.guildNodeId === g.nodeId)?.token;

View File

@@ -2,18 +2,26 @@
* presence-sync — read each connected agent's HF status (via the
* cross-plugin `globalThis.__hfAgentStatus.get(agentId)` exposed by
* HarborForge.OpenclawPlugin) and push diffs to Fabric.Backend.Guild
* `PUT /agents/:userId/presence` so the backend can apply busy-discard
* on `announce`-type channel deliveries.
* `PUT /api/agents/:userId/presence` so the backend can apply
* busy-discard on `announce`-type channel deliveries.
*
* Push model: we only PUT when an agent's status actually changes
* (since the last push). The HF-side accessor has its own TTL cache
* to absorb the every-30s polling.
*
* Auth: the endpoint sits behind ApiKeyGuard (global APP_GUARD per
* app.module.js) which expects `Authorization: Bearer <guild-token>`
* — NOT the agent's fabricApiKey directly. So before each PUT we do
* a fresh agent-login (or reuse a cached token if still within its
* 15-min JWT TTL) and pull the guildAccessToken matching the target
* guild. Status changes are rare enough that login overhead is fine.
*
* If HF plugin isn't loaded (`__hfAgentStatus` undefined), the loop
* is a no-op — Fabric backend defaults presence to 'unknown' which is
* treated as not-busy. Announce-channel delivery still works; busy
* filtering simply doesn't kick in.
*/
import type { FabricClient } from './fabric-client.js';
type HfStatus = 'idle' | 'on_call' | 'busy' | 'exhausted' | 'offline';
type Bridge = { get(agentId: string): Promise<HfStatus | undefined> };
@@ -23,15 +31,26 @@ export interface PresenceSyncAccount {
agentId: string;
fabricUserId: string; // the agent's Fabric Center user id (UUID)
guildBaseUrl: string; // e.g. https://fabric.hangman-lab.top/guild/<id>
fabricApiKey: string; // existing per-account key
guildNodeId: string; // which guildAccessTokens[].guildNodeId to pick
fabricApiKey: string; // existing per-account key (used for agent-login)
}
// Guild access JWTs expire every 900s. Refresh ~2 min early to stay
// safely inside the window even if a tick runs late.
const TOKEN_TTL_MS = (15 - 2) * 60 * 1000;
interface CachedToken {
token: string;
expiresAt: number; // epoch ms
}
export class PresenceSync {
private timer: ReturnType<typeof setInterval> | null = null;
private readonly lastStatus = new Map<string, HfStatus>(); // by agentId
private readonly accounts = new Map<string, PresenceSyncAccount>();
private readonly tokenCache = new Map<string, CachedToken>(); // by agentId
constructor(private readonly logger: Logger) {}
constructor(private readonly logger: Logger, private readonly client: FabricClient) {}
setAccounts(accounts: PresenceSyncAccount[]): void {
this.accounts.clear();
@@ -54,6 +73,35 @@ export class PresenceSync {
}
}
/**
* Fetch a fresh guildAccessToken for `acct`, caching it under the
* agentId until just before its JWT expiry. Returns null on login
* failure or if the session has no matching guild — caller logs +
* skips the PUT.
*/
private async ensureGuildToken(acct: PresenceSyncAccount): Promise<string | null> {
const now = Date.now();
const cached = this.tokenCache.get(acct.agentId);
if (cached && cached.expiresAt > now) return cached.token;
let session;
try {
session = await this.client.agentLogin(acct.fabricApiKey);
} catch (err) {
this.logger.warn(`fabric: presence-sync agent-login failed for ${acct.agentId}: ${String(err)}`);
return null;
}
const entry = session.guildAccessTokens.find((g) => g.guildNodeId === acct.guildNodeId);
if (!entry?.token) {
this.logger.warn(
`fabric: presence-sync no guild token for ${acct.agentId} guild=${acct.guildNodeId}`,
);
return null;
}
this.tokenCache.set(acct.agentId, { token: entry.token, expiresAt: now + TOKEN_TTL_MS });
return entry.token;
}
private async tick(): Promise<void> {
const bridge = (globalThis as Record<string, unknown>)['__hfAgentStatus'] as Bridge | undefined;
if (!bridge || typeof bridge.get !== 'function') return; // HF plugin not loaded — skip
@@ -68,13 +116,22 @@ export class PresenceSync {
if (!status) continue;
if (this.lastStatus.get(agentId) === status) continue; // no change → no PUT
const guildToken = await this.ensureGuildToken(acct);
if (!guildToken) continue;
try {
const url = `${acct.guildBaseUrl.replace(/\/$/, '')}/agents/${encodeURIComponent(acct.fabricUserId)}/presence`;
// Endpoint: PUT /api/agents/:userId/presence. ApiKeyGuard (global
// APP_GUARD) requires `Authorization: Bearer <guildAccessToken>`
// — NOT the agent's raw fabricApiKey. Pre-v1: this loop sent
// x-api-key and got 401 "missing bearer token" forever. The /api
// prefix is required because the guild backend sets a global
// 'api' prefix in main.ts setGlobalPrefix('api').
const url = `${acct.guildBaseUrl.replace(/\/$/, '')}/api/agents/${encodeURIComponent(acct.fabricUserId)}/presence`;
const res = await fetch(url, {
method: 'PUT',
headers: {
'content-type': 'application/json',
'x-api-key': acct.fabricApiKey,
authorization: `Bearer ${guildToken}`,
},
body: JSON.stringify({ status, source: 'hf-plugin' }),
});
@@ -82,6 +139,10 @@ export class PresenceSync {
this.lastStatus.set(agentId, status);
this.logger.info(`fabric: presence-sync ${agentId}${status}`);
} else {
// 401 here usually means the cached token went stale unexpectedly
// (server-side rotation or clock skew) — drop the cache so the
// next tick re-logs-in.
if (res.status === 401) this.tokenCache.delete(agentId);
this.logger.warn(`fabric: presence-sync PUT ${agentId} failed: ${res.status}`);
}
} catch (err) {