From 9419d270e55ddf0440aa2a8e51948f80ac1ebcf5 Mon Sep 17 00:00:00 2001
From: hzhang
Date: Tue, 26 May 2026 02:25:08 +0100
Subject: [PATCH] fix(presence-sync): tick mutex so setInterval overlap can't spawn parallel ticks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The presence-sync tick iterates accounts serially with await on each
agent-login + PUT round-trip — a single tick can easily run 20+s when
there are several accounts. setInterval(intervalMs) does NOT wait for
the previous tick to finish, so on a busy gateway the next tick fires
on top of a still-running one and two parallel iterations each PUT the
same agentId within ~10 ms.

That tipped the guild backend's first-time-insert race (separate fix in
nav/Fabric.Backend.Guild) into 500s on prod (caught in t2 gateway
2026-05-25 23:23:35Z; 6 of 6 agents showed paired log lines 4-10 ms
apart for the same agent → idle).

Fix: a simple `inflight` boolean. tick() returns immediately if already
running; the next interval beat catches up. lastStatus !== bridge.get
gating already means status changes catch the next tick anyway, so
skipping a beat costs nothing the next beat won't fix.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 dist/fabric/src/presence-sync.js | 24 ++++++++++++++++++++++++
 src/presence-sync.ts             | 24 ++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/dist/fabric/src/presence-sync.js b/dist/fabric/src/presence-sync.js
index 18e95c9..51c56b5 100644
--- a/dist/fabric/src/presence-sync.js
+++ b/dist/fabric/src/presence-sync.js
@@ -8,6 +8,15 @@ export class PresenceSync {
     lastStatus = new Map(); // by agentId
     accounts = new Map();
     tokenCache = new Map(); // by agentId
+    // Mutex flag: a tick iterates accounts serially with `await` on each
+    // agent-login + PUT round-trip, so a single tick can easily run 20+s
+    // when there are many accounts. setInterval(intervalMs) does NOT wait
+    // for the previous tick to finish — without this guard the next tick
+    // fires on top of a still-running one and two parallel iterations
+    // PUT the same agentId within milliseconds. That tipped the backend's
+    // first-time-insert race (separate fix in Fabric.Backend.Guild) into
+    // 500s on prod. Guarded ticks just skip a beat instead.
+    inflight = false;
     constructor(logger, client) {
         this.logger = logger;
         this.client = client;
@@ -60,6 +69,21 @@ export class PresenceSync {
         return entry.token;
     }
     async tick() {
+        // Mutex: see the `inflight` field declaration for the why. Drop
+        // overlapping ticks rather than letting them run concurrently —
+        // status is gated by `lastStatus !== bridge.get`, so skipping a
+        // beat costs nothing the next beat won't catch.
+        if (this.inflight)
+            return;
+        this.inflight = true;
+        try {
+            await this.tickInner();
+        }
+        finally {
+            this.inflight = false;
+        }
+    }
+    async tickInner() {
         const bridge = globalThis['__hfAgentStatus'];
         if (!bridge || typeof bridge.get !== 'function')
             return; // HF plugin not loaded — skip
diff --git a/src/presence-sync.ts b/src/presence-sync.ts
index 8b35e73..fca3e36 100644
--- a/src/presence-sync.ts
+++ b/src/presence-sync.ts
@@ -50,6 +50,16 @@ export class PresenceSync {
   private readonly accounts = new Map();
   private readonly tokenCache = new Map(); // by agentId
 
+  // Mutex flag: a tick iterates accounts serially with `await` on each
+  // agent-login + PUT round-trip, so a single tick can easily run 20+s
+  // when there are many accounts. setInterval(intervalMs) does NOT wait
+  // for the previous tick to finish — without this guard the next tick
+  // fires on top of a still-running one and two parallel iterations
+  // PUT the same agentId within milliseconds. That tipped the backend's
+  // first-time-insert race (separate fix in Fabric.Backend.Guild) into
+  // 500s on prod. Guarded ticks just skip a beat instead.
+  private inflight = false;
+
   constructor(private readonly logger: Logger, private readonly client: FabricClient) {}
 
   setAccounts(accounts: PresenceSyncAccount[]): void {
@@ -103,6 +113,20 @@ export class PresenceSync {
   }
 
   private async tick(): Promise<void> {
+    // Mutex: see the `inflight` field declaration for the why. Drop
+    // overlapping ticks rather than letting them run concurrently —
+    // status is gated by `lastStatus !== bridge.get`, so skipping a
+    // beat costs nothing the next beat won't catch.
+    if (this.inflight) return;
+    this.inflight = true;
+    try {
+      await this.tickInner();
+    } finally {
+      this.inflight = false;
+    }
+  }
+
+  private async tickInner(): Promise<void> {
     const bridge = (globalThis as Record)['__hfAgentStatus'] as Bridge | undefined;
     if (!bridge || typeof bridge.get !== 'function') return; // HF plugin not loaded — skip