From 38b466532175e1fdf669d3f5dcd4b1d4efabafd7 Mon Sep 17 00:00:00 2001
From: hzhang
Date: Tue, 26 May 2026 02:25:07 +0100
Subject: [PATCH] =?UTF-8?q?fix(agent-presence):=20upsert=20atomically=20?=
 =?UTF-8?q?=E2=80=94=20kill=20first-time-insert=20race?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previous setStatus() did read-modify-write:

  findOne → if-exists save / else create+save

Two concurrent first-time writes for the same userId both saw no row,
both INSERT'd, second hit unique-key (agent_presences.PRIMARY) and
500'd with "Duplicate entry '' for key 'agent_presences.PRIMARY'" —
visible in prod (2026-05-25 23:23:35Z) when Fabric.OpenclawPlugin's
presence-sync emitted two PUTs ~10 ms apart for the same agent (its
tick-overlap is being fixed separately in nav/Fabric.OpenclawPlugin).

Replace with repo.upsert(values, ['userId']) — compiles to MySQL
`INSERT … ON DUPLICATE KEY UPDATE`, atomic at the storage engine, no
read needed, no race window. Synthesize the returned entity from the
values we just wrote rather than a SELECT round-trip; controller only
reads {userId, status} off it.

Sim verified with 5 parallel PUTs to a fresh userId: all 200, no
Duplicate errors in guild log (was: 1 × 200 + 4 × 500 with the old
code).

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 src/agents/agent-presence.service.ts | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/agents/agent-presence.service.ts b/src/agents/agent-presence.service.ts
index 70f1ab3..221a198 100644
--- a/src/agents/agent-presence.service.ts
+++ b/src/agents/agent-presence.service.ts
@@ -39,15 +39,23 @@ export class AgentPresenceService {
 * Upsert a user's presence. Source is a free-text tag for debugging
 * (e.g. "hf-plugin", "manual", "test"). PUT /agents/:id/presence
 * calls this; the plugin pushes only on diff so writes are sparse.
+ *
+ * Implementation note: the older findOne+save split was a read-modify-
+ * write race — two concurrent first-time writes for the same userId
+ * would both read no row, both INSERT, second hits unique-key dup
+ * (`agent_presences.PRIMARY`) and 500s. Fabric.OpenclawPlugin's
+ * presence-sync occasionally fires two PUTs for the same agent within
+ * ~10 ms (tick overlap on its side — separate fix in the plugin),
+ * which surfaced this race in prod.
+ *
+ * `repo.upsert(values, conflictPaths)` compiles to MySQL
+ * `INSERT … ON DUPLICATE KEY UPDATE` and is atomic at the storage
+ * engine level — no read needed, no race window. We synthesize the
+ * returned entity from what we just wrote rather than round-tripping
+ * a SELECT — the controller only reads {userId, status} off it.
 */
 async setStatus(userId: string, status: PresenceStatus, source: string): Promise {
- const existing = await this.repo.findOne({ where: { userId } });
- if (existing) {
- existing.status = status;
- existing.source = source;
- return this.repo.save(existing);
- }
- const row = this.repo.create({ userId, status, source });
- return this.repo.save(row);
+ await this.repo.upsert({ userId, status, source }, ['userId']);
+ return this.repo.create({ userId, status, source });
 }
 }