fix(agent-presence): upsert atomically — kill first-time-insert race #3

Merged
hzhang merged 1 commit from fix/presence-upsert-race into main 2026-05-26 02:06:20 +00:00

View File

@@ -39,15 +39,23 @@ export class AgentPresenceService {
* Upsert a user's presence. Source is a free-text tag for debugging
* (e.g. "hf-plugin", "manual", "test"). PUT /agents/:id/presence
* calls this; the plugin pushes only on diff so writes are sparse.
*
* Implementation note: the older findOne+save split was a read-modify-
* write race — two concurrent first-time writes for the same userId
* would both read no row, both INSERT, second hits unique-key dup
* (`agent_presences.PRIMARY`) and 500s. Fabric.OpenclawPlugin's
* presence-sync occasionally fires two PUTs for the same agent within
* ~10 ms (tick overlap on its side — separate fix in the plugin),
* which surfaced this race in prod.
*
* `repo.upsert(values, conflictPaths)` compiles to MySQL
* `INSERT … ON DUPLICATE KEY UPDATE` and is atomic at the storage
* engine level — no read needed, no race window. We synthesize the
* returned entity from what we just wrote rather than round-tripping
* a SELECT — the controller only reads {userId, status} off it.
*/
async setStatus(userId: string, status: PresenceStatus, source: string): Promise<AgentPresence> {
  // Single write keyed on userId, with no preceding findOne. The previous
  // read-then-save sequence let two concurrent first-time writes both see
  // "no row" and both INSERT, so the second failed on the primary key.
  await this.repo.upsert({ userId, status, source }, ['userId']);

  // Build the returned entity from the values just written instead of
  // issuing a follow-up SELECT. Fields not passed here are not loaded from
  // the database, so callers should rely only on userId/status/source.
  return this.repo.create({ userId, status, source });
}
}