Files
Fabric.Backend.Guild/src/agents/agent-presence.service.ts
hzhang 38b4665321 fix(agent-presence): upsert atomically — kill first-time-insert race
Previous setStatus() did read-modify-write:
  findOne → if-exists save / else create+save

Two concurrent first-time writes for the same userId both saw no row,
both INSERT'd, second hit unique-key (agent_presences.PRIMARY) and 500'd
with "Duplicate entry '<userId>' for key 'agent_presences.PRIMARY'" —
visible in prod (2026-05-25 23:23:35Z) when Fabric.OpenclawPlugin's
presence-sync emitted two PUTs ~10 ms apart for the same agent (its
tick-overlap is being fixed separately in nav/Fabric.OpenclawPlugin).

Replace with repo.upsert(values, ['userId']) — compiles to MySQL
`INSERT … ON DUPLICATE KEY UPDATE`, atomic at the storage engine,
no read needed, no race window. Synthesize the returned entity from
the values we just wrote rather than a SELECT round-trip; controller
only reads {userId, status} off it.

Sim verified with 5 parallel PUTs to a fresh userId: all 200, no
Duplicate errors in guild log (was: 1 × 200 + 4 × 500 with the
old code).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-26 02:25:07 +01:00

62 lines
2.6 KiB
TypeScript

import { Injectable } from '@nestjs/common';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import { AgentPresence } from '../entities/agent-presence.entity.js';
/**
 * Closed set of presence states an agent can be recorded as.
 * 'unknown' doubles as the fallback reported when no presence row exists.
 */
export type PresenceStatus =
  | 'idle'
  | 'on_call'
  | 'busy'
  | 'exhausted'
  | 'offline'
  | 'unknown';
@Injectable()
export class AgentPresenceService {
  constructor(
    @InjectRepository(AgentPresence)
    private readonly repo: Repository<AgentPresence>,
  ) {}

  /**
   * Look up the current presence of a single user.
   *
   * A falsy `userId` short-circuits to 'unknown' without querying the
   * repository; a missing row (or a row with no status) also yields
   * 'unknown'. Consumed by `RealtimeGateway` per recipient when
   * xType === 'announce'.
   *
   * @param userId - id of the user whose presence is wanted
   * @returns the stored status, or 'unknown' when none is available
   */
  async getStatus(userId: string): Promise<PresenceStatus> {
    const presence = userId ? await this.repo.findOne({ where: { userId } }) : null;
    return presence?.status ?? 'unknown';
  }

  /**
   * Bulk counterpart of the single-user lookup: resolves many recipients
   * with one query at delivery time.
   *
   * Every requested id is present in the result. Ids without a matching
   * row keep the 'unknown' default; an empty input returns an empty map
   * without issuing a query.
   *
   * @param userIds - ids to resolve
   * @returns map keyed by user id holding each user's status
   */
  async getStatusMap(userIds: string[]): Promise<Map<string, PresenceStatus>> {
    // Seed every requested id with the default so callers never see a gap.
    const statuses = new Map<string, PresenceStatus>(
      Array.from(userIds, (id): [string, PresenceStatus] => [id, 'unknown']),
    );

    if (userIds.length > 0) {
      const query = this.repo
        .createQueryBuilder('p')
        .where('p.userId IN (:...ids)', { ids: userIds });
      const found = await query.getMany();

      // Overwrite the defaults with whatever the table actually holds.
      for (const { userId, status } of found) {
        statuses.set(userId, status);
      }
    }

    return statuses;
  }

  /**
   * Create or overwrite a user's presence row. Invoked by
   * PUT /agents/:id/presence; the plugin only pushes on change, so writes
   * are infrequent.
   *
   * Why a single upsert: the earlier findOne-then-save approach was a
   * read-modify-write race. Two concurrent first-time writes for one
   * userId both found no row, both INSERTed, and the loser failed on the
   * primary key (`agent_presences.PRIMARY`) with a 500.
   * Fabric.OpenclawPlugin's presence-sync sometimes emits two PUTs for
   * the same agent within ~10 ms (tick overlap, fixed separately in the
   * plugin), which exposed the race in production.
   *
   * `repo.upsert(values, conflictPaths)` is relied on to compile to MySQL
   * `INSERT … ON DUPLICATE KEY UPDATE`, which is atomic at the storage
   * engine level, so no prior read is needed and no race window remains.
   *
   * The returned entity is built from the values just written instead of
   * being re-read with a SELECT; the controller only consumes
   * {userId, status}. NOTE(review): any columns filled in by the database
   * are presumably absent from this object — confirm before reading more
   * fields off it.
   *
   * @param userId - id of the user whose presence is being written
   * @param status - new presence status
   * @param source - free-text debugging tag (e.g. "hf-plugin", "manual", "test")
   * @returns an entity instance carrying the written userId, status and source
   */
  async setStatus(userId: string, status: PresenceStatus, source: string): Promise<AgentPresence> {
    await this.repo.upsert({ userId, status, source }, ['userId']);

    // Fresh literal on purpose: the entity mirrors the inputs only.
    const written = this.repo.create({ userId, status, source });
    return written;
  }
}