Files
hzhang 9ba591795b fix: wake dedupe + inline slot context + complete contracts.tools
Three issues making HF→agent wakeup unusable in practice, surfaced by
DinD sim end-to-end test (recruiter agent + slot for 招募 manager task):

1. **Plugin re-woke the same slot every 30s.** The inline runCheck only
   destructured agentId from scheduleCache.getAgentsWithDueSlots() and
   dropped the slots array, then called wakeAgent without recording the
   wake. The simplified inline scheduler also never PATCHes slot status
   server-side from not_started→ongoing, so the next 30s check sees the
   slot still due and wakes again. After 4 wakes the agent's wakeup
   session was full of WAKEUP_OK noise.

   Fix: keep slots in runCheck, add an in-memory wakedSlotKeys set
   keyed by (agentId, slotId|virtual_id|scheduled_at). Dedupe on this
   set; clear it inside the sync interval (fresh wake budget per sync).
   Server-side slot transition still TODO (requires re-introducing the
   CalendarScheduler class path or PATCH /calendar/slots/.../agent-update
   here); the dedupe at least stops the wake spam.

2. **Wakeup message had no slot context.** The wakeup body just said
   'follow hf-wakeup workflow' with no slot id/event_data/task_code.
   The agent then had to call harborforge_calendar_status to learn
   anything — which itself is broken in the simplified scheduler (it
   queries a CalendarScheduler instance that never gets created).

   Fix: pass dueSlots into wakeAgent and inline the highest-priority
   slot's {slot_id, scheduled_at, priority, slot_type, event_data} as
   a JSON block in the wakeup message. The agent reads
   event_data.task_code directly and routes via workflow_lookup without
   any round-trip. Per PLG-CAL-001 docs in hf-hangman-lab SKILL.md, this
   is the documented contract; we are bringing the message in line.

3. **contracts.tools listed 5 of the 9 registered tools.** Manifest had
   harborforge_status/telemetry/monitor_telemetry/calendar_status/
   calendar_complete. Code also registers calendar_abort, calendar_pause,
   calendar_resume, harborforge_restart_status. With the new OpenClaw
   plugin host enforcement (same gotcha that bit Meridian — see
   zhi/Meridian#2), undeclared tools are silently dropped from the
   agent's tool list, so abort/pause/resume cannot be called by the
   agent. plugin doctor was emitting:
   'plugin tool is undeclared (harbor-forge): harborforge_calendar_abort'
   for each missing tool.

   Fix: add the 4 missing tool names to contracts.tools.

Also use api.config as the primary config source in wakeAgent (current
public API), falling back to runtime.config.loadConfig() for older
hosts — same pattern as the Meridian fix.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 12:02:25 +01:00

677 lines
22 KiB
TypeScript

/**
* HarborForge Plugin for OpenClaw
*
* Provides monitor-related tools and exposes OpenClaw metadata
* for the HarborForge Monitor bridge (via monitor_port).
*
* Also integrates with HarborForge Calendar system to wake agents
* for scheduled tasks (PLG-CAL-002, PLG-CAL-004).
*
* Sidecar architecture has been removed. Telemetry data is now
* served directly by the plugin when Monitor queries via the
* local monitor_port communication path.
*/
import { hostname, freemem, totalmem, uptime, loadavg, platform } from 'node:os';
import { definePluginEntry } from 'openclaw/plugin-sdk/plugin-entry';
import { MultiAgentScheduleCache } from './calendar/schedule-cache.js';
import { getPluginConfig } from './core/config.js';
import { MonitorBridgeClient, type OpenClawMeta } from './core/monitor-bridge.js';
import type { OpenClawAgentInfo } from './core/openclaw-agents.js';
import { registerGatewayStartHook } from './hooks/gateway-start.js';
import { registerGatewayStopHook } from './hooks/gateway-stop.js';
import {
createCalendarBridgeClient,
createCalendarScheduler,
CalendarScheduler,
AgentWakeContext,
} from './calendar/index.js';
/**
 * The slice of the OpenClaw host API that this plugin touches.
 *
 * Only `logger`, `on` and `registerTool` are mandatory; every other member
 * is optional, so callers must guard each access.
 */
interface PluginAPI {
  /** Host logger exposing one variadic method per level. */
  logger: Record<'info' | 'error' | 'debug' | 'warn', (...args: any[]) => void>;
  /** Version string reported directly on the api object, when present. */
  version?: string;
  /** Runtime accessors: an optional version and an optional config loader. */
  runtime?: {
    version?: string;
    config?: { loadConfig?: () => any };
  };
  /** Host configuration object, when the host exposes it on the api. */
  config?: Record<string, unknown>;
  /** Plugin-scoped configuration, when the host exposes it on the api. */
  pluginConfig?: Record<string, unknown>;
  /** Subscribe a zero-argument handler to a named host event. */
  on(event: string, handler: () => void): void;
  /** Register a tool through a factory that receives a host context. */
  registerTool(factory: (ctx: any) => any): void;
  /** Spawn a sub-agent with task context (OpenClaw 2.1+) */
  spawn?(options: {
    agentId?: string;
    task: string;
    model?: string;
    timeoutSeconds?: number;
  }): Promise<{ sessionId: string; status: string }>;
  /** Get current agent status */
  getAgentStatus?(): Promise<{ status: string } | null>;
}
function register(api: PluginAPI): void {
// Prefer the host-supplied logger. When the host gives none, fall back to
// the console, prefixing every line with '[HarborForge]' so plugin output
// stays identifiable.
const consoleFallbackLogger = {
  info(...args: any[]) {
    return console.log('[HarborForge]', ...args);
  },
  error(...args: any[]) {
    return console.error('[HarborForge]', ...args);
  },
  debug(...args: any[]) {
    return console.debug('[HarborForge]', ...args);
  },
  warn(...args: any[]) {
    return console.warn('[HarborForge]', ...args);
  },
};
const logger = api.logger || consoleFallbackLogger;
/**
 * Read the plugin configuration through `getPluginConfig` on every call,
 * so callers always work with whatever the helper currently returns.
 */
function resolveConfig() {
  const currentConfig = getPluginConfig(api);
  return currentConfig;
}
/**
 * Work out which agent id this process should report.
 *
 * Lookup order: a non-empty `AGENT_ID` environment variable, then the id of
 * the first entry in the host config's `agents.list`, then
 * `agents.defaults.id`, and finally the literal 'unknown'.
 */
function resolveAgentId(): string {
  const fromEnv = process.env.AGENT_ID;
  if (fromEnv) {
    return fromEnv;
  }

  // The host config is only consulted when the environment gives no answer.
  const agents = api.runtime?.config?.loadConfig?.()?.agents;
  const firstListedId = agents?.list?.[0]?.id;
  if (firstListedId !== undefined && firstListedId !== null) {
    return firstListedId;
  }
  return agents?.defaults?.id ?? 'unknown';
}
/**
 * Build a Monitor bridge client for the configured `monitor_port`.
 *
 * @returns a new client on each call, or `null` when the port is missing,
 *          falsy, or not greater than zero.
 */
function getBridgeClient(): MonitorBridgeClient | null {
  const { monitor_port: port } = resolveConfig();
  const bridgeDisabled = !port || port <= 0;
  return bridgeDisabled ? null : new MonitorBridgeClient(port);
}
/**
 * Collect current system telemetry snapshot.
 * This data is exposed to the Monitor bridge when it queries the plugin.
 *
 * Memory figures and the hostname are sampled exactly once per call so the
 * fields of a single snapshot agree with each other: `used` is always
 * `total - free`, and `usagePercent` is derived from those same two
 * readings.
 *
 * @returns a plain object with identifier, platform, hostname, uptime,
 *          memory, load averages, OpenClaw/plugin versions and an ISO
 *          timestamp.
 */
function collectTelemetry() {
  const live = resolveConfig();
  const host = hostname();
  const [avg1, avg5, avg15] = loadavg();

  // Sample once: calling totalmem()/freemem() repeatedly can return
  // different values and produce an internally inconsistent snapshot.
  const total = totalmem();
  const free = freemem();
  const used = total - free;

  return {
    identifier: live.identifier || host,
    platform: platform(),
    hostname: host,
    uptime: uptime(),
    memory: {
      total,
      free,
      used,
      // Guard the division so a zero total cannot yield NaN.
      usagePercent: total > 0 ? (used / total) * 100 : 0,
    },
    load: {
      avg1,
      avg5,
      avg15,
    },
    openclaw: {
      version: api.runtime?.version || api.version || 'unknown',
      pluginVersion: '0.3.1', // Bumped for PLG-CAL-004
    },
    timestamp: new Date().toISOString(),
  };
}
// Handle of the periodic metadata push timer, or null when none is stored.
// It is written through the setMetaPushInterval callback passed to the
// gateway-start hook and read/cleared through the callbacks passed to the
// gateway-stop hook.
let metaPushInterval: ReturnType<typeof setInterval> | null = null;
// Active calendar scheduler handle, or null when stopped.
// NOTE(review): startCalendarScheduler stores a lightweight handle here via
// an `any` cast rather than a real CalendarScheduler instance, so the
// declared type is not a guarantee of which methods exist at runtime.
let calendarScheduler: CalendarScheduler | null = null;
/**
 * Send OpenClaw metadata (host version, plugin version, agent names) to the
 * Monitor bridge.
 *
 * Does nothing when no bridge client is available. Agent names come from
 * the host config's `agents.list`: string entries are used as-is, object
 * entries contribute their `name`, and empty values are dropped. Any error
 * while reading the config leaves the list empty. The outcome of the push
 * is only logged at debug level; this function never reports failure to
 * its caller.
 */
async function pushMetaToMonitor() {
  const bridge = getBridgeClient();
  if (!bridge) {
    return;
  }

  let agentNames: string[] = [];
  try {
    const listed = api.runtime?.config?.loadConfig?.()?.agents?.list;
    if (Array.isArray(listed)) {
      // Collect into a scratch array so a mid-way failure leaves
      // agentNames untouched.
      const collected: string[] = [];
      for (const entry of listed) {
        const name = typeof entry === 'string' ? entry : entry?.name;
        if (name) {
          collected.push(name);
        }
      }
      agentNames = collected;
    }
  } catch {
    /* non-fatal */
  }

  const hostVersion = api.runtime?.version || api.version || 'unknown';
  const meta: OpenClawMeta = {
    version: hostVersion,
    plugin_version: '0.3.1',
    agents: agentNames.map((name) => ({ name })),
  };

  const delivered = await bridge.pushOpenClawMeta(meta);
  if (!delivered) {
    logger.debug('Monitor bridge unreachable for metadata push (non-fatal)');
    return;
  }
  logger.debug('pushed OpenClaw metadata to Monitor bridge');
}
/**
 * Get current agent status from OpenClaw.
 * Falls back to querying backend if OpenClaw API unavailable.
 *
 * Both sources are validated against the set of statuses named in the
 * return type, so a caller never receives a string outside that union. An
 * unrecognised value from the host API is treated like an unavailable API
 * and the backend is asked instead; an unrecognised value from the backend
 * yields `null`.
 *
 * @returns one of the known statuses, or `null` when neither source gives
 *          a usable answer (errors are logged at debug level, not thrown).
 */
async function getAgentStatus(): Promise<'idle' | 'on_call' | 'busy' | 'exhausted' | 'offline' | null> {
  const knownStatuses = ['idle', 'on_call', 'busy', 'exhausted', 'offline'] as const;
  type KnownStatus = (typeof knownStatuses)[number];
  const toKnownStatus = (value: unknown): KnownStatus | null =>
    typeof value === 'string' && (knownStatuses as readonly string[]).includes(value)
      ? (value as KnownStatus)
      : null;

  // Try OpenClaw API first (if available)
  if (api.getAgentStatus) {
    try {
      const reported = await api.getAgentStatus();
      const fromHost = toKnownStatus(reported?.status);
      if (fromHost) {
        return fromHost;
      }
    } catch (err) {
      logger.debug('Failed to get agent status from OpenClaw API:', err);
    }
  }

  // Fallback: query backend for agent status
  const live = resolveConfig();
  const agentId = resolveAgentId();
  try {
    // The id comes from env/config and may contain characters that are
    // significant in a query string, so it must be percent-encoded.
    const url = `${live.backendUrl}/calendar/agent/status?agent_id=${encodeURIComponent(agentId)}`;
    const response = await fetch(url, {
      headers: {
        'X-Agent-ID': agentId,
        'X-Claw-Identifier': live.identifier || hostname(),
      },
    });
    if (response.ok) {
      const data = await response.json();
      return toKnownStatus(data?.status);
    }
  } catch (err) {
    logger.debug('Failed to get agent status from backend:', err);
  }
  return null;
}
/**
 * Wake an agent by dispatching an inbound message into its dedicated
 * `agent:<id>:hf-wakeup` session, using the reply-runtime dispatcher that
 * is loaded dynamically from the OpenClaw plugin SDK.
 *
 * When `dueSlots` is non-empty, the first element's id, virtual id,
 * schedule time, priority, type and event data are appended to the message
 * as a fenced JSON block so the agent has the slot context inline.
 * NOTE(review): only element 0 is inlined; this presumably relies on the
 * caller passing slots highest-priority first — confirm against the
 * schedule cache ordering.
 *
 * @param agentId  id of the agent to wake
 * @param dueSlots slots that triggered the wake (optional)
 * @returns `true` once the dispatch call has returned, `false` when the
 *          host config cannot be obtained or anything throws (the failure
 *          is logged, not rethrown)
 */
async function wakeAgent(
  agentId: string,
  dueSlots?: Array<{
    id?: number | null;
    virtual_id?: string | null;
    event_data?: any;
    scheduled_at?: string;
    priority?: number;
    slot_type?: string;
    [k: string]: unknown;
  }>
): Promise<boolean> {
  logger.info(`Waking agent ${agentId}: has due slots`);
  const sessionKey = `agent:${agentId}:hf-wakeup`;

  try {
    // The path is held in a variable (and marked webpackIgnore) so the
    // import is resolved at runtime rather than by a bundler.
    const sdkPath = 'openclaw/plugin-sdk/reply-runtime';
    const { dispatchInboundMessageWithDispatcher } = await import(
      /* webpackIgnore: true */ sdkPath
    );

    // Current hosts expose the config on api.config; older ones only via
    // the deprecated runtime.config.loadConfig().
    const cfg = api.config ?? api.runtime?.config?.loadConfig?.();
    if (!cfg) {
      logger.error('Cannot load OpenClaw config for dispatch');
      return false;
    }

    // Inline the leading slot so the agent needs no follow-up status call.
    const leadSlot = dueSlots?.[0];
    let slotBlock = '';
    if (leadSlot) {
      const slotSummary = {
        slot_id: leadSlot.id ?? null,
        virtual_id: leadSlot.virtual_id ?? null,
        scheduled_at: leadSlot.scheduled_at ?? null,
        priority: leadSlot.priority ?? null,
        slot_type: leadSlot.slot_type ?? null,
        event_data: leadSlot.event_data ?? null,
      };
      const slotJson = JSON.stringify(slotSummary, null, 2);
      slotBlock = `\n\nMatching slot:\n\`\`\`json\n${slotJson}\n\`\`\``;
    }

    const wakeupMessage =
      `You have due slots. Follow the \`hf-wakeup\` workflow of skill ` +
      `\`hf-hangman-lab\` to proceed. Only reply \`WAKEUP_OK\` in this ` +
      `session.${slotBlock}`;

    const inboundContext = {
      Body: wakeupMessage,
      SessionKey: sessionKey,
      From: 'harborforge-calendar',
      Provider: 'harborforge',
    };
    // The agent's reply is only logged (first 100 characters).
    const logReply = async (payload: any) => {
      const replyPreview = (payload.text || '').trim().slice(0, 100);
      logger.info(`Agent ${agentId} wakeup reply: ${replyPreview}`);
    };

    const result = await dispatchInboundMessageWithDispatcher({
      ctx: inboundContext,
      cfg,
      dispatcherOptions: { deliver: logReply },
    });
    logger.info(`Agent ${agentId} dispatched: ${result?.status || 'ok'}`);
    return true;
  } catch (err: any) {
    const reason = err?.message || err?.code || String(err);
    // Keep the log line short: at most the first three stack lines.
    const stackHead = err?.stack?.split('\n').slice(0, 3).join(' | ') || '';
    logger.error(`Failed to dispatch agent for slot: ${reason} ${stackHead}`);
    return false;
  }
}
/**
 * Track session completion and update slot status accordingly.
 *
 * There is no real completion signal here: the slot is assumed finished
 * once its estimated duration has elapsed, checked on a 30-second timer.
 * At that point `completeCurrentSlot` is called with the elapsed minutes.
 *
 * Tracking is skipped entirely when the slot has no usable
 * `estimated_duration` (the deadline would be NaN and the timer would
 * re-arm forever). A failure while completing the slot is reported on the
 * console instead of escaping the timer callback as an unhandled rejection.
 *
 * @param sessionId id of the session being tracked (used in diagnostics)
 * @param context   wake context whose `slot.estimated_duration` is in minutes
 */
function trackSessionCompletion(sessionId: string, context: AgentWakeContext): void {
  // Poll for session completion (simplified approach)
  // In production, this would use webhooks or event streaming
  const pollInterval = 30000; // 30 seconds
  const maxDuration = context?.slot?.estimated_duration * 60 * 1000; // Convert to ms

  if (!Number.isFinite(maxDuration)) {
    // `elapsed >= NaN` is never true, so polling would never terminate.
    return;
  }

  const startTime = Date.now();
  const poll = async () => {
    if (!calendarScheduler) return;
    const elapsed = Date.now() - startTime;

    if (elapsed < maxDuration) {
      // Not due yet: re-arm the timer.
      setTimeout(poll, pollInterval);
      return;
    }

    // Estimated time is up: assume completion.
    const actualMinutes = Math.round(elapsed / 60000);
    try {
      await calendarScheduler.completeCurrentSlot(actualMinutes);
    } catch (err) {
      // Nothing awaits this callback, so the error must be handled here.
      console.error('[HarborForge]', `Failed to complete slot for session ${sessionId}:`, err);
    }
  };

  // Start polling
  setTimeout(poll, pollInterval);
}
/**
 * Initialize and start the calendar scheduler (multi-agent sync mode).
 *
 * Two timers are started:
 *  - every 5 minutes all agent schedules are pulled from the backend into
 *    the schedule cache, and the wake-dedupe set is cleared;
 *  - every 30 seconds agents with due slots are looked up and woken.
 *
 * A handle is stored in `calendarScheduler`. Besides `stop()`, it exposes
 * the accessors the registered tools call, so those tools return a result
 * instead of throwing "is not a function". Slot lifecycle mutations are not
 * implemented by this inline loop and reject with an explicit error.
 *
 * Calling this while a scheduler is already active stops the previous one
 * first so its timers are not leaked.
 */
function startCalendarScheduler(): void {
  if (calendarScheduler) {
    logger.debug('Calendar scheduler already active; restarting');
    calendarScheduler.stop();
    calendarScheduler = null;
  }

  const live = resolveConfig();

  // Create bridge client (claw-instance level, not per-agent)
  const calendarBridge = createCalendarBridgeClient(
    api,
    live.backendUrl || 'https://monitor.hangman-lab.top',
    'unused' // agentId no longer needed at bridge level
  );

  // Multi-agent sync + check loop
  const scheduleCache = new MultiAgentScheduleCache();
  const SYNC_INTERVAL_MS = 300_000; // 5 min
  const CHECK_INTERVAL_MS = 30_000; // 30 sec

  // Sync: pull all agent schedules from backend
  async function runSync() {
    try {
      const result = await calendarBridge.syncSchedules();
      if (result) {
        scheduleCache.sync(result.date, result.schedules);
        const status = scheduleCache.getStatus();
        logger.info(`Schedule synced: ${status.agentCount} agents, ${status.totalSlots} slots`);
      }
    } catch (err) {
      logger.warn(`Schedule sync failed: ${String(err)}`);
    }
  }

  // Track wakes already dispatched for a slot in the current sync
  // window — the simplified inline scheduler does not PATCH slot
  // status server-side, so without dedupe the check loop re-wakes
  // the same slot every 30s. Set is cleared on each sync (fresh wake
  // budget per sync).
  const wakedSlotKeys = new Set<string>();
  const slotKey = (
    agentId: string,
    slot: { id?: unknown; virtual_id?: unknown; scheduled_at?: unknown }
  ): string => `${agentId}::${slot.id ?? slot.virtual_id ?? slot.scheduled_at}`;

  // A wake dispatch can outlast the 30s tick. Keys are only recorded after
  // a successful wake, so an overlapping pass would wake the same slot
  // again; this flag makes passes strictly sequential.
  let checkInFlight = false;

  // Check: find agents with due slots and wake them
  async function runCheck() {
    if (checkInFlight) return;
    checkInFlight = true;
    try {
      const agentsWithDue = scheduleCache.getAgentsWithDueSlots(new Date());
      for (const { agentId, slots } of agentsWithDue) {
        // Filter out slots we've already woken this sync window
        const fresh = slots.filter((s) => !wakedSlotKeys.has(slotKey(agentId, s)));
        if (fresh.length === 0) continue;

        // Check if agent is busy (best effort; backend may 405 the GET
        // — treat unknown as not-busy so wakeup still fires)
        let status: string | null = null;
        try {
          status = await calendarBridge.getAgentStatus(agentId);
        } catch {
          status = null;
        }
        if (status === 'busy' || status === 'offline' || status === 'exhausted') {
          continue;
        }

        // Wake the agent with the slot context inlined
        const ok = await wakeAgent(agentId, fresh);
        if (ok) {
          for (const s of fresh) {
            wakedSlotKeys.add(slotKey(agentId, s));
          }
        }
      }
    } catch (err) {
      // Runs from a timer: nothing awaits it, so never let it reject.
      logger.warn(`Calendar check failed: ${String(err)}`);
    } finally {
      checkInFlight = false;
    }
  }

  // Sync wrapper that also resets the wake-dedupe window
  const runSyncReset = async () => {
    wakedSlotKeys.clear();
    await runSync();
  };

  // Initial sync; runSync handles its own errors, so this is fire-and-forget.
  void runSyncReset();

  // Start intervals
  const syncHandle = setInterval(() => void runSyncReset(), SYNC_INTERVAL_MS);
  const checkHandle = setInterval(() => void runCheck(), CHECK_INTERVAL_MS);

  let running = true;
  const unsupportedSlotUpdate = async (): Promise<never> => {
    throw new Error(
      'Slot lifecycle updates are not supported by the inline multi-agent calendar scheduler'
    );
  };

  // Store the handle for cleanup and for the status tools (reuses the
  // calendarScheduler variable; this is not a CalendarScheduler instance).
  (calendarScheduler as any) = {
    stop() {
      running = false;
      clearInterval(syncHandle);
      clearInterval(checkHandle);
    },
    isRunning: () => running,
    // The inline loop only wakes agents; it never owns a "current slot",
    // persisted state, or restart flag, so these report empty values.
    isProcessing: () => false,
    getCurrentSlot: () => null,
    isRestartPending: () => false,
    getState: () => null,
    getStateFilePath: () => null,
    completeCurrentSlot: unsupportedSlotUpdate,
    abortCurrentSlot: unsupportedSlotUpdate,
    pauseCurrentSlot: unsupportedSlotUpdate,
    resumeCurrentSlot: unsupportedSlotUpdate,
  };

  logger.info('Calendar scheduler started (multi-agent sync mode)');
}
/**
 * Shut down the active calendar scheduler, if there is one: call its
 * `stop()`, drop the stored reference, and log that it stopped. A call
 * with no active scheduler is a no-op.
 */
function stopCalendarScheduler(): void {
  if (!calendarScheduler) {
    return;
  }
  calendarScheduler.stop();
  calendarScheduler = null;
  logger.info('Calendar scheduler stopped');
}
// Gateway start wiring: hand the hook module the logger, the metadata push
// routine, the scheduler starter, and a setter that stores an interval
// handle in metaPushInterval.
// NOTE(review): presumably the start hook creates the periodic push timer
// and reports its handle through setMetaPushInterval — confirm in
// ./hooks/gateway-start.js.
registerGatewayStartHook(api, {
logger,
pushMetaToMonitor,
startCalendarScheduler,
setMetaPushInterval(handle) {
metaPushInterval = handle;
},
});
// Gateway stop wiring: expose the stored interval handle through a getter,
// a callback that resets it to null, and the scheduler stopper. The options
// are kept as inline literals so the callback parameter types are inferred
// from the hook signatures.
registerGatewayStopHook(api, {
logger,
getMetaPushInterval() {
return metaPushInterval;
},
clearMetaPushInterval() {
metaPushInterval = null;
},
stopCalendarScheduler,
});
// ---------------------------------------------------------------------
// Tool registrations. Every factory takes no arguments and returns a new
// descriptor (name, description, JSON-schema parameters, execute).
// ---------------------------------------------------------------------

/** Schema for a tool without arguments; a new object on every call. */
const emptyParameters = () => ({
  type: 'object',
  properties: {},
});

/** Reply used by the calendar tools when no scheduler handle is stored. */
const schedulerNotRunning = () => ({ error: 'Calendar scheduler not running' });

// Tool: plugin status — config summary, Monitor bridge health, calendar
// scheduler flags and a telemetry snapshot in a single reply.
api.registerTool(() => ({
  name: 'harborforge_status',
  description: 'Get HarborForge plugin status and current telemetry snapshot',
  parameters: emptyParameters(),
  async execute() {
    const live = resolveConfig();

    // null means "bridge not configured"; otherwise report reachability.
    const bridge = getBridgeClient();
    let monitorBridge = null;
    if (bridge) {
      const health = await bridge.health();
      if (health) {
        monitorBridge = { connected: true, ...health };
      } else {
        monitorBridge = { connected: false, error: 'Monitor bridge unreachable' };
      }
    }

    // Get calendar scheduler status
    const scheduler = calendarScheduler;
    const calendar = scheduler
      ? {
          running: scheduler.isRunning(),
          processing: scheduler.isProcessing(),
          currentSlot: scheduler.getCurrentSlot(),
          isRestartPending: scheduler.isRestartPending(),
        }
      : null;

    const config = {
      backendUrl: live.backendUrl,
      identifier: live.identifier || hostname(),
      monitorPort: live.monitor_port ?? null,
      reportIntervalSec: live.reportIntervalSec,
      // Only whether a key is set is reported, never the key itself.
      hasApiKey: Boolean(live.apiKey),
    };

    return {
      enabled: live.enabled !== false,
      config,
      monitorBridge,
      calendar,
      telemetry: collectTelemetry(),
    };
  },
}));

// Tool: telemetry snapshot (for Monitor bridge queries)
api.registerTool(() => ({
  name: 'harborforge_telemetry',
  description: 'Get current system telemetry data for HarborForge Monitor',
  parameters: emptyParameters(),
  async execute() {
    const snapshot = collectTelemetry();
    return snapshot;
  },
}));

// Tool: query Monitor bridge for host hardware telemetry
api.registerTool(() => ({
  name: 'harborforge_monitor_telemetry',
  description: 'Query HarborForge Monitor bridge for host hardware telemetry (CPU, memory, disk, etc.)',
  parameters: emptyParameters(),
  async execute() {
    const bridge = getBridgeClient();
    if (!bridge) {
      return { error: 'Monitor bridge not configured (monitor_port not set or 0)' };
    }
    const data = await bridge.telemetry();
    return data ? data : { error: 'Monitor bridge unreachable' };
  },
}));

// Tool: calendar slot management
api.registerTool(() => ({
  name: 'harborforge_calendar_status',
  description: 'Get current calendar scheduler status and pending slots',
  parameters: emptyParameters(),
  async execute() {
    const scheduler = calendarScheduler;
    if (!scheduler) {
      return schedulerNotRunning();
    }
    return {
      running: scheduler.isRunning(),
      processing: scheduler.isProcessing(),
      currentSlot: scheduler.getCurrentSlot(),
      state: scheduler.getState(),
      isRestartPending: scheduler.isRestartPending(),
      stateFilePath: scheduler.getStateFilePath(),
    };
  },
}));

// Tool: complete current slot (for agent to report completion)
api.registerTool(() => ({
  name: 'harborforge_calendar_complete',
  description: 'Complete the current calendar slot with actual duration',
  parameters: {
    type: 'object',
    properties: {
      actualDurationMinutes: {
        type: 'number',
        description: 'Actual time spent on the task in minutes',
      },
    },
    required: ['actualDurationMinutes'],
  },
  async execute(params: { actualDurationMinutes: number }) {
    const scheduler = calendarScheduler;
    if (!scheduler) {
      return schedulerNotRunning();
    }
    await scheduler.completeCurrentSlot(params.actualDurationMinutes);
    return { success: true, message: 'Slot completed' };
  },
}));

// Tool: abort current slot (for agent to report failure)
api.registerTool(() => ({
  name: 'harborforge_calendar_abort',
  description: 'Abort the current calendar slot',
  parameters: {
    type: 'object',
    properties: {
      reason: {
        type: 'string',
        description: 'Reason for aborting',
      },
    },
  },
  async execute(params: { reason?: string }) {
    const scheduler = calendarScheduler;
    if (!scheduler) {
      return schedulerNotRunning();
    }
    await scheduler.abortCurrentSlot(params.reason);
    return { success: true, message: 'Slot aborted' };
  },
}));

// Tool: pause current slot
api.registerTool(() => ({
  name: 'harborforge_calendar_pause',
  description: 'Pause the current calendar slot',
  parameters: emptyParameters(),
  async execute() {
    const scheduler = calendarScheduler;
    if (!scheduler) {
      return schedulerNotRunning();
    }
    await scheduler.pauseCurrentSlot();
    return { success: true, message: 'Slot paused' };
  },
}));

// Tool: resume current slot
api.registerTool(() => ({
  name: 'harborforge_calendar_resume',
  description: 'Resume the paused calendar slot',
  parameters: emptyParameters(),
  async execute() {
    const scheduler = calendarScheduler;
    if (!scheduler) {
      return schedulerNotRunning();
    }
    await scheduler.resumeCurrentSlot();
    return { success: true, message: 'Slot resumed' };
  },
}));

// Tool: check ScheduledGatewayRestart status
api.registerTool(() => ({
  name: 'harborforge_restart_status',
  description: 'Check if a gateway restart is pending (PLG-CAL-004)',
  parameters: emptyParameters(),
  async execute() {
    const scheduler = calendarScheduler;
    if (!scheduler) {
      return schedulerNotRunning();
    }
    const isRestartPending = scheduler.isRestartPending();
    const stateFilePath = scheduler.getStateFilePath();
    const message = isRestartPending
      ? 'A gateway restart has been scheduled. The scheduler has been paused.'
      : 'No gateway restart is pending.';
    return { isRestartPending, stateFilePath, message };
  },
}));

logger.info('HarborForge plugin registered (id: harbor-forge)');
}
// The PluginAPI shape declared in this file is wider than the standard
// OpenClawPluginApi: it also lists the optional `version`, `runtime` and
// `spawn` accessors that older OpenClaw builds exposed. `register` is
// therefore cast at the definePluginEntry boundary. At runtime the api
// object is whatever the gateway passes in, and each access to an optional
// member is guarded with optional chaining or a fallback.
const harborForgeEntry = definePluginEntry({
  id: 'harbor-forge',
  name: 'HarborForge',
  description: 'HarborForge plugin for OpenClaw - project management, monitoring, and CLI integration',
  register: register as (api: any) => void,
});

export default harborForgeEntry;