PLG-CAL-004: Implement ScheduledGatewayRestart handling in plugin

- Add state persistence (persistState/restoreState) for recovery after restart
- Add handleScheduledGatewayRestart method that:
  - Persists current scheduler state to disk
  - Sends final heartbeat to backend before shutdown
  - Stops the calendar scheduler (pauses scheduled tasks)
- Add isRestartPending flag to prevent new slot processing during restart
- Add isScheduledGatewayRestart helper to detect restart events
- Update scheduler to detect and handle ScheduledGatewayRestart events
- Add new tools: harborforge_restart_status, harborforge_calendar_pause/resume
- Export isRestartPending and getStateFilePath methods
- Bump plugin version to 0.3.1
This commit is contained in:
zhi
2026-04-01 09:41:02 +00:00
parent 24c4a7ad14
commit 3b0ea0ad12
6 changed files with 578 additions and 19 deletions

View File

@@ -2,6 +2,7 @@
* HarborForge Calendar Scheduler
*
* PLG-CAL-002: Plugin-side handling for pending slot execution.
* PLG-CAL-004: ScheduledGatewayRestart event handling with state persistence.
*
* Responsibilities:
* - Run calendar heartbeat every minute
@@ -9,13 +10,15 @@
* - Wake agent with task context
* - Handle slot status transitions (attended, ongoing, deferred)
* - Manage agent status transitions (idle → busy/on_call)
* - Persist state on ScheduledGatewayRestart and restore on startup
* - Send final heartbeat before graceful shutdown
*
* Design reference: NEXT_WAVE_DEV_DIRECTION.md §6 (Agent wakeup mechanism)
*/
import {
CalendarBridgeClient,
} from './calendar-bridge';
import { existsSync, mkdirSync, readFileSync, unlinkSync, writeFileSync } from 'fs';
import { dirname, join } from 'path';
import { CalendarBridgeClient } from './calendar-bridge';
import {
CalendarSlotResponse,
SlotStatus,
@@ -43,6 +46,8 @@ export interface CalendarSchedulerConfig {
heartbeatIntervalMs?: number;
/** Enable verbose debug logging */
debug?: boolean;
/** Directory for state persistence (default: plugin data dir) */
stateDir?: string;
}
/**
@@ -60,6 +65,26 @@ export interface AgentWakeContext {
isVirtual: boolean;
}
/**
* On-disk snapshot of scheduler state, written by persistState() and read
* back by restoreState() so the scheduler can recover after a restart.
*
* The snapshot is serialized as JSON, so every field is JSON-representable
* (the in-memory Set of deferred IDs is stored as an array).
*/
interface PersistedState {
/** Schema version; restoreState() only accepts snapshots equal to STATE_VERSION */
version: number;
/** ISO-8601 timestamp (Date#toISOString) of when the snapshot was written */
persistedAt: string;
/** Free-form reason for persistence (e.g., 'ScheduledGatewayRestart') */
reason: string;
/** The scheduler's current slot at persistence time, or null if there was none */
currentSlot: CalendarSlotResponse | null;
/** Deferred slot IDs at persistence time; restored into the scheduler's deferred set */
deferredSlotIds: string[];
/** Whether a slot was in progress when the snapshot was written */
isProcessing: boolean;
/** Agent status at persistence time. NOTE: persistState() currently always writes null here */
agentStatus: AgentStatusValue | null;
}
/**
* Current execution state tracked by the scheduler.
*/
@@ -76,21 +101,33 @@ interface SchedulerState {
deferredSlotIds: Set<string>;
/** Whether agent is currently processing a slot */
isProcessing: boolean;
/** Whether a gateway restart is scheduled/pending */
isRestartPending: boolean;
}
/** File name of the persisted scheduler state; joined onto the configured state directory */
const STATE_FILENAME = 'calendar-scheduler-state.json';
/** Schema version written into each snapshot; snapshots with a different version are not restored */
const STATE_VERSION = 1;
/**
* CalendarScheduler manages the periodic heartbeat and slot execution lifecycle.
*/
export class CalendarScheduler {
private config: Required<CalendarSchedulerConfig>;
private state: SchedulerState;
private stateFilePath: string;
constructor(config: CalendarSchedulerConfig) {
this.config = {
heartbeatIntervalMs: 60000, // 1 minute default
debug: false,
stateDir: this.getDefaultStateDir(),
...config,
};
this.stateFilePath = join(this.config.stateDir, STATE_FILENAME);
this.state = {
isRunning: false,
currentSlot: null,
@@ -98,7 +135,182 @@ export class CalendarScheduler {
intervalHandle: null,
deferredSlotIds: new Set(),
isProcessing: false,
isRestartPending: false,
};
// Attempt to restore state from previous persistence
this.restoreState();
}
/**
 * Pick the default directory for the persisted state file.
 *
 * Candidates are tried in order: the OPENCLAW_PLUGIN_DATA_DIR and
 * HARBORFORGE_PLUGIN_DIR environment variables, `.harborforge` and `data`
 * under the current working directory, then `/tmp/harborforge`. A candidate
 * is accepted when it exists (or can be created) and a probe file can be
 * written into it. The probe file is removed again so that selecting a
 * directory leaves no stray `.write-test` file behind.
 *
 * @returns The first usable candidate, or the current working directory when
 *          none of the candidates is writable.
 */
private getDefaultStateDir(): string {
  const candidates = [
    process.env.OPENCLAW_PLUGIN_DATA_DIR,
    process.env.HARBORFORGE_PLUGIN_DIR,
    join(process.cwd(), '.harborforge'),
    join(process.cwd(), 'data'),
    '/tmp/harborforge',
  ];

  for (const dir of candidates) {
    // Unset (or empty) environment variables are skipped.
    if (!dir) {
      continue;
    }

    try {
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
      }

      // Probe with a real write: this is the operation persistState() will
      // perform later, so it is the most reliable writability check.
      const probeFile = join(dir, '.write-test');
      writeFileSync(probeFile, '', { flag: 'w' });

      try {
        unlinkSync(probeFile);
      } catch {
        // Best-effort cleanup: the directory is writable even if the probe
        // file could not be removed, so it is still a valid choice.
      }

      return dir;
    } catch {
      // Not creatable or not writable: try the next candidate.
      continue;
    }
  }

  // Fallback to current working directory
  return process.cwd();
}
/**
 * Persist current state to disk for recovery after restart.
 *
 * Writes a JSON snapshot (current slot, deferred slot IDs, processing flag)
 * to the state file. The parent directory is created first if it is missing,
 * so a caller-supplied `stateDir` that does not exist yet does not cause the
 * snapshot to be lost.
 *
 * Failures are logged and swallowed: persistence is best-effort and must not
 * interrupt the shutdown sequence that calls it.
 *
 * @param reason Why the state is being persisted; stored in the snapshot and
 *               included in the log line.
 */
private persistState(reason: string): void {
  try {
    const persistedState: PersistedState = {
      version: STATE_VERSION,
      persistedAt: new Date().toISOString(),
      reason,
      currentSlot: this.state.currentSlot,
      // Sets are not JSON-serializable; store the IDs as a plain array.
      deferredSlotIds: Array.from(this.state.deferredSlotIds),
      isProcessing: this.state.isProcessing,
      // Not captured: the scheduler does not track agent status locally, and
      // restoreState() does not read this field.
      agentStatus: null,
    };

    // `recursive: true` makes this a no-op when the directory already exists.
    mkdirSync(dirname(this.stateFilePath), { recursive: true });
    writeFileSync(this.stateFilePath, JSON.stringify(persistedState, null, 2));
    this.config.logger.info(`[PLG-CAL-004] State persisted to ${this.stateFilePath} (reason: ${reason})`);
  } catch (err) {
    this.config.logger.error('[PLG-CAL-004] Failed to persist state:', err);
  }
}
/**
 * Restore state from disk if available.
 *
 * Reads the state file, validates it, and re-populates the deferred slot IDs.
 * A previously in-progress slot is only reported in the log; it is not
 * re-assigned to the scheduler here.
 *
 * The file is handled as follows:
 * - missing file: nothing to do;
 * - `{ restored: true, ... }` marker left by clearPersistedState(): the
 *   snapshot was already consumed, so return silently instead of reporting a
 *   version mismatch on every startup;
 * - not a JSON object, or a different `version`: warn and mark the file as
 *   processed;
 * - valid snapshot: restore, then mark the file as processed.
 *
 * Errors (unreadable file, invalid JSON) are logged and swallowed so that a
 * bad state file cannot prevent the scheduler from being constructed.
 */
private restoreState(): void {
  try {
    if (!existsSync(this.stateFilePath)) {
      return;
    }

    const raw = readFileSync(this.stateFilePath, 'utf-8');
    const parsed: unknown = JSON.parse(raw);

    // JSON.parse can legally yield null, a primitive, or an array.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      this.config.logger.warn(`[PLG-CAL-004] Ignoring malformed state file at ${this.stateFilePath}`);
      this.clearPersistedState();
      return;
    }
    const record = parsed as Record<string, unknown>;

    // Marker written by clearPersistedState(): this snapshot was already restored.
    if (record.restored === true) {
      return;
    }

    // Validate version
    if (record.version !== STATE_VERSION) {
      this.config.logger.warn(`[PLG-CAL-004] State version mismatch: ${String(record.version)} vs ${STATE_VERSION}`);
      this.clearPersistedState();
      return;
    }
    const persisted = parsed as PersistedState;

    // Restore deferred slot IDs, keeping only well-formed string entries.
    const rawDeferredIds: unknown = record.deferredSlotIds;
    const deferredIds = Array.isArray(rawDeferredIds)
      ? rawDeferredIds.filter((id): id is string => typeof id === 'string')
      : [];
    if (deferredIds.length > 0) {
      this.state.deferredSlotIds = new Set(deferredIds);
      this.config.logger.info(`[PLG-CAL-004] Restored ${deferredIds.length} deferred slot(s)`);
    }

    // A slot was in progress when the snapshot was taken. Only report it:
    // the slot itself is not restored into the scheduler state here.
    if (persisted.isProcessing && persisted.currentSlot) {
      this.config.logger.warn(
        `[PLG-CAL-004] Previous session had in-progress slot: ${this.getSlotId(persisted.currentSlot)}`
      );
    }

    this.config.logger.info(`[PLG-CAL-004] State restored from ${persisted.persistedAt} (reason: ${persisted.reason})`);

    // Mark the snapshot as consumed so it is not applied a second time.
    this.clearPersistedState();
  } catch (err) {
    this.config.logger.error('[PLG-CAL-004] Failed to restore state:', err);
  }
}
/**
 * Mark the persisted state file as processed.
 *
 * The file is not deleted; its content is replaced with a small
 * `{ restored, at }` marker. Called after a successful restore and when a
 * snapshot is rejected for a version mismatch. A missing file is left alone,
 * and write failures are logged rather than thrown.
 */
private clearPersistedState(): void {
  try {
    if (!existsSync(this.stateFilePath)) {
      return;
    }

    // Overwrite rather than remove: the snapshot is replaced by a marker
    // recording when it was consumed.
    const processedMarker = { restored: true, at: new Date().toISOString() };
    writeFileSync(this.stateFilePath, JSON.stringify(processedMarker));
  } catch (err) {
    this.config.logger.error('[PLG-CAL-004] Failed to clear persisted state:', err);
  }
}
/**
 * Tell the backend the agent is going offline, as a last heartbeat before
 * shutdown.
 *
 * Reports agent status `offline` through the bridge. Any failure is logged
 * and swallowed so the caller's shutdown sequence continues.
 *
 * @param reason Why the final heartbeat is being sent; used in the log line only.
 */
private async sendFinalHeartbeat(reason: string): Promise<void> {
  const log = this.config.logger;

  try {
    log.info(`[PLG-CAL-004] Sending final heartbeat (reason: ${reason})`);

    const offlineStatus = { status: 'offline' };
    await this.config.bridge.reportAgentStatus(offlineStatus);

    log.info('[PLG-CAL-004] Final heartbeat sent successfully');
  } catch (err) {
    log.error('[PLG-CAL-004] Failed to send final heartbeat:', err);
  }
}
/**
 * React to a ScheduledGatewayRestart slot (PLG-CAL-004).
 *
 * Sequence, in order:
 *   1. flag the restart as pending so heartbeats stop picking up new slots;
 *   2. write the current scheduler state to disk;
 *   3. pause the slot that is currently being processed, if any;
 *   4. send the final (offline) heartbeat;
 *   5. stop the scheduler;
 *   6. report the restart slot itself as finished with zero duration.
 *
 * A failure in step 6 is logged and does not propagate.
 *
 * @param slot The restart slot; updated by `id` when set, otherwise by
 *             `virtual_id` when set.
 */
private async handleScheduledGatewayRestart(slot: CalendarSlotResponse): Promise<void> {
  const log = this.config.logger;
  const restartReason = 'ScheduledGatewayRestart';

  log.info('[PLG-CAL-004] Handling ScheduledGatewayRestart event');

  // Steps 1 and 2: block new slot processing, then snapshot the state.
  this.state.isRestartPending = true;
  this.persistState(restartReason);

  // Step 3: gracefully pause whatever is running right now.
  const slotInProgress = this.state.isProcessing && this.state.currentSlot;
  if (slotInProgress) {
    log.info('[PLG-CAL-004] Pausing current slot before restart');
    await this.pauseCurrentSlot();
  }

  // Step 4: let the backend know we are going away.
  await this.sendFinalHeartbeat(restartReason);

  // Step 5: halt the periodic scheduler.
  log.info('[PLG-CAL-004] Stopping scheduler due to gateway restart');
  this.stop();

  // Step 6: the restart slot has been handled, so close it out.
  // Preparing for the restart is not counted as working time.
  const finishedUpdate: SlotAgentUpdate = {
    status: SlotStatus.FINISHED,
    actual_duration: 0,
  };

  try {
    if (slot.id) {
      await this.config.bridge.updateSlot(slot.id, finishedUpdate);
      return;
    }
    if (slot.virtual_id) {
      await this.config.bridge.updateVirtualSlot(slot.virtual_id, finishedUpdate);
    }
  } catch (err) {
    log.error('[PLG-CAL-004] Failed to mark restart slot as finished:', err);
  }
}
/**
@@ -112,6 +324,7 @@ export class CalendarScheduler {
}
this.state.isRunning = true;
this.state.isRestartPending = false;
this.config.logger.info('Calendar scheduler started');
// Run initial heartbeat immediately
@@ -148,6 +361,12 @@ export class CalendarScheduler {
return;
}
// Skip heartbeat if restart is pending
if (this.state.isRestartPending) {
this.logDebug('Heartbeat skipped: gateway restart pending');
return;
}
this.state.lastHeartbeatAt = new Date();
try {
@@ -159,7 +378,9 @@ export class CalendarScheduler {
return;
}
this.logDebug(`Heartbeat: ${response.slots.length} slots pending, agent_status=${response.agent_status}`);
this.logDebug(
`Heartbeat: ${response.slots.length} slots pending, agent_status=${response.agent_status}`
);
// If agent is not idle, defer all pending slots
if (response.agent_status !== 'idle') {
@@ -178,7 +399,6 @@ export class CalendarScheduler {
// Agent is idle - handle pending slots
await this.handleIdleAgent(response.slots);
} catch (err) {
this.config.logger.error('Heartbeat error:', err);
}
@@ -225,7 +445,7 @@ export class CalendarScheduler {
// Filter out already deferred slots in this session
const eligibleSlots = slots.filter(
s => !this.state.deferredSlotIds.has(this.getSlotId(s))
(s) => !this.state.deferredSlotIds.has(this.getSlotId(s))
);
if (eligibleSlots.length === 0) {
@@ -238,7 +458,7 @@ export class CalendarScheduler {
this.config.logger.info(
`Selected slot for execution: id=${this.getSlotId(selectedSlot)}, ` +
`type=${selectedSlot.slot_type}, priority=${selectedSlot.priority}`
`type=${selectedSlot.slot_type}, priority=${selectedSlot.priority}`
);
// Mark remaining slots as deferred
@@ -247,10 +467,27 @@ export class CalendarScheduler {
this.state.deferredSlotIds.add(this.getSlotId(slot));
}
// Check if this is a ScheduledGatewayRestart event
if (this.isScheduledGatewayRestart(selectedSlot)) {
await this.handleScheduledGatewayRestart(selectedSlot);
return;
}
// Wake agent to execute selected slot
await this.executeSlot(selectedSlot);
}
/**
 * Tell whether a slot carries the ScheduledGatewayRestart system event.
 *
 * @param slot Slot to inspect.
 * @returns true only when the slot's `event_type` is `system_event`, it has
 *          `event_data`, and that data's `event` is `ScheduledGatewayRestart`.
 */
private isScheduledGatewayRestart(slot: CalendarSlotResponse): boolean {
  const isSystemEvent = slot.event_type === 'system_event';
  if (!isSystemEvent) {
    return false;
  }
  if (!slot.event_data) {
    return false;
  }

  const { event } = slot.event_data as CalendarEventDataSystemEvent;
  return event === 'ScheduledGatewayRestart';
}
/**
* Execute a slot by waking the agent.
*/
@@ -315,7 +552,6 @@ export class CalendarScheduler {
// Note: isProcessing remains true until agent signals completion
// This is handled by external completion callback
} catch (err) {
this.config.logger.error('Error executing slot:', err);
this.state.isProcessing = false;
@@ -361,7 +597,10 @@ export class CalendarScheduler {
/**
* Build prompt for job-type slots.
*/
private buildJobPrompt(slot: CalendarSlotResponse, jobData: CalendarEventDataJob): string {
private buildJobPrompt(
slot: CalendarSlotResponse,
jobData: CalendarEventDataJob
): string {
const duration = slot.estimated_duration;
const type = jobData.type;
const code = jobData.code;
@@ -518,7 +757,6 @@ Please use this time for the scheduled activity.`;
this.config.logger.info(
`Completed slot ${this.getSlotId(slot)}, actual_duration=${actualDurationMinutes}min`
);
} catch (err) {
this.config.logger.error('Failed to complete slot:', err);
} finally {
@@ -556,7 +794,6 @@ Please use this time for the scheduled activity.`;
this.config.logger.info(
`Aborted slot ${this.getSlotId(slot)}${reason ? `: ${reason}` : ''}`
);
} catch (err) {
this.config.logger.error('Failed to abort slot:', err);
} finally {
@@ -589,7 +826,6 @@ Please use this time for the scheduled activity.`;
}
this.config.logger.info(`Paused slot ${this.getSlotId(slot)}`);
} catch (err) {
this.config.logger.error('Failed to pause slot:', err);
}
@@ -617,7 +853,6 @@ Please use this time for the scheduled activity.`;
}
this.config.logger.info(`Resumed slot ${this.getSlotId(slot)}`);
} catch (err) {
this.config.logger.error('Failed to resume slot:', err);
}
@@ -692,6 +927,20 @@ Please use this time for the scheduled activity.`;
getCurrentSlot(): CalendarSlotResponse | null {
  // Hand back the slot held in scheduler state (null when there is none).
  const { currentSlot } = this.state;
  return currentSlot;
}
/**
 * Report whether the scheduler has flagged a gateway restart as pending.
 *
 * @returns The current value of the scheduler's `isRestartPending` flag.
 */
isRestartPending(): boolean {
  const { isRestartPending } = this.state;
  return isRestartPending;
}
/**
 * Expose where the scheduler persists its state.
 *
 * @returns The state file path computed in the constructor.
 */
getStateFilePath(): string {
  const { stateFilePath } = this;
  return stateFilePath;
}
}
/**