PLG-CAL-004: Implement ScheduledGatewayRestart handling in plugin
- Add state persistence (persistState/restoreState) for recovery after restart
- Add handleScheduledGatewayRestart method that:
  - Persists current scheduler state to disk
  - Sends final heartbeat to backend before shutdown
  - Stops the calendar scheduler (pauses scheduled tasks)
- Add isRestartPending flag to prevent new slot processing during restart
- Add isScheduledGatewayRestart helper to detect restart events
- Update scheduler to detect and handle ScheduledGatewayRestart events
- Add new tools: harborforge_restart_status, harborforge_calendar_pause/resume
- Export isRestartPending and getStateFilePath methods
- Bump plugin version to 0.3.1
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
* HarborForge Calendar Scheduler
|
||||
*
|
||||
* PLG-CAL-002: Plugin-side handling for pending slot execution.
|
||||
* PLG-CAL-004: ScheduledGatewayRestart event handling with state persistence.
|
||||
*
|
||||
* Responsibilities:
|
||||
* - Run calendar heartbeat every minute
|
||||
@@ -9,13 +10,15 @@
|
||||
* - Wake agent with task context
|
||||
* - Handle slot status transitions (attended, ongoing, deferred)
|
||||
* - Manage agent status transitions (idle → busy/on_call)
|
||||
* - Persist state on ScheduledGatewayRestart and restore on startup
|
||||
* - Send final heartbeat before graceful shutdown
|
||||
*
|
||||
* Design reference: NEXT_WAVE_DEV_DIRECTION.md §6 (Agent wakeup mechanism)
|
||||
*/
|
||||
|
||||
import {
|
||||
CalendarBridgeClient,
|
||||
} from './calendar-bridge';
|
||||
import { writeFileSync, readFileSync, existsSync, mkdirSync, unlinkSync } from 'fs';
|
||||
import { join, dirname } from 'path';
|
||||
import { CalendarBridgeClient } from './calendar-bridge';
|
||||
import {
|
||||
CalendarSlotResponse,
|
||||
SlotStatus,
|
||||
@@ -43,6 +46,8 @@ export interface CalendarSchedulerConfig {
|
||||
heartbeatIntervalMs?: number;
|
||||
/** Enable verbose debug logging */
|
||||
debug?: boolean;
|
||||
/** Directory for state persistence (default: plugin data dir) */
|
||||
stateDir?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -60,6 +65,26 @@ export interface AgentWakeContext {
|
||||
isVirtual: boolean;
|
||||
}
|
||||
|
||||
/**
 * On-disk snapshot of the scheduler, serialized as JSON by `persistState`
 * and read back by `restoreState` on the next construction.
 */
interface PersistedState {
  /** Schema version of the snapshot; `restoreState` discards snapshots whose version differs from `STATE_VERSION`. */
  version: number;
  /** ISO-8601 timestamp (`Date#toISOString`) taken when the snapshot was written. */
  persistedAt: string;
  /** Why the snapshot was taken, e.g. 'ScheduledGatewayRestart'. */
  reason: string;
  /** Slot the scheduler was tracking as current when the snapshot was taken, if any. */
  currentSlot: CalendarSlotResponse | null;
  /** IDs of the slots that were marked deferred when the snapshot was taken. */
  deferredSlotIds: string[];
  /** True when a slot was still being processed when the snapshot was taken. */
  isProcessing: boolean;
  /** Agent status at snapshot time; `persistState` currently always writes `null`. */
  agentStatus: AgentStatusValue | null;
}
|
||||
|
||||
/**
|
||||
* Current execution state tracked by the scheduler.
|
||||
*/
|
||||
@@ -76,21 +101,33 @@ interface SchedulerState {
|
||||
deferredSlotIds: Set<string>;
|
||||
/** Whether agent is currently processing a slot */
|
||||
isProcessing: boolean;
|
||||
/** Whether a gateway restart is scheduled/pending */
|
||||
isRestartPending: boolean;
|
||||
}
|
||||
|
||||
/** State file name */
|
||||
const STATE_FILENAME = 'calendar-scheduler-state.json';
|
||||
/** State file version for migration compatibility */
|
||||
const STATE_VERSION = 1;
|
||||
|
||||
/**
|
||||
* CalendarScheduler manages the periodic heartbeat and slot execution lifecycle.
|
||||
*/
|
||||
export class CalendarScheduler {
|
||||
private config: Required<CalendarSchedulerConfig>;
|
||||
private state: SchedulerState;
|
||||
private stateFilePath: string;
|
||||
|
||||
constructor(config: CalendarSchedulerConfig) {
|
||||
this.config = {
|
||||
heartbeatIntervalMs: 60000, // 1 minute default
|
||||
debug: false,
|
||||
stateDir: this.getDefaultStateDir(),
|
||||
...config,
|
||||
};
|
||||
|
||||
this.stateFilePath = join(this.config.stateDir, STATE_FILENAME);
|
||||
|
||||
this.state = {
|
||||
isRunning: false,
|
||||
currentSlot: null,
|
||||
@@ -98,7 +135,182 @@ export class CalendarScheduler {
|
||||
intervalHandle: null,
|
||||
deferredSlotIds: new Set(),
|
||||
isProcessing: false,
|
||||
isRestartPending: false,
|
||||
};
|
||||
|
||||
// Attempt to restore state from previous persistence
|
||||
this.restoreState();
|
||||
}
|
||||
|
||||
/**
 * Pick the directory that will hold the scheduler state file.
 *
 * Candidates are tried in this order: `OPENCLAW_PLUGIN_DATA_DIR`,
 * `HARBORFORGE_PLUGIN_DIR`, `<cwd>/.harborforge`, `<cwd>/data`,
 * `/tmp/harborforge`. A candidate is accepted when it exists (or can be
 * created) and a probe file can be written into it.
 *
 * @returns The first usable candidate, or `process.cwd()` when none is usable.
 */
private getDefaultStateDir(): string {
  const candidates: Array<string | undefined> = [
    process.env.OPENCLAW_PLUGIN_DATA_DIR,
    process.env.HARBORFORGE_PLUGIN_DIR,
    join(process.cwd(), '.harborforge'),
    join(process.cwd(), 'data'),
    '/tmp/harborforge',
  ];

  for (const dir of candidates) {
    if (!dir) {
      // Environment variable not set (or empty): nothing to try.
      continue;
    }

    try {
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
      }

      // Prove the directory is writable by actually writing into it.
      const probeFile = join(dir, '.write-test');
      writeFileSync(probeFile, '', { flag: 'w' });

      // Remove the probe so it does not linger in the data directory.
      // A failed cleanup must not disqualify a directory that is writable.
      try {
        unlinkSync(probeFile);
      } catch {
        // Best-effort cleanup only.
      }

      return dir;
    } catch {
      // Not creatable or not writable: fall through to the next candidate.
      continue;
    }
  }

  // Nothing was usable; fall back to the current working directory.
  return process.cwd();
}
|
||||
|
||||
/**
 * Write a JSON snapshot of the scheduler state to `this.stateFilePath` so it
 * can be picked up by `restoreState` after a restart.
 *
 * This is best-effort: any failure is logged and swallowed, never thrown.
 *
 * @param reason - Stored in the snapshot's `reason` field and echoed in the log line.
 */
private persistState(reason: string): void {
  try {
    const persistedState: PersistedState = {
      version: STATE_VERSION,
      persistedAt: new Date().toISOString(),
      reason,
      currentSlot: this.state.currentSlot,
      // Sets are not JSON-serializable; store the IDs as a plain array.
      deferredSlotIds: Array.from(this.state.deferredSlotIds),
      isProcessing: this.state.isProcessing,
      agentStatus: null, // Will be determined at restore time
    };

    // The target directory may not exist yet (e.g. a caller-supplied
    // `stateDir`); create it so the write below does not fail with ENOENT.
    // With `recursive: true` this is a no-op for an existing directory.
    mkdirSync(dirname(this.stateFilePath), { recursive: true });

    writeFileSync(this.stateFilePath, JSON.stringify(persistedState, null, 2));
    this.config.logger.info(`[PLG-CAL-004] State persisted to ${this.stateFilePath} (reason: ${reason})`);
  } catch (err) {
    this.config.logger.error('[PLG-CAL-004] Failed to persist state:', err);
  }
}
|
||||
|
||||
/**
 * Load the snapshot at `this.stateFilePath`, if one exists, and merge it into
 * the in-memory scheduler state.
 *
 * Only the deferred slot IDs are restored. An in-progress slot from the
 * previous session is logged but not re-adopted here.
 *
 * The file is cleared after it has been consumed, and also when it is
 * unusable (malformed payload or version mismatch). This is best-effort:
 * any failure is logged and swallowed, never thrown.
 */
private restoreState(): void {
  try {
    if (!existsSync(this.stateFilePath)) {
      return;
    }

    const parsed: unknown = JSON.parse(readFileSync(this.stateFilePath, 'utf-8'));

    // The file is external input: make sure it is an object before reading fields.
    if (typeof parsed !== 'object' || parsed === null) {
      this.config.logger.warn('[PLG-CAL-004] Ignoring malformed state file');
      this.clearPersistedState();
      return;
    }

    // clearPersistedState() overwrites the file with a `{ restored: true }`
    // marker instead of deleting it. That marker means "already consumed" and
    // must not be reported as a version mismatch on every later startup.
    if ((parsed as { restored?: unknown }).restored === true) {
      return;
    }

    const persisted = parsed as Partial<PersistedState>;

    // Validate version
    if (persisted.version !== STATE_VERSION) {
      this.config.logger.warn(`[PLG-CAL-004] State version mismatch: ${persisted.version} vs ${STATE_VERSION}`);
      this.clearPersistedState();
      return;
    }

    // Restore deferred slot IDs, tolerating a missing or malformed list.
    const deferredIds = Array.isArray(persisted.deferredSlotIds)
      ? persisted.deferredSlotIds.filter((id): id is string => typeof id === 'string')
      : [];
    if (deferredIds.length > 0) {
      this.state.deferredSlotIds = new Set(deferredIds);
      this.config.logger.info(`[PLG-CAL-004] Restored ${deferredIds.length} deferred slot(s)`);
    }

    // A slot that was mid-execution is only reported here; the next heartbeat
    // is expected to pick it up so it can be resumed or deferred.
    if (persisted.isProcessing && persisted.currentSlot) {
      this.config.logger.warn(
        `[PLG-CAL-004] Previous session had in-progress slot: ${this.getSlotId(persisted.currentSlot)}`
      );
    }

    this.config.logger.info(`[PLG-CAL-004] State restored from ${persisted.persistedAt} (reason: ${persisted.reason})`);

    // Mark the snapshot as consumed so it is not applied twice.
    this.clearPersistedState();
  } catch (err) {
    this.config.logger.error('[PLG-CAL-004] Failed to restore state:', err);
  }
}
|
||||
|
||||
/**
 * Mark the on-disk snapshot as consumed.
 *
 * The file is not deleted: if it exists, its content is overwritten with a
 * small `{ restored, at }` JSON marker. Nothing happens when the file is
 * absent. Failures are logged and swallowed.
 */
private clearPersistedState(): void {
  try {
    if (!existsSync(this.stateFilePath)) {
      return;
    }

    // Overwrite rather than unlink; archiving could replace this later.
    const marker = { restored: true, at: new Date().toISOString() };
    writeFileSync(this.stateFilePath, JSON.stringify(marker));
  } catch (err) {
    this.config.logger.error('[PLG-CAL-004] Failed to clear persisted state:', err);
  }
}
|
||||
|
||||
/**
 * Tell the backend the agent is going offline, as the last report before
 * shutdown.
 *
 * Errors from the bridge call are logged and swallowed, so the shutdown
 * sequence continues.
 *
 * @param reason - Only used in the log line that announces the heartbeat.
 */
private async sendFinalHeartbeat(reason: string): Promise<void> {
  const { bridge, logger } = this.config;

  try {
    logger.info(`[PLG-CAL-004] Sending final heartbeat (reason: ${reason})`);

    // The "final heartbeat" is an agent status report with status 'offline'.
    await bridge.reportAgentStatus({ status: 'offline' });

    logger.info('[PLG-CAL-004] Final heartbeat sent successfully');
  } catch (err) {
    logger.error('[PLG-CAL-004] Failed to send final heartbeat:', err);
  }
}
|
||||
|
||||
/**
 * React to a ScheduledGatewayRestart slot (PLG-CAL-004).
 *
 * Steps, in order: flag the restart as pending, persist the scheduler state,
 * pause the slot in progress (if any), send the final heartbeat, stop the
 * scheduler, then report the restart slot itself as finished.
 *
 * @param slot - The slot that carried the restart event. It is updated via
 *   `updateSlot` when it has an `id`, otherwise via `updateVirtualSlot` when
 *   it has a `virtual_id`.
 */
private async handleScheduledGatewayRestart(slot: CalendarSlotResponse): Promise<void> {
  const { bridge, logger } = this.config;
  const reason = 'ScheduledGatewayRestart';

  logger.info('[PLG-CAL-004] Handling ScheduledGatewayRestart event');

  // Step 1: block further slot processing while the restart is in flight.
  this.state.isRestartPending = true;

  // Step 2: snapshot the state before anything below changes it.
  this.persistState(reason);

  // Step 3: gracefully pause whatever is currently running.
  const hasActiveSlot = this.state.isProcessing && this.state.currentSlot;
  if (hasActiveSlot) {
    logger.info('[PLG-CAL-004] Pausing current slot before restart');
    await this.pauseCurrentSlot();
  }

  // Step 4: let the backend know we are going away.
  await this.sendFinalHeartbeat(reason);

  // Step 5: stop the scheduler (pauses scheduled tasks).
  logger.info('[PLG-CAL-004] Stopping scheduler due to gateway restart');
  this.stop();

  // Step 6: the restart slot counts as done once the preparation above has run.
  const finishedUpdate: SlotAgentUpdate = {
    status: SlotStatus.FINISHED,
    actual_duration: 0, // Restart preparation doesn't take time
  };

  try {
    if (slot.id) {
      await bridge.updateSlot(slot.id, finishedUpdate);
    } else if (slot.virtual_id) {
      await bridge.updateVirtualSlot(slot.virtual_id, finishedUpdate);
    }
  } catch (err) {
    logger.error('[PLG-CAL-004] Failed to mark restart slot as finished:', err);
  }
}
|
||||
|
||||
/**
|
||||
@@ -112,6 +324,7 @@ export class CalendarScheduler {
|
||||
}
|
||||
|
||||
this.state.isRunning = true;
|
||||
this.state.isRestartPending = false;
|
||||
this.config.logger.info('Calendar scheduler started');
|
||||
|
||||
// Run initial heartbeat immediately
|
||||
@@ -148,6 +361,12 @@ export class CalendarScheduler {
|
||||
return;
|
||||
}
|
||||
|
||||
// Skip heartbeat if restart is pending
|
||||
if (this.state.isRestartPending) {
|
||||
this.logDebug('Heartbeat skipped: gateway restart pending');
|
||||
return;
|
||||
}
|
||||
|
||||
this.state.lastHeartbeatAt = new Date();
|
||||
|
||||
try {
|
||||
@@ -159,7 +378,9 @@ export class CalendarScheduler {
|
||||
return;
|
||||
}
|
||||
|
||||
this.logDebug(`Heartbeat: ${response.slots.length} slots pending, agent_status=${response.agent_status}`);
|
||||
this.logDebug(
|
||||
`Heartbeat: ${response.slots.length} slots pending, agent_status=${response.agent_status}`
|
||||
);
|
||||
|
||||
// If agent is not idle, defer all pending slots
|
||||
if (response.agent_status !== 'idle') {
|
||||
@@ -178,7 +399,6 @@ export class CalendarScheduler {
|
||||
|
||||
// Agent is idle - handle pending slots
|
||||
await this.handleIdleAgent(response.slots);
|
||||
|
||||
} catch (err) {
|
||||
this.config.logger.error('Heartbeat error:', err);
|
||||
}
|
||||
@@ -225,7 +445,7 @@ export class CalendarScheduler {
|
||||
|
||||
// Filter out already deferred slots in this session
|
||||
const eligibleSlots = slots.filter(
|
||||
s => !this.state.deferredSlotIds.has(this.getSlotId(s))
|
||||
(s) => !this.state.deferredSlotIds.has(this.getSlotId(s))
|
||||
);
|
||||
|
||||
if (eligibleSlots.length === 0) {
|
||||
@@ -238,7 +458,7 @@ export class CalendarScheduler {
|
||||
|
||||
this.config.logger.info(
|
||||
`Selected slot for execution: id=${this.getSlotId(selectedSlot)}, ` +
|
||||
`type=${selectedSlot.slot_type}, priority=${selectedSlot.priority}`
|
||||
`type=${selectedSlot.slot_type}, priority=${selectedSlot.priority}`
|
||||
);
|
||||
|
||||
// Mark remaining slots as deferred
|
||||
@@ -247,10 +467,27 @@ export class CalendarScheduler {
|
||||
this.state.deferredSlotIds.add(this.getSlotId(slot));
|
||||
}
|
||||
|
||||
// Check if this is a ScheduledGatewayRestart event
|
||||
if (this.isScheduledGatewayRestart(selectedSlot)) {
|
||||
await this.handleScheduledGatewayRestart(selectedSlot);
|
||||
return;
|
||||
}
|
||||
|
||||
// Wake agent to execute selected slot
|
||||
await this.executeSlot(selectedSlot);
|
||||
}
|
||||
|
||||
/**
 * Tell whether a slot carries the ScheduledGatewayRestart system event.
 *
 * @param slot - Slot to inspect.
 * @returns `true` only when the slot's `event_type` is 'system_event', it has
 *   `event_data`, and that data's `event` is 'ScheduledGatewayRestart'.
 */
private isScheduledGatewayRestart(slot: CalendarSlotResponse): boolean {
  return (
    slot.event_type === 'system_event' &&
    !!slot.event_data &&
    (slot.event_data as CalendarEventDataSystemEvent).event === 'ScheduledGatewayRestart'
  );
}
|
||||
|
||||
/**
|
||||
* Execute a slot by waking the agent.
|
||||
*/
|
||||
@@ -315,7 +552,6 @@ export class CalendarScheduler {
|
||||
|
||||
// Note: isProcessing remains true until agent signals completion
|
||||
// This is handled by external completion callback
|
||||
|
||||
} catch (err) {
|
||||
this.config.logger.error('Error executing slot:', err);
|
||||
this.state.isProcessing = false;
|
||||
@@ -361,7 +597,10 @@ export class CalendarScheduler {
|
||||
/**
|
||||
* Build prompt for job-type slots.
|
||||
*/
|
||||
private buildJobPrompt(slot: CalendarSlotResponse, jobData: CalendarEventDataJob): string {
|
||||
private buildJobPrompt(
|
||||
slot: CalendarSlotResponse,
|
||||
jobData: CalendarEventDataJob
|
||||
): string {
|
||||
const duration = slot.estimated_duration;
|
||||
const type = jobData.type;
|
||||
const code = jobData.code;
|
||||
@@ -518,7 +757,6 @@ Please use this time for the scheduled activity.`;
|
||||
this.config.logger.info(
|
||||
`Completed slot ${this.getSlotId(slot)}, actual_duration=${actualDurationMinutes}min`
|
||||
);
|
||||
|
||||
} catch (err) {
|
||||
this.config.logger.error('Failed to complete slot:', err);
|
||||
} finally {
|
||||
@@ -556,7 +794,6 @@ Please use this time for the scheduled activity.`;
|
||||
this.config.logger.info(
|
||||
`Aborted slot ${this.getSlotId(slot)}${reason ? `: ${reason}` : ''}`
|
||||
);
|
||||
|
||||
} catch (err) {
|
||||
this.config.logger.error('Failed to abort slot:', err);
|
||||
} finally {
|
||||
@@ -589,7 +826,6 @@ Please use this time for the scheduled activity.`;
|
||||
}
|
||||
|
||||
this.config.logger.info(`Paused slot ${this.getSlotId(slot)}`);
|
||||
|
||||
} catch (err) {
|
||||
this.config.logger.error('Failed to pause slot:', err);
|
||||
}
|
||||
@@ -617,7 +853,6 @@ Please use this time for the scheduled activity.`;
|
||||
}
|
||||
|
||||
this.config.logger.info(`Resumed slot ${this.getSlotId(slot)}`);
|
||||
|
||||
} catch (err) {
|
||||
this.config.logger.error('Failed to resume slot:', err);
|
||||
}
|
||||
@@ -692,6 +927,20 @@ Please use this time for the scheduled activity.`;
|
||||
getCurrentSlot(): CalendarSlotResponse | null {
|
||||
return this.state.currentSlot;
|
||||
}
|
||||
|
||||
/**
 * Report whether the scheduler has flagged a gateway restart as pending.
 *
 * @returns The current value of the scheduler's `isRestartPending` state flag.
 */
isRestartPending(): boolean {
  const { isRestartPending } = this.state;
  return isRestartPending;
}
|
||||
|
||||
/**
 * Expose where the scheduler reads and writes its persisted state.
 *
 * @returns The state file path computed in the constructor.
 */
getStateFilePath(): string {
  const { stateFilePath } = this;
  return stateFilePath;
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user