import { spawn } from 'child_process';
import * as fs from 'fs';
import { promisify } from 'util';
import type { StatusManager } from './status-manager';

const sleep = promisify(setTimeout);

/** Seconds the detached restart script sleeps before running the restart command. */
const RESTART_DELAY_SECONDS = 60;
/** Milliseconds to wait right after spawning the detached restart script. */
const RESTART_INITIATION_WAIT_MS = 2000;
/** Milliseconds to wait before checking whether the gateway is running again. */
const GATEWAY_SETTLE_WAIT_MS = 60000;

export interface SafeRestartOptions {
  /** Agent ID performing the restart */
  agentId: string;
  /** Session key for notifications */
  sessionKey: string;
  /** API endpoint for query-restart */
  apiEndpoint?: string;
  /** Rollback script path */
  rollback?: string;
  /** Log file path */
  log?: string;
  /** Polling interval in ms (default: 5000) */
  pollInterval?: number;
  /** Maximum wait time in ms (default: 300000 = 5min) */
  maxWaitTime?: number;
  /** Restart script/command */
  restartScript?: string;
  /** Callback for notifications */
  onNotify?: (sessionKey: string, message: string) => Promise<void>;
}

export interface SafeRestartResult {
  success: boolean;
  message: string;
  log?: string;
}

/** Outcome of the approval polling phase. */
type ApprovalOutcome = 'approved' | 'already-scheduled' | 'timeout';

/**
 * Performs a safe restart with polling and rollback support.
 *
 * Flow: poll `POST {apiEndpoint}/query-restart` until it answers `OK`, spawn the
 * detached restart script, wait, check `openclaw gateway status`, then report the
 * outcome to `POST {apiEndpoint}/restart-result` and notify through `onNotify`.
 * When the gateway check fails and `rollback` is set, the rollback command runs
 * before the failure is reported.
 *
 * @param options - Restart configuration; see {@link SafeRestartOptions}.
 * @returns The outcome. This function does not reject: unexpected errors are
 *   converted into a result with `success: false`.
 */
export async function safeRestart(options: SafeRestartOptions): Promise<SafeRestartResult> {
  const {
    agentId,
    sessionKey,
    apiEndpoint = 'http://localhost:8765',
    rollback,
    log: logPath,
    pollInterval = 5000,
    maxWaitTime = 300000,
    restartScript = 'openclaw gateway restart',
    onNotify,
  } = options;

  const logs: string[] = [];
  const log = (msg: string): void => {
    const entry = `[${new Date().toISOString()}] ${msg}`;
    logs.push(entry);
    console.log(entry);
  };

  // Notifications are best-effort: a throwing callback must not change the
  // restart outcome that is returned to the caller.
  const notify = async (message: string): Promise<void> => {
    if (!onNotify) {
      return;
    }
    try {
      await onNotify(sessionKey, message);
    } catch (err) {
      log(`Notification failed: ${err}`);
    }
  };

  try {
    log(`Starting safe restart. Agent: ${agentId}, Session: ${sessionKey}`);

    // Step 1: Poll query-restart until OK or timeout
    const approval = await waitForRestartApproval(
      apiEndpoint,
      agentId,
      sessionKey,
      pollInterval,
      maxWaitTime,
      log,
    );

    if (approval === 'already-scheduled') {
      return {
        success: false,
        message: 'ALREADY_SCHEDULED',
      };
    }

    if (approval === 'timeout') {
      const msg = 'Timeout waiting for agents to be ready';
      log(msg);
      return {
        success: false,
        message: msg,
        log: logs.join('\n'),
      };
    }

    // Step 2: Report restart starting
    log('Executing restart...');

    // Step 3: Start restart in background process
    startBackgroundRestart(restartScript, logPath);

    // Wait a moment for restart to initiate
    await sleep(RESTART_INITIATION_WAIT_MS);

    // Step 4: Check if gateway comes back
    log('Waiting for gateway to restart...');
    // NOTE(review): the detached script itself sleeps RESTART_DELAY_SECONDS before
    // restarting, so this check runs roughly 2s after the restart command starts.
    // Confirm that is long enough for the gateway to report "running".
    await sleep(GATEWAY_SETTLE_WAIT_MS); // Wait 60s as specified

    // Check gateway status
    const gatewayOk = await checkGatewayStatus();

    if (gatewayOk) {
      log('Gateway restarted successfully');

      // Report success
      await reportRestartResult(apiEndpoint, 'ok', logPath || logs.join('\n'), log);

      // Notify resumption
      await notify('restart 结束了,我们继续');

      return {
        success: true,
        message: 'Restart completed successfully',
      };
    }

    log('Gateway restart failed');

    // Execute rollback if provided
    if (rollback) {
      log(`Executing rollback: ${rollback}`);
      try {
        await executeRollback(rollback);
        log('Rollback completed');
      } catch (err) {
        log(`Rollback failed: ${err}`);
      }
    }

    // Report failure
    await reportRestartResult(apiEndpoint, 'failed', logPath || logs.join('\n'), log);

    // Notify failure
    // NOTE(review): this text says a rollback was performed even when no rollback
    // was configured or the rollback itself failed; consider separate messages.
    await notify('restart 失败,已经 rollback,请参考 log 调查。');

    return {
      success: false,
      message: 'Restart failed',
      log: logs.join('\n'),
    };
  } catch (err) {
    const errorMsg = `Unexpected error: ${err}`;
    log(errorMsg);
    return {
      success: false,
      message: errorMsg,
      log: logs.join('\n'),
    };
  }
}

/** Extracts a string `status` field from an untyped JSON payload, if present. */
function readStatus(payload: unknown): string | undefined {
  if (typeof payload !== 'object' || payload === null || !('status' in payload)) {
    return undefined;
  }
  const status = (payload as { status: unknown }).status;
  return typeof status === 'string' ? status : undefined;
}

/**
 * Polls `query-restart` until it answers `OK`, answers `ALREADY_SCHEDULED`, or
 * `maxWaitTime` milliseconds have elapsed. Request errors and non-2xx responses
 * are logged and retried after `pollInterval` milliseconds.
 */
async function waitForRestartApproval(
  apiEndpoint: string,
  agentId: string,
  sessionKey: string,
  pollInterval: number,
  maxWaitTime: number,
  log: (msg: string) => void,
): Promise<ApprovalOutcome> {
  const startTime = Date.now();

  while (Date.now() - startTime < maxWaitTime) {
    try {
      const response = await fetch(`${apiEndpoint}/query-restart`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          requesterAgentId: agentId,
          requesterSessionKey: sessionKey,
        }),
      });

      if (!response.ok) {
        // An error page is not a status payload; do not try to interpret its body.
        log(`query-restart returned HTTP ${response.status}`);
      } else {
        const payload: unknown = await response.json();
        const status = readStatus(payload);

        if (status === 'OK') {
          log('All agents ready for restart');
          return 'approved';
        }
        if (status === 'ALREADY_SCHEDULED') {
          log('Restart already scheduled by another agent');
          return 'already-scheduled';
        }
        log(`Waiting for agents to be ready... (${status})`);
      }
    } catch (err) {
      log(`Error polling query-restart: ${err}`);
    }

    await sleep(pollInterval);
  }

  return 'timeout';
}

/**
 * Posts the restart outcome to `restart-result`. Best-effort: a failure here is
 * logged and swallowed so that it cannot overwrite the real restart outcome.
 */
async function reportRestartResult(
  apiEndpoint: string,
  status: 'ok' | 'failed',
  logText: string,
  log: (msg: string) => void,
): Promise<void> {
  try {
    const response = await fetch(`${apiEndpoint}/restart-result`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        status,
        log: logText,
      }),
    });
    if (!response.ok) {
      log(`restart-result returned HTTP ${response.status}`);
    }
  } catch (err) {
    log(`Failed to report restart result: ${err}`);
  }
}

/**
 * Spawns a detached bash process that sleeps, runs `restartScript`, then runs
 * `openclaw gateway status`. When `logPath` is given, the child's stdout and
 * stderr are written to that file (truncating it); otherwise they are discarded.
 *
 * @param restartScript - Shell command text interpolated verbatim into the script.
 * @param logPath - Optional file that receives the child's output.
 */
function startBackgroundRestart(restartScript: string, logPath?: string): void {
  const script = `
#!/bin/bash
set -e
sleep ${RESTART_DELAY_SECONDS}
${restartScript}
openclaw gateway status
`;

  // Open the log once and share the descriptor: two separate truncating opens
  // would give stdout and stderr independent offsets and let them overwrite
  // each other's output.
  const logFd = logPath ? fs.openSync(logPath, 'w') : undefined;

  try {
    const child = spawn('bash', ['-c', script], {
      detached: true,
      stdio: logFd === undefined ? 'ignore' : ['ignore', logFd, logFd],
    });

    // Without a listener, a spawn failure would surface as an uncaught exception.
    child.on('error', (err) => {
      console.error(`Failed to start background restart: ${err}`);
    });

    child.unref();
  } finally {
    // The child holds its own copy of the descriptor; release the parent's.
    if (logFd !== undefined) {
      fs.closeSync(logFd);
    }
  }
}

/**
 * Runs `openclaw gateway status` with a 10s timeout.
 *
 * @returns `true` only when the command exits with code 0 and its stdout
 *   contains the text `running`; `false` otherwise, including spawn errors.
 */
async function checkGatewayStatus(): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    const child = spawn('openclaw', ['gateway', 'status'], {
      timeout: 10000,
    });

    let output = '';
    child.stdout?.on('data', (data) => {
      output += data.toString();
    });

    child.on('close', (code) => {
      resolve(code === 0 && output.includes('running'));
    });

    child.on('error', () => {
      resolve(false);
    });
  });
}

/**
 * Runs the rollback command through `bash -c` with a 120s timeout.
 *
 * NOTE(review): the value is executed as shell command text, not as a file path,
 * and it can originate from tool parameters. Confirm callers are trusted.
 *
 * @param rollbackScript - Shell command text to execute.
 * @throws Error when the process cannot be spawned, exits non-zero, or is
 *   terminated by a signal (as happens on timeout).
 */
async function executeRollback(rollbackScript: string): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const child = spawn('bash', ['-c', rollbackScript], {
      timeout: 120000,
    });

    child.on('close', (code, signal) => {
      if (code === 0) {
        resolve();
      } else if (code === null) {
        // A null exit code means the process was killed, e.g. by the timeout.
        reject(new Error(`Rollback script terminated by signal ${signal}`));
      } else {
        reject(new Error(`Rollback script exited with code ${code}`));
      }
    });

    child.on('error', (err) => {
      reject(err);
    });
  });
}

/**
 * Safe restart tool that can be registered with OpenClaw.
 *
 * @param statusManager - Accepted for interface compatibility; not used by the
 *   tool definition built here.
 * @returns A tool descriptor whose handler runs {@link safeRestart} with the
 *   calling agent's ID and session key and returns `success` and `message`.
 */
export function createSafeRestartTool(statusManager: StatusManager) {
  return {
    name: 'safe_restart',
    description: 'Perform a safe restart of OpenClaw gateway with agent coordination',
    parameters: {
      type: 'object',
      properties: {
        rollback: {
          type: 'string',
          description: 'Path to rollback script',
        },
        log: {
          type: 'string',
          description: 'Path to log file',
        },
      },
    },
    handler: async (
      params: { rollback?: string; log?: string },
      context: { agentId: string; sessionKey: string },
    ) => {
      const result = await safeRestart({
        agentId: context.agentId,
        sessionKey: context.sessionKey,
        rollback: params.rollback,
        log: params.log,
        async onNotify(sessionKey, message) {
          // This would be connected to the messaging system
          console.log(`[${sessionKey}] ${message}`);
        },
      });

      return {
        success: result.success,
        message: result.message,
      };
    },
  };
}