- Restructure: pcexec/ and safe-restart/ → plugin/{tools,core,commands}
- New pcguard Go binary: validates AGENT_VERIFY, AGENT_ID, AGENT_WORKSPACE
- pcexec now injects AGENT_VERIFY env + appends openclaw bin to PATH
- plugin/index.ts: unified TypeScript entry point with resolveOpenclawPath()
- install.mjs: support --openclaw-profile-path, install pcguard, new paths
- README: updated structure docs + security limitations note
- Removed old root index.js and openclaw.plugin.json
289 lines
7.3 KiB
TypeScript
289 lines
7.3 KiB
TypeScript
import { spawn } from 'child_process';
|
||
import * as fs from 'fs';
|
||
import { promisify } from 'util';
|
||
import { StatusManager } from './status-manager';
|
||
|
||
// Promise-returning delay helper built from setTimeout: `await sleep(ms)` pauses for `ms` milliseconds.
const sleep = promisify(setTimeout);
|
||
|
||
/**
 * Settings accepted by `safeRestart`.
 *
 * Only `agentId` and `sessionKey` are required; every other field is optional.
 */
export interface SafeRestartOptions {
  /** ID of the agent that is performing the restart. */
  agentId: string;

  /** Session key that notifications are addressed to. */
  sessionKey: string;

  /** API endpoint that serves `query-restart`. */
  apiEndpoint?: string;

  /** Path of the rollback script. */
  rollback?: string;

  /** Path of the log file. */
  log?: string;

  /** Delay between two polls, in milliseconds (default: 5000). */
  pollInterval?: number;

  /** Longest time to keep waiting, in milliseconds (default: 300000, i.e. 5 minutes). */
  maxWaitTime?: number;

  /** Script or command that performs the restart. */
  restartScript?: string;

  /** Callback used to deliver notifications; receives the session key and the message. */
  onNotify?: (sessionKey: string, message: string) => Promise<void>;
}
|
||
|
||
/** Outcome returned by `safeRestart`. */
export interface SafeRestartResult {
  /** Whether the restart went through. */
  success: boolean;

  /** Short description of the outcome (for example `ALREADY_SCHEDULED`). */
  message: string;

  /** Log text attached to the outcome, when there is one. */
  log?: string;
}
|
||
|
||
/**
 * Performs a coordinated ("safe") gateway restart with polling and rollback support.
 *
 * Flow:
 *  1. POST `${apiEndpoint}/query-restart` every `pollInterval` ms until the API
 *     answers `OK` (proceed) or `ALREADY_SCHEDULED` (abort), or until
 *     `maxWaitTime` has elapsed (abort).
 *  2. Start the restart command in a background process.
 *  3. Wait, then check whether the gateway reports itself as running.
 *  4. If it does not, run the optional rollback script.
 *  5. POST the outcome to `${apiEndpoint}/restart-result` and notify the
 *     session through `onNotify`.
 *
 * Reporting (step 5) and notification are best-effort: a failure there is
 * logged but does not change the returned outcome, so a restart that actually
 * succeeded is never reported to the caller as failed.
 *
 * @param options - Restart settings; see `SafeRestartOptions` for defaults.
 * @returns The outcome together with the collected log lines. The promise does
 *   not reject: unexpected errors are converted into a failed result.
 */
export async function safeRestart(options: SafeRestartOptions): Promise<SafeRestartResult> {
  const {
    agentId,
    sessionKey,
    apiEndpoint = 'http://localhost:8765',
    rollback,
    log: logPath,
    pollInterval = 5000,
    maxWaitTime = 300000,
    restartScript = 'openclaw gateway restart',
    onNotify,
  } = options;

  const logs: string[] = [];

  // Records a timestamped entry both in memory (returned to the caller) and on the console.
  const log = (msg: string): void => {
    const entry = `[${new Date().toISOString()}] ${msg}`;
    logs.push(entry);
    console.log(entry);
  };

  const collectedLog = (): string => logs.join('\n');

  // Best-effort POST of the final outcome; a failing endpoint must not alter the result.
  const reportResult = async (status: 'ok' | 'failed'): Promise<void> => {
    try {
      await fetch(`${apiEndpoint}/restart-result`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          status,
          // The log file path is sent when one was configured, otherwise the in-memory log.
          log: logPath || collectedLog(),
        }),
      });
    } catch (err) {
      log(`Failed to report restart result: ${err}`);
    }
  };

  // Best-effort notification; a throwing callback must not alter the result.
  const notify = async (message: string): Promise<void> => {
    if (!onNotify) {
      return;
    }
    try {
      await onNotify(sessionKey, message);
    } catch (err) {
      log(`Failed to send notification: ${err}`);
    }
  };

  try {
    log(`Starting safe restart. Agent: ${agentId}, Session: ${sessionKey}`);

    // Step 1: Poll query-restart until OK or timeout
    const startTime = Date.now();
    let restartApproved = false;

    while (Date.now() - startTime < maxWaitTime) {
      try {
        const response = await fetch(`${apiEndpoint}/query-restart`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            requesterAgentId: agentId,
            requesterSessionKey: sessionKey,
          }),
        });

        const data = (await response.json()) as { status: string };

        if (data.status === 'OK') {
          log('All agents ready for restart');
          restartApproved = true;
          break;
        }

        if (data.status === 'ALREADY_SCHEDULED') {
          log('Restart already scheduled by another agent');
          return {
            success: false,
            message: 'ALREADY_SCHEDULED',
            log: collectedLog(),
          };
        }

        log(`Waiting for agents to be ready... (${data.status})`);
      } catch (err) {
        // A failed poll is not fatal; keep trying until the deadline.
        log(`Error polling query-restart: ${err}`);
      }

      // Never sleep past the deadline.
      const remaining = maxWaitTime - (Date.now() - startTime);
      if (remaining <= 0) {
        break;
      }
      await sleep(Math.min(pollInterval, remaining));
    }

    if (!restartApproved) {
      const msg = 'Timeout waiting for agents to be ready';
      log(msg);
      return {
        success: false,
        message: msg,
        log: collectedLog(),
      };
    }

    // Step 2: Report restart starting
    log('Executing restart...');

    // Step 3: Start restart in background process
    startBackgroundRestart(restartScript, logPath);

    // Wait a moment for restart to initiate
    await sleep(2000);

    // Step 4: Check if gateway comes back
    // NOTE(review): the background script also delays before restarting, so this
    // check runs shortly after the restart command starts — confirm the timing is intended.
    log('Waiting for gateway to restart...');
    await sleep(60000); // Wait 60s as specified

    const gatewayOk = await checkGatewayStatus();

    if (gatewayOk) {
      log('Gateway restarted successfully');

      await reportResult('ok');
      await notify('restart 结束了,我们继续');

      return {
        success: true,
        message: 'Restart completed successfully',
        log: collectedLog(),
      };
    }

    log('Gateway restart failed');

    // Execute rollback if provided, remembering what actually happened so the
    // notification below does not claim a rollback that never ran.
    let rollbackOutcome: 'none' | 'done' | 'failed' = 'none';
    if (rollback) {
      log(`Executing rollback: ${rollback}`);
      try {
        await executeRollback(rollback);
        log('Rollback completed');
        rollbackOutcome = 'done';
      } catch (err) {
        log(`Rollback failed: ${err}`);
        rollbackOutcome = 'failed';
      }
    }

    await reportResult('failed');

    let failureNotice: string;
    if (rollbackOutcome === 'done') {
      failureNotice = 'restart 失败,已经 rollback,请参考 log 调查。';
    } else if (rollbackOutcome === 'failed') {
      failureNotice = 'restart 失败,rollback 也失败了,请参考 log 调查。';
    } else {
      failureNotice = 'restart 失败,未执行 rollback,请参考 log 调查。';
    }
    await notify(failureNotice);

    return {
      success: false,
      message: 'Restart failed',
      log: collectedLog(),
    };
  } catch (err) {
    const errorMsg = `Unexpected error: ${err}`;
    log(errorMsg);
    return {
      success: false,
      message: errorMsg,
      log: collectedLog(),
    };
  }
}
|
||
|
||
/**
 * Launches the restart command in a detached background `bash` process.
 *
 * The background script waits 60 seconds, runs `restartScript`, and then runs
 * `openclaw gateway status`; `set -e` makes it stop at the first failing command.
 * The child is detached and unref'ed so this process does not wait for it.
 *
 * @param restartScript - Shell command(s) that perform the restart.
 * @param logPath - Optional file that receives the child's stdout and stderr.
 *   The file is truncated when the child is started. Without it, the child's
 *   output is discarded.
 */
function startBackgroundRestart(restartScript: string, logPath?: string): void {
  const script = ['set -e', 'sleep 60', restartScript, 'openclaw gateway status'].join('\n');

  // Open the log exactly once and hand the same descriptor to stdout and stderr.
  // Two independent opens would each truncate the file and keep separate write
  // offsets, so the two streams would overwrite each other's output.
  const logFd = logPath ? fs.openSync(logPath, 'w') : undefined;

  try {
    const child = spawn('bash', ['-c', script], {
      detached: true,
      stdio: logFd === undefined ? 'ignore' : ['ignore', logFd, logFd],
    });

    // A spawn failure (e.g. bash not found) is delivered asynchronously as an
    // 'error' event; without a listener it would surface as an uncaught exception.
    child.on('error', (err) => {
      console.error(`Failed to start background restart: ${err}`);
    });

    child.unref();
  } finally {
    // The child holds its own duplicate of the descriptor; release the parent's copy.
    if (logFd !== undefined) {
      fs.closeSync(logFd);
    }
  }
}
|
||
|
||
/**
 * Runs `openclaw gateway status` (10 s timeout) and tells whether the gateway is up.
 *
 * @returns `true` only when the command exits with code 0 and its stdout
 *   contains `running`; `false` otherwise, including when the command cannot
 *   be started.
 */
async function checkGatewayStatus(): Promise<boolean> {
  return new Promise((done) => {
    const statusProc = spawn('openclaw', ['gateway', 'status'], { timeout: 10000 });

    // Collect stdout piece by piece; it is only inspected once the process has closed.
    const pieces: string[] = [];
    statusProc.stdout?.on('data', (chunk) => {
      pieces.push(chunk.toString());
    });

    statusProc.on('error', () => {
      done(false);
    });

    statusProc.on('close', (exitCode) => {
      if (exitCode !== 0) {
        done(false);
        return;
      }
      done(pieces.join('').includes('running'));
    });
  });
}
|
||
|
||
/**
 * Runs the rollback script through `bash -c` with a 120 s timeout.
 *
 * @param rollbackScript - Shell command(s) to execute.
 * @returns A promise that resolves when the script exits with code 0.
 * @throws Rejects with an `Error` when the script exits with a non-zero code,
 *   is terminated by a signal (e.g. on timeout), or cannot be started.
 */
async function executeRollback(rollbackScript: string): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const child = spawn('bash', ['-c', rollbackScript], {
      timeout: 120000,
      // Nothing here reads the script's output. With the default piped stdio a
      // script that prints more than the OS pipe buffer holds would block on
      // write until the timeout kills it, so the streams are discarded instead.
      stdio: 'ignore',
    });

    // 'error' and 'close' can both fire for one child; settle only once.
    let settled = false;
    const settle = (err?: Error): void => {
      if (settled) {
        return;
      }
      settled = true;
      if (err) {
        reject(err);
      } else {
        resolve();
      }
    };

    child.on('close', (code, signal) => {
      if (code === 0) {
        settle();
      } else if (code === null) {
        // A null exit code means the process was killed (e.g. by the timeout).
        settle(new Error(`Rollback script was terminated by signal ${signal}`));
      } else {
        settle(new Error(`Rollback script exited with code ${code}`));
      }
    });

    child.on('error', (err) => {
      settle(err);
    });
  });
}
|
||
|
||
/**
 * Builds the `safe_restart` tool definition that can be registered with OpenClaw.
 *
 * The tool accepts two optional string parameters (`rollback`, `log`) and its
 * handler delegates to `safeRestart`, using the agent ID and session key from
 * the invocation context. Notifications are currently written to the console.
 *
 * @param statusManager - Status manager instance; not used by the current implementation.
 * @returns An object with `name`, `description`, a JSON-schema-style
 *   `parameters` description, and an async `handler` that resolves to
 *   `{ success, message }`.
 */
export function createSafeRestartTool(statusManager: StatusManager) {
  const parameters = {
    type: 'object',
    properties: {
      rollback: {
        type: 'string',
        description: 'Path to rollback script',
      },
      log: {
        type: 'string',
        description: 'Path to log file',
      },
    },
  };

  const handler = async (
    params: { rollback?: string; log?: string },
    context: { agentId: string; sessionKey: string },
  ) => {
    const outcome = await safeRestart({
      agentId: context.agentId,
      sessionKey: context.sessionKey,
      rollback: params.rollback,
      log: params.log,
      onNotify: async (sessionKey, message) => {
        // This would be connected to the messaging system
        console.log(`[${sessionKey}] ${message}`);
      },
    });

    // Only the verdict and its message are exposed to the tool caller.
    return {
      success: outcome.success,
      message: outcome.message,
    };
  };

  return {
    name: 'safe_restart',
    description: 'Perform a safe restart of OpenClaw gateway with agent coordination',
    parameters,
    handler,
  };
}
|