From a148c11e5033cb2ec58786e42f120d5a888a07ea Mon Sep 17 00:00:00 2001 From: zhi Date: Thu, 19 Mar 2026 13:37:11 +0000 Subject: [PATCH] feat: implement HarborForge Monitor OpenClaw Plugin Architecture: - openclaw.plugin.json: Plugin manifest with config schema - index.mjs: Plugin entry, lifecycle hooks (gateway:start/stop) - sidecar/server.mjs: Independent Node process for telemetry Features: - Collects system metrics (CPU, memory, disk, load, uptime) - Collects OpenClaw status (version, agents) - HTTP heartbeat to HarborForge Monitor - Config via ~/.openclaw/openclaw.json - Sidecar auto-starts/stops with Gateway Config options: - enabled, backendUrl, identifier - challengeUuid (required, from Monitor registration) - reportIntervalSec, httpFallbackIntervalSec - logLevel Provides tool: harborforge_monitor_status --- README.md | 154 ++++++++++++++++++++++ index.mjs | 178 +++++++++++++++++++++++++ openclaw.plugin.json | 48 +++++++ package.json | 16 +++ sidecar/server.mjs | 302 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 698 insertions(+) create mode 100644 README.md create mode 100644 index.mjs create mode 100644 openclaw.plugin.json create mode 100644 package.json create mode 100644 sidecar/server.mjs diff --git a/README.md b/README.md new file mode 100644 index 0000000..159c1e0 --- /dev/null +++ b/README.md @@ -0,0 +1,154 @@ +# HarborForge OpenClaw Plugin + +OpenClaw 插件,将服务器遥测数据流式传输到 HarborForge Monitor。 + +## 架构 + +``` +┌─────────────────────────────────────────────────┐ +│ OpenClaw Gateway │ +│ ┌───────────────────────────────────────────┐ │ +│ │ HarborForge.OpenclawPlugin (index.mjs) │ │ +│ │ - 生命周期管理 (启动/停止) │ │ +│ │ - 配置管理 │ │ +│ └───────────────────────────────────────────┘ │ +│ │ │ +│ ▼ 启动 sidecar │ +│ ┌───────────────────────────────────────────┐ │ +│ │ Sidecar (sidecar/server.mjs) │ │ +│ │ - 独立 Node 进程 │ │ +│ │ - 收集系统指标 │ │ +│ │ - 收集 OpenClaw 状态 │ │ +│ │ - 发送到 HarborForge Monitor │ │ +│ └───────────────────────────────────────────┘ │ 
+└─────────────────────────────────────────────────┘ + │ + ▼ HTTP/WebSocket + ┌─────────────────────┐ + │ HarborForge Monitor │ + └─────────────────────┘ +``` + +## 安装 + +### 1. 复制插件到 OpenClaw 插件目录 + +```bash +# 找到 OpenClaw 插件目录 +# 通常是 ~/.openclaw/plugins/ 或 /usr/lib/node_modules/openclaw/plugins/ + +# 复制插件 +cp -r HarborForge.OpenclawPlugin ~/.openclaw/plugins/harborforge-monitor +``` + +### 2. 在 HarborForge Monitor 中注册服务器 + +1. 登录 HarborForge Monitor +2. 进入 Server Management +3. 点击 "Register New Server" +4. 获取 `challengeUuid` + +### 3. 配置 OpenClaw + +编辑 `~/.openclaw/openclaw.json`: + +```json +{ + "plugins": { + "harborforge-monitor": { + "enabled": true, + "backendUrl": "https://monitor.hangman-lab.top", + "identifier": "my-server-01", + "challengeUuid": "your-challenge-uuid-here", + "reportIntervalSec": 30, + "httpFallbackIntervalSec": 60, + "logLevel": "info" + } + } +} +``` + +### 4. 重启 OpenClaw Gateway + +```bash +openclaw gateway restart +``` + +## 配置选项 + +| 选项 | 类型 | 默认值 | 说明 | +|------|------|--------|------| +| `enabled` | boolean | `true` | 是否启用插件 | +| `backendUrl` | string | `"https://monitor.hangman-lab.top"` | Monitor 后端地址 | +| `identifier` | string | 自动检测 hostname | 服务器标识符 | +| `challengeUuid` | string | 必填 | 注册挑战 UUID | +| `reportIntervalSec` | number | `30` | 报告间隔(秒) | +| `httpFallbackIntervalSec` | number | `60` | HTTP 回退间隔(秒) | +| `logLevel` | string | `"info"` | 日志级别: debug/info/warn/error | + +## 收集的指标 + +### 系统指标 +- CPU 使用率 (%) +- 内存使用率 (%)、已用/总量 (MB) +- 磁盘使用率 (%)、已用/总量 (GB) +- 交换分区使用率 (%) +- 系统运行时间 (秒) +- 1分钟平均负载 +- 平台 (linux/darwin/win32) +- 主机名 + +### OpenClaw 指标 +- OpenClaw 版本 +- Agent 数量 +- Agent 列表 (id, name, status) + +## 故障排查 + +### 查看日志 + +```bash +# 查看 Gateway 日志 +openclaw gateway logs | grep HF-Monitor + +# 或者直接查看 sidecar 输出(如果独立运行) +node sidecar/server.mjs 2>&1 | tee monitor.log +``` + +### 检查状态 + +在 OpenClaw 对话中: + +``` +使用 harborforge_monitor_status 工具检查插件状态 +``` + +### 常见问题 + +1. 
/**
 * HarborForge Monitor Plugin for OpenClaw
 *
 * Registers with OpenClaw Gateway and manages sidecar lifecycle.
 * Sidecar runs as separate Node process to avoid blocking Gateway.
 */
import { spawn } from 'child_process';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import { existsSync } from 'fs';
import { homedir } from 'os';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

const DEFAULT_BACKEND_URL = 'https://monitor.hangman-lab.top';
/** Grace period between SIGTERM and SIGKILL when stopping the sidecar. */
const FORCE_KILL_DELAY_MS = 5000;
/** Delay before the first start when the Gateway does not report itself as running. */
const STARTUP_DELAY_MS = 1000;

/**
 * Build a logger that always exposes debug/info/warn/error.
 *
 * Any level the host logger does not implement falls back to the console, so
 * a call such as `logger.warn(...)` can never throw inside a timer callback.
 *
 * @param {object|undefined|null} hostLogger - Logger supplied by the host, if any.
 * @returns {{debug: Function, info: Function, warn: Function, error: Function}}
 */
function resolveLogger(hostLogger) {
  const fallback = {
    debug: (...args) => console.debug('[HF-Monitor]', ...args),
    info: (...args) => console.log('[HF-Monitor]', ...args),
    warn: (...args) => console.warn('[HF-Monitor]', ...args),
    error: (...args) => console.error('[HF-Monitor]', ...args),
  };
  if (!hostLogger) {
    return fallback;
  }

  const logger = {};
  for (const level of Object.keys(fallback)) {
    logger[level] =
      typeof hostLogger[level] === 'function'
        ? (...args) => hostLogger[level](...args)
        : fallback[level];
  }
  return logger;
}

/**
 * Plugin entry point: validates the config, then wires the sidecar process to
 * the Gateway lifecycle and registers the `harborforge_monitor_status` tool.
 *
 * Returns early (after logging) when the plugin is disabled, when
 * `challengeUuid` is missing, or when `sidecar/server.mjs` is not on disk.
 *
 * @param {object} api - Host API; this function uses `logger`, `version`,
 *   `on`, `isRunning` (optional) and `registerTool`.
 * @param {object} [config] - Plugin config (see openclaw.plugin.json).
 * @returns {void}
 */
/** @type {import('openclaw').Plugin} */
export default function register(api, config) {
  const logger = resolveLogger(api.logger);

  // `enabled` is documented as defaulting to true, so only an explicit
  // `false` (or no config at all) disables the plugin.
  if (!config || config.enabled === false) {
    logger.info('HarborForge Monitor plugin disabled');
    return;
  }

  // Validate required config
  if (!config.challengeUuid) {
    logger.error('Missing required config: challengeUuid');
    logger.error('Please register server in HarborForge Monitor first');
    return;
  }

  const sidecarPath = join(__dirname, 'sidecar', 'server.mjs');

  if (!existsSync(sidecarPath)) {
    logger.error('Sidecar not found:', sidecarPath);
    return;
  }

  // Effective settings, resolved once so the sidecar environment and the
  // status tool report the same values.
  const settings = {
    backendUrl: config.backendUrl || DEFAULT_BACKEND_URL,
    identifier: config.identifier || '',
    reportIntervalSec: config.reportIntervalSec || 30,
    httpFallbackIntervalSec: config.httpFallbackIntervalSec || 60,
    logLevel: config.logLevel || 'info',
  };

  /** @type {import('child_process').ChildProcess|null} */
  let sidecar = null;
  /** Child currently being stopped, so repeated stop requests are no-ops. */
  let stoppingChild = null;
  /** Pending delayed-start timer, cancelled if a stop arrives first. */
  let startTimer = null;

  /**
   * Forward one chunk of sidecar output to the given log function.
   */
  function forwardOutput(logFn, data) {
    const text = data.toString().trim();
    if (text) {
      logFn('[sidecar]', text);
    }
  }

  /**
   * Start the sidecar server (no-op when one is already running).
   */
  function startSidecar() {
    if (sidecar) {
      logger.debug('Sidecar already running');
      return;
    }

    logger.info('Starting HarborForge Monitor sidecar...');

    // Prepare environment for sidecar
    const env = {
      ...process.env,
      HF_MONITOR_BACKEND_URL: settings.backendUrl,
      HF_MONITOR_IDENTIFIER: settings.identifier,
      HF_MONITOR_CHALLENGE_UUID: config.challengeUuid,
      HF_MONITOR_REPORT_INTERVAL: String(settings.reportIntervalSec),
      HF_MONITOR_HTTP_FALLBACK_INTERVAL: String(settings.httpFallbackIntervalSec),
      HF_MONITOR_LOG_LEVEL: settings.logLevel,
      // Pass OpenClaw info for metrics
      OPENCLAW_PATH: process.env.OPENCLAW_PATH || join(homedir(), '.openclaw'),
      OPENCLAW_VERSION: api.version || 'unknown',
    };

    // The child stays attached (not detached) so its output can be piped into
    // the Gateway log. process.execPath runs it on the same Node binary as the
    // Gateway instead of whatever `node` happens to be on PATH.
    // NOTE(review): assumes the Gateway itself runs under plain Node.js.
    const child = spawn(process.execPath, [sidecarPath], {
      env,
      detached: false,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    sidecar = child;

    child.stdout?.on('data', (data) => forwardOutput(logger.info, data));
    child.stderr?.on('data', (data) => forwardOutput(logger.error, data));

    // Only clear the shared reference if it still points at this child, so a
    // late exit from an old child cannot hide a newer one.
    child.on('exit', (code, signal) => {
      logger.info(`Sidecar exited (code: ${code}, signal: ${signal})`);
      if (sidecar === child) {
        sidecar = null;
      }
    });

    child.on('error', (err) => {
      logger.error('Failed to start sidecar:', err.message);
      if (sidecar === child) {
        sidecar = null;
      }
    });

    logger.info('Sidecar started with PID:', child.pid);
  }

  /**
   * Stop the sidecar server: SIGTERM first, SIGKILL after a grace period.
   */
  function stopSidecar() {
    // A stop that arrives before the delayed start must cancel that start.
    if (startTimer) {
      clearTimeout(startTimer);
      startTimer = null;
    }

    const child = sidecar;
    if (!child) {
      logger.debug('Sidecar not running');
      return;
    }
    if (stoppingChild === child) {
      logger.debug('Sidecar stop already in progress');
      return;
    }
    stoppingChild = child;

    logger.info('Stopping HarborForge Monitor sidecar...');

    // Graceful shutdown
    child.kill('SIGTERM');

    // Force kill after timeout. `child.killed` only records that a signal was
    // sent, so liveness is judged by exitCode/signalCode instead.
    const timeout = setTimeout(() => {
      if (child.exitCode === null && child.signalCode === null) {
        logger.warn('Sidecar did not exit gracefully, forcing kill');
        child.kill('SIGKILL');
      }
    }, FORCE_KILL_DELAY_MS);

    child.once('exit', () => {
      clearTimeout(timeout);
      if (stoppingChild === child) {
        stoppingChild = null;
      }
    });
  }

  // Hook into Gateway lifecycle
  api.on('gateway:start', () => {
    logger.info('Gateway starting, starting monitor sidecar...');
    startSidecar();
  });

  api.on('gateway:stop', () => {
    logger.info('Gateway stopping, stopping monitor sidecar...');
    stopSidecar();
  });

  // Also handle process signals directly
  // NOTE(review): adding a SIGTERM/SIGINT listener replaces Node's default
  // "exit on signal" behaviour for the whole process - confirm the Gateway
  // installs its own handlers that still terminate it.
  process.on('SIGTERM', () => {
    stopSidecar();
  });

  process.on('SIGINT', () => {
    stopSidecar();
  });

  // Start immediately if Gateway is already running
  if (api.isRunning?.()) {
    startSidecar();
  } else {
    // Delay start slightly to ensure Gateway is fully up
    startTimer = setTimeout(() => {
      startTimer = null;
      startSidecar();
    }, STARTUP_DELAY_MS);
  }

  // Register status tool
  api.registerTool(() => ({
    name: 'harborforge_monitor_status',
    description: 'Get HarborForge Monitor plugin status',
    parameters: {
      type: 'object',
      properties: {},
    },
    async execute() {
      return {
        enabled: true,
        sidecarRunning: sidecar !== null && sidecar.exitCode === null,
        pid: sidecar?.pid || null,
        config: {
          backendUrl: settings.backendUrl,
          identifier: settings.identifier || 'auto-detected',
          reportIntervalSec: settings.reportIntervalSec,
        },
      };
    },
  }));

  logger.info('HarborForge Monitor plugin registered');
}
"description": "HarborForge Monitor backend URL" + }, + "identifier": { + "type": "string", + "description": "Server identifier (auto-detected from hostname if not set)" + }, + "challengeUuid": { + "type": "string", + "description": "Registration challenge UUID from Monitor" + }, + "reportIntervalSec": { + "type": "number", + "default": 30, + "description": "How often to report metrics (seconds)" + }, + "httpFallbackIntervalSec": { + "type": "number", + "default": 60, + "description": "HTTP heartbeat interval when WS unavailable" + }, + "logLevel": { + "type": "string", + "enum": ["debug", "info", "warn", "error"], + "default": "info", + "description": "Logging level" + } + }, + "required": ["challengeUuid"] + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..2deaf45 --- /dev/null +++ b/package.json @@ -0,0 +1,16 @@ +{ + "name": "harborforge-openclaw-plugin", + "version": "0.1.0", + "description": "OpenClaw plugin for HarborForge Monitor - streams server telemetry", + "type": "module", + "main": "index.mjs", + "scripts": { + "start": "node index.mjs", + "sidecar": "node sidecar/server.mjs" + }, + "keywords": ["openclaw", "plugin", "monitoring", "harborforge"], + "license": "MIT", + "engines": { + "node": ">=18.0.0" + } +} diff --git a/sidecar/server.mjs b/sidecar/server.mjs new file mode 100644 index 0000000..913727c --- /dev/null +++ b/sidecar/server.mjs @@ -0,0 +1,302 @@ +/** + * HarborForge Monitor Sidecar Server + * + * Runs as separate process from Gateway. + * Collects system metrics and OpenClaw status, sends to Monitor. 
/**
 * Sidecar runtime: configuration, metric collectors, HTTP heartbeat transport
 * and the reporting loop. All configuration arrives through HF_MONITOR_* and
 * OPENCLAW_* environment variables.
 */
import { createServer } from 'http'; // not referenced by this module yet
import { readFile, access } from 'fs/promises';
import { constants } from 'fs';
import { exec } from 'child_process';
import { promisify } from 'util';
import { platform, hostname, freemem, totalmem, uptime, loadavg, cpus, homedir } from 'os';

const execAsync = promisify(exec);

/** Default timeout for one heartbeat request. */
const HEARTBEAT_TIMEOUT_MS = 10000;
/** Shorter timeout for the final heartbeat sent while shutting down. */
const SHUTDOWN_HEARTBEAT_TIMEOUT_MS = 3000;
/** Backoff grows by this much per consecutive failure... */
const BACKOFF_STEP_MS = 10000;
/** ...and is capped at five minutes. */
const BACKOFF_MAX_MS = 300000;

/**
 * Parse a positive integer from an environment string.
 *
 * @param {string|undefined} raw - Raw value.
 * @param {number} fallback - Returned when `raw` is missing, non-numeric or <= 0.
 * @returns {number}
 */
function positiveIntOr(raw, fallback) {
  const value = Number.parseInt(raw ?? '', 10);
  return Number.isFinite(value) && value > 0 ? value : fallback;
}

// Config from environment (set by plugin)
const CONFIG = {
  // Trailing slashes are stripped so the heartbeat URL never contains `//`.
  backendUrl: (process.env.HF_MONITOR_BACKEND_URL || 'https://monitor.hangman-lab.top').replace(/\/+$/, ''),
  identifier: process.env.HF_MONITOR_IDENTIFIER || hostname(),
  challengeUuid: process.env.HF_MONITOR_CHALLENGE_UUID,
  // A NaN interval would make setTimeout fire immediately and hammer the backend.
  reportIntervalSec: positiveIntOr(process.env.HF_MONITOR_REPORT_INTERVAL, 30),
  httpFallbackIntervalSec: positiveIntOr(process.env.HF_MONITOR_HTTP_FALLBACK_INTERVAL, 60),
  logLevel: process.env.HF_MONITOR_LOG_LEVEL || 'info',
  openclawPath: process.env.OPENCLAW_PATH || `${homedir()}/.openclaw`,
  openclawVersion: process.env.OPENCLAW_VERSION || 'unknown',
};

// Logging: a message is printed when its level is at or above the configured
// one. Unknown level names behave like "info".
const LOG_RANK = { debug: 10, info: 20, warn: 30, error: 40 };
const ACTIVE_LOG_RANK = LOG_RANK[CONFIG.logLevel] ?? LOG_RANK.info;
const shouldLog = (level) => LOG_RANK[level] >= ACTIVE_LOG_RANK;

const log = {
  debug: (...args) => {
    if (shouldLog('debug')) console.log('[DEBUG]', ...args);
  },
  info: (...args) => {
    if (shouldLog('info')) console.log('[INFO]', ...args);
  },
  // Stays on stdout, as before, so consumers of the piped output see it on
  // the same stream.
  warn: (...args) => {
    if (shouldLog('warn')) console.log('[WARN]', ...args);
  },
  error: (...args) => console.error('[ERROR]', ...args),
};

// State
let wsConnection = null; // never assigned in this module; only closed on shutdown if set
let lastSuccessfulSend = null;
let consecutiveFailures = 0;
let isShuttingDown = false;

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
const round1 = (value) => Math.round(value * 10) / 10;

/**
 * Sum idle and total CPU time across all cores as reported by os.cpus().
 *
 * @returns {{idle: number, total: number}}
 */
function snapshotCpuTimes() {
  let idle = 0;
  let total = 0;
  for (const { times } of cpus()) {
    idle += times.idle;
    total += times.user + times.nice + times.sys + times.idle + times.irq;
  }
  return { idle, total };
}

/**
 * Busy percentage between two CPU snapshots, clamped to 0..100 with one decimal.
 *
 * @param {{idle: number, total: number}} before
 * @param {{idle: number, total: number}} after
 * @returns {number} 0 when no time elapsed between the snapshots.
 */
function cpuPctBetween(before, after) {
  const totalDelta = after.total - before.total;
  if (!(totalDelta > 0)) {
    return 0;
  }
  const busyDelta = totalDelta - (after.idle - before.idle);
  return Math.min(100, Math.max(0, round1((busyDelta / totalDelta) * 100)));
}

/**
 * Get CPU usage percentage, averaged over a short sampling window.
 *
 * Uses os.cpus() deltas rather than parsing `top`, so it needs no shell, is
 * locale-independent and works on every platform Node supports.
 *
 * @param {number} [sampleMs=1000] - Sampling window in milliseconds.
 * @returns {Promise<number>}
 */
async function getCpuUsage(sampleMs = 1000) {
  const before = snapshotCpuTimes();
  await sleep(sampleMs);
  return cpuPctBetween(before, snapshotCpuTimes());
}

/**
 * Parse size string (like '50G' or '100M') to GB.
 *
 * The first letter decides the unit, case-insensitively, so '466Gi' and
 * '512k' both work. A value without a unit is returned unchanged, as before.
 * Unparseable input yields 0 instead of NaN.
 *
 * @param {string} size
 * @returns {number}
 */
function parseSizeToGB(size) {
  const text = String(size ?? '').trim().toUpperCase();
  const num = Number.parseFloat(text.replace(',', '.'));
  if (!Number.isFinite(num)) {
    return 0;
  }
  switch (text.match(/[A-Z]/)?.[0]) {
    case 'P':
      return num * 1024 * 1024;
    case 'T':
      return num * 1024;
    case 'G':
      return num;
    case 'M':
      return num / 1024;
    case 'K':
      return num / 1024 / 1024;
    case 'B':
      return num / 1024 / 1024 / 1024;
    default:
      return num;
  }
}

/**
 * Get disk usage of the root filesystem via `df` (Linux and macOS only).
 *
 * @returns {Promise<{totalGB: number, usedGB: number, usedPct: number}>}
 *   All zeros on other platforms or when `df` fails.
 */
async function getDiskUsage() {
  try {
    if (platform() === 'linux' || platform() === 'darwin') {
      const { stdout } = await execAsync("df -h / | tail -1 | awk '{print $2,$3,$5}'");
      const [total = '', used = '', pct = ''] = stdout.trim().split(/\s+/);
      const usedPct = Number.parseInt(pct, 10); // parseInt stops at the '%'
      return {
        totalGB: parseSizeToGB(total),
        usedGB: parseSizeToGB(used),
        usedPct: Number.isNaN(usedPct) ? 0 : usedPct,
      };
    }
  } catch (err) {
    log.debug('Failed to get disk usage:', err.message);
  }
  return { totalGB: 0, usedGB: 0, usedPct: 0 };
}

/**
 * Compute swap usage from /proc/meminfo text.
 *
 * @param {string} meminfo - Contents of /proc/meminfo.
 * @returns {number} Used swap in percent (one decimal); 0 when no swap exists.
 */
function parseSwapPct(meminfo) {
  const readKb = (key) => {
    const match = meminfo.match(new RegExp(`^${key}:\\s+(\\d+)`, 'm'));
    return match ? Number(match[1]) : 0;
  };
  const total = readKb('SwapTotal');
  if (total <= 0) {
    return 0;
  }
  return round1(((total - readKb('SwapFree')) / total) * 100);
}

/**
 * Get swap usage percentage. Implemented for Linux only; 0 elsewhere.
 *
 * @returns {Promise<number>}
 */
async function getSwapUsagePct() {
  if (platform() !== 'linux') {
    return 0;
  }
  try {
    return parseSwapPct(await readFile('/proc/meminfo', 'utf8'));
  } catch (err) {
    log.debug('Failed to get swap usage:', err.message);
    return 0;
  }
}

/**
 * Collect system metrics
 *
 * @param {{cpuSampleMs?: number}} [options] - `cpuSampleMs` is the CPU sampling window.
 * @returns {Promise<object>} Metric fields, or `{}` when collection fails.
 */
async function collectSystemMetrics({ cpuSampleMs = 1000 } = {}) {
  try {
    // The three probes are independent, so run them concurrently.
    const [cpuUsage, diskInfo, swapPct] = await Promise.all([
      getCpuUsage(cpuSampleMs),
      getDiskUsage(),
      getSwapUsagePct(),
    ]);

    // Memory
    const memTotal = totalmem();
    const memUsed = memTotal - freemem();

    // Load average. This file is an ES module, so `require` is not defined;
    // os.loadavg() already returns [0, 0, 0] on Windows.
    const [load1m] = loadavg();

    return {
      cpu_pct: cpuUsage,
      mem_pct: round1((memUsed / memTotal) * 100),
      mem_used_mb: Math.round(memUsed / 1024 / 1024),
      mem_total_mb: Math.round(memTotal / 1024 / 1024),
      disk_pct: diskInfo.usedPct,
      disk_used_gb: round1(diskInfo.usedGB),
      disk_total_gb: round1(diskInfo.totalGB),
      swap_pct: swapPct,
      uptime_sec: Math.floor(uptime()),
      load_avg_1m: Math.round(load1m * 100) / 100,
      platform: platform(),
      hostname: hostname(),
    };
  } catch (err) {
    log.error('Failed to collect system metrics:', err.message);
    return {};
  }
}

/**
 * Get list of OpenClaw agents from local state
 *
 * Reads `<openclawPath>/agents.json` and returns its `agents` array.
 * NOTE(review): the file name and `{ agents: [...] }` shape are taken from
 * this code only - confirm against OpenClaw's actual on-disk state.
 *
 * @returns {Promise<object[]>} Empty when the file is missing, unreadable or malformed.
 */
async function getOpenclawAgents() {
  const agentConfigPath = `${CONFIG.openclawPath}/agents.json`;
  try {
    await access(agentConfigPath, constants.R_OK);
    const data = JSON.parse(await readFile(agentConfigPath, 'utf8'));
    return Array.isArray(data?.agents) ? data.agents : [];
  } catch (err) {
    log.debug('Agent state not available:', err.message);
    return [];
  }
}

/**
 * Collect OpenClaw status
 *
 * @returns {Promise<{version: string, agent_count: number, agents: object[]}>}
 */
async function collectOpenclawStatus() {
  try {
    const agents = await getOpenclawAgents();

    return {
      version: CONFIG.openclawVersion,
      agent_count: agents.length,
      agents: agents.map((agent) => ({
        id: agent?.id,
        name: agent?.name,
        status: agent?.status,
      })),
    };
  } catch (err) {
    log.debug('Failed to collect OpenClaw status:', err.message);
    return { version: CONFIG.openclawVersion, agent_count: 0, agents: [] };
  }
}

/**
 * Build telemetry payload
 *
 * @returns {Promise<object>}
 */
async function buildPayload() {
  const [system, openclaw] = await Promise.all([
    collectSystemMetrics(),
    collectOpenclawStatus(),
  ]);

  return {
    identifier: CONFIG.identifier,
    challenge_uuid: CONFIG.challengeUuid,
    timestamp: new Date().toISOString(),
    ...system,
    openclaw_version: openclaw.version,
    openclaw_agents: openclaw.agents,
    openclaw_agent_count: openclaw.agent_count,
  };
}

/**
 * Send telemetry via HTTP
 *
 * Never throws: failures are logged and counted in `consecutiveFailures`.
 *
 * @param {number} [timeoutMs] - Abort the request after this many milliseconds.
 * @returns {Promise<boolean>} true when the backend answered with a 2xx status.
 */
async function sendHttpHeartbeat(timeoutMs = HEARTBEAT_TIMEOUT_MS) {
  try {
    const payload = await buildPayload();

    log.debug('Sending HTTP heartbeat...');

    const response = await fetch(`${CONFIG.backendUrl}/monitor/server/heartbeat`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'X-Server-Identifier': CONFIG.identifier,
        'X-Challenge-UUID': CONFIG.challengeUuid,
      },
      body: JSON.stringify(payload),
      // Without a timeout one hung connection would stall the whole loop.
      signal: AbortSignal.timeout(timeoutMs),
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${await response.text()}`);
    }

    log.debug('HTTP heartbeat sent successfully');
    lastSuccessfulSend = Date.now();
    consecutiveFailures = 0;
    return true;
  } catch (err) {
    log.error('HTTP heartbeat failed:', err.message);
    consecutiveFailures++;
    return false;
  }
}

/**
 * Delay before the next report.
 *
 * After failures the delay grows linearly (10 s per consecutive failure,
 * capped at 5 minutes) but never drops below the regular interval.
 *
 * @param {number} intervalMs - Regular reporting interval.
 * @param {number} failures - Consecutive failed heartbeats.
 * @returns {number} Milliseconds to wait.
 */
function computeNextDelayMs(intervalMs, failures) {
  if (failures <= 0) {
    return intervalMs;
  }
  const backoff = Math.min(failures * BACKOFF_STEP_MS, BACKOFF_MAX_MS);
  return Math.max(intervalMs, backoff);
}

/**
 * Main reporting loop
 *
 * @returns {Promise<void>} Resolves once `isShuttingDown` is set.
 */
async function reportingLoop() {
  while (!isShuttingDown) {
    try {
      // Try HTTP (WebSocket can be added later)
      const success = await sendHttpHeartbeat();

      const delay = computeNextDelayMs(CONFIG.reportIntervalSec * 1000, consecutiveFailures);
      if (!success) {
        log.info(`Retry in ${delay}ms (backoff)`);
      }

      // Sleep until next report
      await sleep(delay);
    } catch (err) {
      log.error('Reporting loop error:', err.message);
      await sleep(30000);
    }
  }
}

/**
 * Graceful shutdown: send one last heartbeat, then exit with code 0.
 */
function shutdown() {
  // A second signal must not start a second final heartbeat.
  if (isShuttingDown) {
    return;
  }
  log.info('Shutting down sidecar...');
  isShuttingDown = true;

  if (wsConnection) {
    wsConnection.close();
  }

  // Send final heartbeat with a short timeout so it finishes before a
  // supervisor loses patience and force-kills the process.
  sendHttpHeartbeat(SHUTDOWN_HEARTBEAT_TIMEOUT_MS).finally(() => {
    process.exit(0);
  });
}

/**
 * Entry point: log the effective config, validate it and start reporting.
 */
function main() {
  log.info('HarborForge Monitor Sidecar starting...');
  log.info('Config:', {
    identifier: CONFIG.identifier,
    backendUrl: CONFIG.backendUrl,
    reportIntervalSec: CONFIG.reportIntervalSec,
  });

  // Validate config
  if (!CONFIG.challengeUuid) {
    log.error('Missing HF_MONITOR_CHALLENGE_UUID environment variable');
    // exitCode (not process.exit) lets the piped stderr flush first; with
    // nothing else scheduled the process still ends right away with code 1.
    process.exitCode = 1;
    return;
  }

  // Handle signals
  process.on('SIGTERM', shutdown);
  process.on('SIGINT', shutdown);

  // Start reporting loop
  reportingLoop().catch((err) => {
    log.error('Reporting loop crashed:', err.message);
    process.exitCode = 1;
  });
}

main();