/**
 * HarborForge Monitor Sidecar Server
 *
 * Runs as separate process from Gateway.
 * Collects system metrics and OpenClaw status, sends to Monitor.
 */

import { createServer } from 'http';
import { readFile, access } from 'fs/promises';
import { constants } from 'fs';
import { exec } from 'child_process';
import { promisify } from 'util';
import { platform, hostname, freemem, totalmem, uptime, loadavg, homedir } from 'os';

const execAsync = promisify(exec);

/** Upper bound for a single heartbeat request, so a stalled connection cannot block the loop. */
const REQUEST_TIMEOUT_MS = 10000;

/** Delay added per consecutive failed heartbeat, and the cap on that delay. */
const BACKOFF_STEP_MS = 10000;
const BACKOFF_MAX_MS = 300000;

/** Shell pipelines that print the CPU usage figure from a single `top` sample, keyed by platform. */
const CPU_COMMANDS = {
  linux: "top -bn1 | grep 'Cpu(s)' | awk '{print $2}' | cut -d'%' -f1",
  darwin: "top -l 1 | grep 'CPU usage' | awk '{print $3}' | cut -d'%' -f1",
};

/**
 * Parse an environment value as a strictly positive integer.
 *
 * A missing, non-numeric, zero or negative value yields `fallback`; this keeps
 * `setTimeout` from receiving NaN/0 and turning the reporting loop into a busy loop.
 *
 * @param {string | undefined} raw - Raw environment variable value.
 * @param {number} fallback - Value used when `raw` is not a positive integer.
 * @returns {number} The parsed integer or `fallback`.
 */
function parsePositiveInt(raw, fallback) {
  const value = Number.parseInt(raw ?? '', 10);
  return Number.isFinite(value) && value > 0 ? value : fallback;
}

/**
 * Round a number to one decimal place.
 *
 * @param {number} value
 * @returns {number}
 */
function roundTo1(value) {
  return Math.round(value * 10) / 10;
}

// Config from environment (set by plugin)
const CONFIG = {
  backendUrl: process.env.HF_MONITOR_BACKEND_URL || 'https://monitor.hangman-lab.top',
  identifier: process.env.HF_MONITOR_IDENTIFIER || hostname(),
  challengeUuid: process.env.HF_MONITOR_CHALLENGE_UUID,
  reportIntervalSec: parsePositiveInt(process.env.HF_MONITOR_REPORT_INTERVAL, 30),
  httpFallbackIntervalSec: parsePositiveInt(process.env.HF_MONITOR_HTTP_FALLBACK_INTERVAL, 60),
  logLevel: process.env.HF_MONITOR_LOG_LEVEL || 'info',
  // HOME may be unset (e.g. on Windows); fall back to the OS-reported home directory.
  openclawPath: process.env.OPENCLAW_PATH || `${process.env.HOME || homedir()}/.openclaw`,
  openclawVersion: process.env.OPENCLAW_VERSION || 'unknown',
};

// Logging: debug/info are gated by CONFIG.logLevel, warn/error are always emitted.
const log = {
  debug: (...args) => CONFIG.logLevel === 'debug' && console.log('[DEBUG]', ...args),
  info: (...args) => ['debug', 'info'].includes(CONFIG.logLevel) && console.log('[INFO]', ...args),
  warn: (...args) => console.log('[WARN]', ...args),
  error: (...args) => console.error('[ERROR]', ...args),
};

// State
let wsConnection = null; // never assigned in this file; only closed on shutdown if set
let lastSuccessfulSend = null; // epoch ms of the last heartbeat that got an OK response
let consecutiveFailures = 0; // drives the retry backoff in reportingLoop()
let isShuttingDown = false;

/**
 * Collect system metrics.
 *
 * @returns {Promise<object>} Flat metrics object (CPU, memory, disk, uptime,
 *   load average, platform, hostname). Returns `{}` if collection throws.
 */
async function collectSystemMetrics() {
  try {
    // CPU and disk both shell out and are independent, so run them concurrently.
    const [cpuUsage, diskInfo] = await Promise.all([getCpuUsage(), getDiskUsage()]);

    // Memory
    const memTotal = totalmem();
    const memFree = freemem();
    const memUsed = memTotal - memFree;

    // Load average (Node documents this as [0, 0, 0] on Windows).
    const loadAvg = loadavg();

    return {
      cpu_pct: cpuUsage,
      mem_pct: roundTo1((memUsed / memTotal) * 100),
      mem_used_mb: Math.round(memUsed / 1024 / 1024),
      mem_total_mb: Math.round(memTotal / 1024 / 1024),
      disk_pct: diskInfo.usedPct,
      disk_used_gb: roundTo1(diskInfo.usedGB),
      disk_total_gb: roundTo1(diskInfo.totalGB),
      // getDiskUsage() does not currently provide swapUsedPct, so this reports 0.
      swap_pct: diskInfo.swapUsedPct || 0,
      uptime_sec: Math.floor(uptime()),
      load_avg_1m: Math.round(loadAvg[0] * 100) / 100,
      platform: platform(),
      hostname: hostname(),
    };
  } catch (err) {
    log.error('Failed to collect system metrics:', err.message);
    return {};
  }
}

/**
 * Compute CPU usage from the aggregate `cpu` line of /proc/stat.
 *
 * The counters in that line are cumulative, so this is the non-idle share of
 * all time accounted for in the file, not an instantaneous reading.
 *
 * @returns {Promise<number>} Usage percentage rounded to one decimal, or 0 on any failure.
 */
async function getCpuUsageFromProcStat() {
  try {
    const stat = await readFile('/proc/stat', 'utf8');
    const cpuLine = stat.split('\n')[0];
    const parts = cpuLine.split(/\s+/).slice(1).map(Number);
    const idle = parts[3];
    const total = parts.reduce((a, b) => a + b, 0);
    if (!Number.isFinite(total) || total <= 0 || !Number.isFinite(idle)) {
      return 0;
    }
    return roundTo1(((total - idle) / total) * 100);
  } catch {
    // Best-effort fallback: an unreadable /proc/stat simply means "unknown".
    return 0;
  }
}

/**
 * Get CPU usage percentage.
 *
 * Uses the platform's `top` pipeline on Linux and macOS. On Linux, if the
 * command fails or prints something that is not a number, falls back to
 * /proc/stat. Other platforms report 0.
 *
 * @returns {Promise<number>} Usage percentage rounded to one decimal; 0 when unavailable.
 */
async function getCpuUsage() {
  const currentPlatform = platform();
  const command = CPU_COMMANDS[currentPlatform];

  if (command) {
    try {
      const { stdout } = await execAsync(command);
      const usage = Number.parseFloat(stdout.trim());
      if (!Number.isNaN(usage)) {
        return roundTo1(usage);
      }
    } catch (err) {
      log.debug('Failed to get CPU usage via top:', err.message);
    }
  }

  return currentPlatform === 'linux' ? getCpuUsageFromProcStat() : 0;
}

/**
 * Get disk usage of the root filesystem via `df -h /`.
 *
 * @returns {Promise<{totalGB: number, usedGB: number, usedPct: number}>}
 *   All zeros on unsupported platforms or when `df` fails / is unparsable.
 */
async function getDiskUsage() {
  try {
    if (platform() === 'linux' || platform() === 'darwin') {
      const { stdout } = await execAsync("df -h / | tail -1 | awk '{print $2,$3,$5}'");
      const [total, used, pct] = stdout.trim().split(/\s+/);
      const usedPct = Number.parseInt(String(pct ?? '').replace('%', ''), 10);
      return {
        totalGB: parseSizeToGB(total),
        usedGB: parseSizeToGB(used),
        usedPct: Number.isNaN(usedPct) ? 0 : usedPct,
      };
    }
  } catch (err) {
    log.debug('Failed to get disk usage:', err.message);
  }
  return { totalGB: 0, usedGB: 0, usedPct: 0 };
}

/**
 * Parse size string (like '50G' or '100M') to GB.
 *
 * The unit is the first letter after the number, case-insensitive, so both
 * '50G' and '50Gi' are accepted. A value with no recognised unit is returned
 * unchanged; missing or non-numeric input yields 0.
 *
 * @param {string} size - Human-readable size.
 * @returns {number} Size in GB (1024-based).
 */
function parseSizeToGB(size) {
  const num = Number.parseFloat(size);
  if (!Number.isFinite(num)) {
    return 0;
  }

  // Strip the numeric prefix; what remains starts with the unit letter.
  const unit = String(size).replace(/^[\s\d.,+-]+/, '').charAt(0).toUpperCase();
  switch (unit) {
    case 'P':
      return num * 1024 * 1024;
    case 'T':
      return num * 1024;
    case 'G':
      return num;
    case 'M':
      return num / 1024;
    case 'K':
      return num / 1024 / 1024;
    case 'B':
      return num / 1024 / 1024 / 1024;
    default:
      return num;
  }
}

/**
 * Collect OpenClaw status.
 *
 * @returns {Promise<{version: string, agent_count: number, agents: Array<{id: *, name: *, status: *}>}>}
 *   Version comes from CONFIG; on any failure the agent list is empty.
 */
async function collectOpenclawStatus() {
  try {
    const agents = await getOpenclawAgents();
    return {
      version: CONFIG.openclawVersion,
      agent_count: agents.length,
      // Forward only the fields the payload needs.
      agents: agents.map((a) => ({
        id: a.id,
        name: a.name,
        status: a.status,
      })),
    };
  } catch (err) {
    log.debug('Failed to collect OpenClaw status:', err.message);
    return { version: CONFIG.openclawVersion, agent_count: 0, agents: [] };
  }
}

/**
 * Get list of OpenClaw agents from local state.
 *
 * Reads `<openclawPath>/agents.json` and returns its `agents` property.
 *
 * @returns {Promise<Array>} The agents array, or `[]` when the file is missing,
 *   unreadable, not valid JSON, or has no array under `agents`.
 */
async function getOpenclawAgents() {
  const agentConfigPath = `${CONFIG.openclawPath}/agents.json`;
  try {
    await access(agentConfigPath, constants.R_OK);
    const data = JSON.parse(await readFile(agentConfigPath, 'utf8'));
    return Array.isArray(data?.agents) ? data.agents : [];
  } catch (err) {
    // A missing file is expected on hosts without agents; keep this at debug level.
    log.debug('Failed to read OpenClaw agents:', err.message);
    return [];
  }
}

/**
 * Build telemetry payload.
 *
 * @returns {Promise<object>} Identifier, challenge UUID and timestamp, the
 *   system metrics spread at top level, and the OpenClaw fields.
 */
async function buildPayload() {
  // Both collectors handle their own errors, so Promise.all cannot reject here.
  const [system, openclaw] = await Promise.all([
    collectSystemMetrics(),
    collectOpenclawStatus(),
  ]);

  return {
    identifier: CONFIG.identifier,
    challenge_uuid: CONFIG.challengeUuid,
    timestamp: new Date().toISOString(),
    ...system,
    openclaw_version: openclaw.version,
    openclaw_agents: openclaw.agents,
    openclaw_agent_count: openclaw.agent_count,
  };
}

/**
 * Send telemetry via HTTP.
 *
 * POSTs the payload to `<backendUrl>/monitor/server/heartbeat`. Updates
 * `lastSuccessfulSend` / `consecutiveFailures` according to the outcome.
 *
 * @returns {Promise<boolean>} `true` on an OK response; `false` on a non-OK
 *   response, network error or timeout. Never rejects.
 */
async function sendHttpHeartbeat() {
  try {
    const payload = await buildPayload();

    log.debug('Sending HTTP heartbeat...');

    const response = await fetch(`${CONFIG.backendUrl}/monitor/server/heartbeat`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'X-Server-Identifier': CONFIG.identifier,
        'X-Challenge-UUID': CONFIG.challengeUuid,
      },
      body: JSON.stringify(payload),
      // Abort stalled requests so neither the loop nor shutdown can hang forever.
      signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${await response.text()}`);
    }

    log.debug('HTTP heartbeat sent successfully');
    lastSuccessfulSend = Date.now();
    consecutiveFailures = 0;
    return true;
  } catch (err) {
    log.error('HTTP heartbeat failed:', err.message);
    consecutiveFailures++;
    return false;
  }
}

/**
 * Resolve after the given number of milliseconds.
 *
 * @param {number} ms
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Main reporting loop.
 *
 * Sends a heartbeat, then sleeps for the report interval. After a failure the
 * sleep is at least the backoff delay. Runs until `isShuttingDown` is set.
 *
 * @returns {Promise<void>}
 */
async function reportingLoop() {
  while (!isShuttingDown) {
    try {
      // Try HTTP (WebSocket can be added later)
      const success = await sendHttpHeartbeat();

      let interval = CONFIG.reportIntervalSec * 1000;
      if (!success) {
        // Linear backoff: 10 s per consecutive failure, capped at 5 minutes.
        const backoff = Math.min(consecutiveFailures * BACKOFF_STEP_MS, BACKOFF_MAX_MS);
        interval = Math.max(interval, backoff);
        log.info(`Retry in ${interval}ms (backoff)`);
      }

      // Sleep until next report
      await sleep(interval);
    } catch (err) {
      log.error('Reporting loop error:', err.message);
      await sleep(30000);
    }
  }
}

/**
 * Graceful shutdown.
 *
 * Stops the loop, closes the WebSocket if one exists, sends one final
 * heartbeat and exits with code 0 regardless of that heartbeat's outcome.
 * Repeated signals while a shutdown is in progress are ignored so only one
 * final heartbeat is sent.
 */
function shutdown() {
  if (isShuttingDown) {
    return;
  }

  log.info('Shutting down sidecar...');
  isShuttingDown = true;

  if (wsConnection) {
    wsConnection.close();
  }

  // Send final heartbeat
  sendHttpHeartbeat().finally(() => {
    process.exit(0);
  });
}

// Handle signals
process.on('SIGTERM', shutdown);
process.on('SIGINT', shutdown);

// Start
log.info('HarborForge Monitor Sidecar starting...');
log.info('Config:', {
  identifier: CONFIG.identifier,
  backendUrl: CONFIG.backendUrl,
  reportIntervalSec: CONFIG.reportIntervalSec,
});

// Validate config
if (!CONFIG.challengeUuid) {
  log.error('Missing HF_MONITOR_CHALLENGE_UUID environment variable');
  process.exit(1);
}

// Start reporting loop. It handles its own errors; the catch is a last-resort guard.
reportingLoop().catch((err) => {
  log.error('Reporting loop error:', err.message);
  process.exit(1);
});