HarborForge.OpenclawPlugin/sidecar/server.mjs

/**
 * HarborForge Monitor Sidecar Server
 *
 * Runs as separate process from Gateway.
 * Collects system metrics and OpenClaw status, sends to Monitor.
 */
import { createServer } from 'http';
import { readFile, access } from 'fs/promises';
import { constants } from 'fs';
import { exec } from 'child_process';
import { promisify } from 'util';
import { platform, hostname, freemem, totalmem, uptime } from 'os';

const execAsync = promisify(exec);

// Config from environment (set by plugin)
const CONFIG = {
  backendUrl: process.env.HF_MONITOR_BACKEND_URL || 'https://monitor.hangman-lab.top',
  identifier: process.env.HF_MONITOR_IDENTIFIER || hostname(),
  challengeUuid: process.env.HF_MONITOR_CHALLENGE_UUID,
  reportIntervalSec: parseInt(process.env.HF_MONITOR_REPORT_INTERVAL || '30', 10),
  httpFallbackIntervalSec: parseInt(process.env.HF_MONITOR_HTTP_FALLBACK_INTERVAL || '60', 10),
  logLevel: process.env.HF_MONITOR_LOG_LEVEL || 'info',
  openclawPath: process.env.OPENCLAW_PATH || `${process.env.HOME}/.openclaw`,
  openclawVersion: process.env.OPENCLAW_VERSION || 'unknown',
};

// Logging
const log = {
  debug: (...args) => CONFIG.logLevel === 'debug' && console.log('[DEBUG]', ...args),
  info: (...args) => ['debug', 'info'].includes(CONFIG.logLevel) && console.log('[INFO]', ...args),
  warn: (...args) => console.log('[WARN]', ...args),
  error: (...args) => console.error('[ERROR]', ...args),
};

// State
let wsConnection = null;
let lastSuccessfulSend = null;
let consecutiveFailures = 0;
let isShuttingDown = false;

/**
 * Collect system metrics
 */
async function collectSystemMetrics() {
  try {
    // CPU usage (average over 1 second)
    const cpuUsage = await getCpuUsage();

    // Memory
    const memTotal = totalmem();
    const memFree = freemem();
    const memUsed = memTotal - memFree;

    // Disk usage
    const diskInfo = await getDiskUsage();

    // Load average
    const loadAvg = platform() !== 'win32' ? require('os').loadavg() : [0, 0, 0];

    return {
      cpu_pct: cpuUsage,
      mem_pct: Math.round((memUsed / memTotal) * 100 * 10) / 10,
      mem_used_mb: Math.round(memUsed / 1024 / 1024),
      mem_total_mb: Math.round(memTotal / 1024 / 1024),
      disk_pct: diskInfo.usedPct,
      disk_used_gb: Math.round(diskInfo.usedGB * 10) / 10,
      disk_total_gb: Math.round(diskInfo.totalGB * 10) / 10,
      swap_pct: diskInfo.swapUsedPct || 0,
      uptime_sec: Math.floor(uptime()),
      load_avg_1m: Math.round(loadAvg[0] * 100) / 100,
      platform: platform(),
      hostname: hostname(),
    };
  } catch (err) {
    log.error('Failed to collect system metrics:', err.message);
    return {};
  }
}

/**
 * Get CPU usage percentage
 */
async function getCpuUsage() {
  try {
    if (platform() === 'linux') {
      const { stdout } = await execAsync("top -bn1 | grep 'Cpu(s)' | awk '{print $2}' | cut -d'%' -f1");
      const usage = parseFloat(stdout.trim());
      return isNaN(usage) ? 0 : Math.round(usage * 10) / 10;
    } else if (platform() === 'darwin') {
      const { stdout } = await execAsync("top -l 1 | grep 'CPU usage' | awk '{print $3}' | cut -d'%' -f1");
      const usage = parseFloat(stdout.trim());
      return isNaN(usage) ? 0 : Math.round(usage * 10) / 10;
    }
  } catch {
    // Fallback: calculate from /proc/stat on Linux
    try {
      const stat = await readFile('/proc/stat', 'utf8');
      const cpuLine = stat.split('\n')[0];
      const parts = cpuLine.split(/\s+/).slice(1).map(Number);
      const idle = parts[3];
      const total = parts.reduce((a, b) => a + b, 0);
      const usage = ((total - idle) / total) * 100;
      return Math.round(usage * 10) / 10;
    } catch {
      return 0;
    }
  }
  return 0;
}

/**
 * Get disk usage
 */
async function getDiskUsage() {
  try {
    if (platform() === 'linux' || platform() === 'darwin') {
      const { stdout } = await execAsync("df -h / | tail -1 | awk '{print $2,$3,$5}'");
      const [total, used, pct] = stdout.trim().split(/\s+/);
      return {
        totalGB: parseSizeToGB(total),
        usedGB: parseSizeToGB(used),
        usedPct: parseInt(pct.replace('%', ''), 10),
      };
    }
  } catch (err) {
    log.debug('Failed to get disk usage:', err.message);
  }
  return { totalGB: 0, usedGB: 0, usedPct: 0 };
}

/**
 * Parse size string (like '50G' or '100M') to GB
 */
function parseSizeToGB(size) {
  const num = parseFloat(size);
  if (size.includes('T')) return num * 1024;
  if (size.includes('G')) return num;
  if (size.includes('M')) return num / 1024;
  if (size.includes('K')) return num / 1024 / 1024;
  return num;
}

/**
 * Collect OpenClaw status
 */
async function collectOpenclawStatus() {
  try {
    const agents = await getOpenclawAgents();

    return {
      version: CONFIG.openclawVersion,
      agent_count: agents.length,
      agents: agents.map(a => ({
        id: a.id,
        name: a.name,
        status: a.status,
      })),
    };
  } catch (err) {
    log.debug('Failed to collect OpenClaw status:', err.message);
    return { version: CONFIG.openclawVersion, agent_count: 0, agents: [] };
  }
}

/**
 * Get list of OpenClaw agents from local state
 */
async function getOpenclawAgents() {
  try {
    // Try to read agent config/state from OpenClaw directory
    const agentConfigPath = `${CONFIG.openclawPath}/agents.json`;
    try {
      await access(agentConfigPath, constants.R_OK);
      const data = JSON.parse(await readFile(agentConfigPath, 'utf8'));
      return data.agents || [];
    } catch {
      // Fallback: return empty list
      return [];
    }
  } catch {
    return [];
  }
}

/**
 * Build telemetry payload
 */
async function buildPayload() {
  const system = await collectSystemMetrics();
  const openclaw = await collectOpenclawStatus();

  return {
    identifier: CONFIG.identifier,
    challenge_uuid: CONFIG.challengeUuid,
    timestamp: new Date().toISOString(),
    ...system,
    openclaw_version: openclaw.version,
    openclaw_agents: openclaw.agents,
    openclaw_agent_count: openclaw.agent_count,
  };
}

/**
 * Send telemetry via HTTP
 */
async function sendHttpHeartbeat() {
  try {
    const payload = await buildPayload();

    log.debug('Sending HTTP heartbeat...');

    const response = await fetch(`${CONFIG.backendUrl}/monitor/server/heartbeat`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'X-Server-Identifier': CONFIG.identifier,
        'X-Challenge-UUID': CONFIG.challengeUuid,
      },
      body: JSON.stringify(payload),
    });

    if (response.ok) {
      log.debug('HTTP heartbeat sent successfully');
      lastSuccessfulSend = Date.now();
      consecutiveFailures = 0;
      return true;
    } else {
      throw new Error(`HTTP ${response.status}: ${await response.text()}`);
    }
  } catch (err) {
    log.error('HTTP heartbeat failed:', err.message);
    consecutiveFailures++;
    return false;
  }
}

/**
 * Main reporting loop
 */
async function reportingLoop() {
  while (!isShuttingDown) {
    try {
      // Try HTTP (WebSocket can be added later)
      const success = await sendHttpHeartbeat();

      // Calculate next interval with backoff on failure
      let interval = CONFIG.reportIntervalSec * 1000;
      if (!success) {
        // Exponential backoff: max 5 minutes
        const backoff = Math.min(consecutiveFailures * 10000, 300000);
        interval = Math.max(interval, backoff);
        log.info(`Retry in ${interval}ms (backoff)`);
      }

      // Sleep until next report
      await new Promise(resolve => setTimeout(resolve, interval));

    } catch (err) {
      log.error('Reporting loop error:', err.message);
      await new Promise(resolve => setTimeout(resolve, 30000));
    }
  }
}

/**
 * Graceful shutdown
 */
function shutdown() {
  log.info('Shutting down sidecar...');
  isShuttingDown = true;

  if (wsConnection) {
    wsConnection.close();
  }

  // Send final heartbeat
  sendHttpHeartbeat().finally(() => {
    process.exit(0);
  });
}

// Handle signals
process.on('SIGTERM', shutdown);
process.on('SIGINT', shutdown);

// Start
log.info('HarborForge Monitor Sidecar starting...');
log.info('Config:', {
  identifier: CONFIG.identifier,
  backendUrl: CONFIG.backendUrl,
  reportIntervalSec: CONFIG.reportIntervalSec,
});

// Validate config
if (!CONFIG.challengeUuid) {
  log.error('Missing HF_MONITOR_CHALLENGE_UUID environment variable');
  process.exit(1);
}

// Start reporting loop
reportingLoop();