feat(monitor): active push loop replacing standalone monitor

Adds a periodic POST loop to <backend>/monitor/server/heartbeat so
HF plugin can take over the standalone harborforge-monitor daemon's
job — same X-API-Key header, same flat telemetry shape (cpu_pct /
mem_pct / disk_pct / swap_pct / load_avg / uptime_seconds /
plugin_version / agents[]). HF backend stays unchanged.

Config: monitor_push_enabled (default false; opt-in to avoid surprise
heartbeats from existing deployments), monitor_push_interval_seconds
(default 30), reuses apiKey for the X-API-Key header. Lift the
container's HF_MONITER_API_KEY into config.apiKey, flip
monitor_push_enabled true, then docker rm -f the container — DB
last_seen_at keeps advancing under the plugin's loop.

Collector grew swap + cpu sampling (two reads of /proc/stat over a
1-second window when SampleCPU=true). Bridge endpoint stays cheap
(SampleCPU=false on demand); push loop is the only caller paying the
sampling cost.

E2E in sim: monitor_push_enabled=true + apiKey from injected
MonitoredServer row → server_states.last_seen_at advances exactly
every interval_seconds (10s configured, 10s observed). cpu/mem/disk/
swap_pct all populate correctly.
This commit is contained in:
h z
2026-06-03 13:04:51 +01:00
parent 472cecd771
commit 6e3ad669f8
7 changed files with 448 additions and 30 deletions

View File

@@ -41,6 +41,7 @@ type harborForgePlugin struct {
host sdkplugin.HostAPI
cfg hfcfg.Resolved
bridge *monitor.Bridge
pusher *monitor.Pusher
sched *calendar.Scheduler
deps tools.Deps
cancelBg context.CancelFunc
@@ -66,11 +67,12 @@ func (p *harborForgePlugin) Init(ctx context.Context, host sdkplugin.HostAPI) er
}
p.cfg = hfcfg.Resolve(raw)
host.Log("info", "harbor-forge plugin initialized", map[string]any{
"version": Version,
"backend": p.cfg.BackendURL,
"identifier": p.cfg.Identifier,
"monitor_port": p.cfg.MonitorPort,
"calendar_enabled": p.cfg.CalendarEnabled,
"version": Version,
"backend": p.cfg.BackendURL,
"identifier": p.cfg.Identifier,
"monitor_port": p.cfg.MonitorPort,
"monitor_push_enabled": p.cfg.MonitorPushEnabled,
"calendar_enabled": p.cfg.CalendarEnabled,
})
bgCtx, cancel := context.WithCancel(context.Background())
@@ -79,15 +81,22 @@ func (p *harborForgePlugin) Init(ctx context.Context, host sdkplugin.HostAPI) er
// Listers + collectors capture bgCtx (not Init ctx) — Init returns
// once MCP initialize completes, but the plugin process lives on
// and so do the goroutines + closures we registered.
collect := func() telemetry.Snapshot {
return telemetry.Collect(telemetry.CollectOpts{
Identifier: p.cfg.Identifier,
Version: Version,
AgentLister: func() []telemetry.AgentInfo {
return p.listAgents(bgCtx, profileRoot)
},
})
makeCollector := func(sampleCPU bool) func() telemetry.Snapshot {
return func() telemetry.Snapshot {
return telemetry.Collect(telemetry.CollectOpts{
Identifier: p.cfg.Identifier,
Version: Version,
SampleCPU: sampleCPU,
AgentLister: func() []telemetry.AgentInfo {
return p.listAgents(bgCtx, profileRoot)
},
})
}
}
// Bridge serves on-demand reads; cheap, no CPU sampling.
collect := makeCollector(false)
// Pusher runs the slow push loop; CPU sampling fine here.
collectForPush := makeCollector(true)
p.bridge = monitor.New(p.cfg.MonitorPort, collect,
func(level, msg string, attrs map[string]any) { host.Log(level, msg, attrs) })
@@ -96,6 +105,25 @@ func (p *harborForgePlugin) Init(ctx context.Context, host sdkplugin.HostAPI) er
host.Log("warn", "monitor bridge failed to start", map[string]any{"err": err.Error()})
}
// Active push loop — replaces the standalone harborforge-monitor
// container. Off by default; operator opts in via
// monitor_push_enabled + apiKey.
p.pusher = monitor.NewPusher(monitor.PusherConfig{
BackendURL: p.cfg.BackendURL,
APIKey: p.cfg.APIKey,
Interval: time.Duration(p.cfg.MonitorPushIntervalSeconds) * time.Second,
}, collectForPush,
func(level, msg string, attrs map[string]any) { host.Log(level, msg, attrs) })
if p.cfg.MonitorPushEnabled {
p.wg.Add(1)
go func() {
defer p.wg.Done()
if err := p.pusher.Run(bgCtx); err != nil && !errors.Is(err, context.Canceled) {
host.Log("warn", "monitor pusher exited", map[string]any{"err": err.Error()})
}
}()
}
calBackend := p.cfg.CalendarBackendURL
if calBackend == "" {
calBackend = p.cfg.BackendURL
@@ -128,6 +156,7 @@ func (p *harborForgePlugin) Init(ctx context.Context, host sdkplugin.HostAPI) er
Version: Version,
Collect: collect,
Bridge: p.bridge,
Pusher: p.pusher,
Scheduler: p.sched,
Host: host,
AgentIDFromCtx: func(ctx context.Context) string {