feat(monitor): active push loop replacing standalone monitor
Adds a periodic POST loop to <backend>/monitor/server/heartbeat so the HF plugin can take over the standalone harborforge-monitor daemon's job — same X-API-Key header, same flat telemetry shape (cpu_pct / mem_pct / disk_pct / swap_pct / load_avg / uptime_seconds / plugin_version / agents[]). The HF backend stays unchanged. Config: monitor_push_enabled (default false; opt-in to avoid surprise heartbeats from existing deployments), monitor_push_interval_seconds (default 30); reuses apiKey for the X-API-Key header. Migration: lift the container's HF_MONITER_API_KEY into config.apiKey, flip monitor_push_enabled to true, then docker rm -f the container — DB last_seen_at keeps advancing under the plugin's loop. Collector grew swap + CPU sampling (two reads of /proc/stat over a 1-second window when SampleCPU=true). Bridge endpoint stays cheap (SampleCPU=false on demand); the push loop is the only caller paying the sampling cost. E2E in sim: monitor_push_enabled=true + apiKey from injected MonitoredServer row → server_states.last_seen_at advances exactly every interval_seconds (10s configured, 10s observed). cpu/mem/disk/swap_pct all populate correctly.
This commit is contained in:
@@ -41,6 +41,7 @@ type harborForgePlugin struct {
|
||||
host sdkplugin.HostAPI
|
||||
cfg hfcfg.Resolved
|
||||
bridge *monitor.Bridge
|
||||
pusher *monitor.Pusher
|
||||
sched *calendar.Scheduler
|
||||
deps tools.Deps
|
||||
cancelBg context.CancelFunc
|
||||
@@ -66,11 +67,12 @@ func (p *harborForgePlugin) Init(ctx context.Context, host sdkplugin.HostAPI) er
|
||||
}
|
||||
p.cfg = hfcfg.Resolve(raw)
|
||||
host.Log("info", "harbor-forge plugin initialized", map[string]any{
|
||||
"version": Version,
|
||||
"backend": p.cfg.BackendURL,
|
||||
"identifier": p.cfg.Identifier,
|
||||
"monitor_port": p.cfg.MonitorPort,
|
||||
"calendar_enabled": p.cfg.CalendarEnabled,
|
||||
"version": Version,
|
||||
"backend": p.cfg.BackendURL,
|
||||
"identifier": p.cfg.Identifier,
|
||||
"monitor_port": p.cfg.MonitorPort,
|
||||
"monitor_push_enabled": p.cfg.MonitorPushEnabled,
|
||||
"calendar_enabled": p.cfg.CalendarEnabled,
|
||||
})
|
||||
|
||||
bgCtx, cancel := context.WithCancel(context.Background())
|
||||
@@ -79,15 +81,22 @@ func (p *harborForgePlugin) Init(ctx context.Context, host sdkplugin.HostAPI) er
|
||||
// Listers + collectors capture bgCtx (not Init ctx) — Init returns
|
||||
// once MCP initialize completes, but the plugin process lives on
|
||||
// and so do the goroutines + closures we registered.
|
||||
collect := func() telemetry.Snapshot {
|
||||
return telemetry.Collect(telemetry.CollectOpts{
|
||||
Identifier: p.cfg.Identifier,
|
||||
Version: Version,
|
||||
AgentLister: func() []telemetry.AgentInfo {
|
||||
return p.listAgents(bgCtx, profileRoot)
|
||||
},
|
||||
})
|
||||
makeCollector := func(sampleCPU bool) func() telemetry.Snapshot {
|
||||
return func() telemetry.Snapshot {
|
||||
return telemetry.Collect(telemetry.CollectOpts{
|
||||
Identifier: p.cfg.Identifier,
|
||||
Version: Version,
|
||||
SampleCPU: sampleCPU,
|
||||
AgentLister: func() []telemetry.AgentInfo {
|
||||
return p.listAgents(bgCtx, profileRoot)
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
// Bridge serves on-demand reads; cheap, no CPU sampling.
|
||||
collect := makeCollector(false)
|
||||
// Pusher runs the slow push loop; CPU sampling fine here.
|
||||
collectForPush := makeCollector(true)
|
||||
|
||||
p.bridge = monitor.New(p.cfg.MonitorPort, collect,
|
||||
func(level, msg string, attrs map[string]any) { host.Log(level, msg, attrs) })
|
||||
@@ -96,6 +105,25 @@ func (p *harborForgePlugin) Init(ctx context.Context, host sdkplugin.HostAPI) er
|
||||
host.Log("warn", "monitor bridge failed to start", map[string]any{"err": err.Error()})
|
||||
}
|
||||
|
||||
// Active push loop — replaces the standalone harborforge-monitor
|
||||
// container. Off by default; operator opts in via
|
||||
// monitor_push_enabled + apiKey.
|
||||
p.pusher = monitor.NewPusher(monitor.PusherConfig{
|
||||
BackendURL: p.cfg.BackendURL,
|
||||
APIKey: p.cfg.APIKey,
|
||||
Interval: time.Duration(p.cfg.MonitorPushIntervalSeconds) * time.Second,
|
||||
}, collectForPush,
|
||||
func(level, msg string, attrs map[string]any) { host.Log(level, msg, attrs) })
|
||||
if p.cfg.MonitorPushEnabled {
|
||||
p.wg.Add(1)
|
||||
go func() {
|
||||
defer p.wg.Done()
|
||||
if err := p.pusher.Run(bgCtx); err != nil && !errors.Is(err, context.Canceled) {
|
||||
host.Log("warn", "monitor pusher exited", map[string]any{"err": err.Error()})
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
calBackend := p.cfg.CalendarBackendURL
|
||||
if calBackend == "" {
|
||||
calBackend = p.cfg.BackendURL
|
||||
@@ -128,6 +156,7 @@ func (p *harborForgePlugin) Init(ctx context.Context, host sdkplugin.HostAPI) er
|
||||
Version: Version,
|
||||
Collect: collect,
|
||||
Bridge: p.bridge,
|
||||
Pusher: p.pusher,
|
||||
Scheduler: p.sched,
|
||||
Host: host,
|
||||
AgentIDFromCtx: func(ctx context.Context) string {
|
||||
|
||||
Reference in New Issue
Block a user