Files
hzhang 6e3ad669f8 feat(monitor): active push loop replacing standalone monitor
Adds a periodic POST loop to <backend>/monitor/server/heartbeat so
the HF plugin can take over the standalone harborforge-monitor daemon's
job — same X-API-Key header, same flat telemetry shape (cpu_pct /
mem_pct / disk_pct / swap_pct / load_avg / uptime_seconds /
plugin_version / agents[]). HF backend stays unchanged.

Config: monitor_push_enabled (default false; opt-in to avoid surprise
heartbeats from existing deployments), monitor_push_interval_seconds
(default 30), reuses apiKey for the X-API-Key header. Lift the
container's HF_MONITER_API_KEY into config.apiKey, flip
monitor_push_enabled true, then docker rm -f the container — DB
last_seen_at keeps advancing under the plugin's loop.

Collector grew swap + cpu sampling (two reads of /proc/stat over a
1-second window when SampleCPU=true). Bridge endpoint stays cheap
(SampleCPU=false on demand); push loop is the only caller paying the
sampling cost.

E2E in sim: monitor_push_enabled=true + apiKey from injected
MonitoredServer row → server_states.last_seen_at advances exactly
every interval_seconds (10s configured, 10s observed). cpu/mem/disk/
swap_pct all populate correctly.
2026-06-03 13:04:51 +01:00

243 lines
7.9 KiB
Go

// Package tools wires the 9 harborforge_* tool implementations to
// the plugin's runtime state (config, telemetry collector, monitor
// bridge, calendar scheduler). Each tool is a CallTool dispatch
// branch in main.go's plugin; this package holds the shared logic.
package tools
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
sdkplugin "git.hangman-lab.top/hzhang/Plexum-sdk-go/plugin"
"git.hangman-lab.top/zhi/HarborForge.PlexumPlugin/internal/calendar"
"git.hangman-lab.top/zhi/HarborForge.PlexumPlugin/internal/config"
"git.hangman-lab.top/zhi/HarborForge.PlexumPlugin/internal/monitor"
"git.hangman-lab.top/zhi/HarborForge.PlexumPlugin/internal/telemetry"
)
// Deps is the bundle main.go passes when constructing the tool router.
// Every tool handler in this package receives it by value and reads the
// pieces of runtime state it needs from it.
type Deps struct {
// Config is the resolved plugin configuration; the status and monitor
// tools report selected fields from it.
Config config.Resolved
// Version is the plugin version string surfaced by harborforge_status.
Version string
// Collect produces a telemetry snapshot on demand; harborforge_telemetry
// returns its result as JSON.
Collect func() telemetry.Snapshot
// Bridge is the monitor bridge whose Stats() the status and
// monitor_telemetry tools report.
Bridge *monitor.Bridge
// Pusher is the monitor push loop. It may be nil, in which case the
// push summary omits the last-send statistics.
Pusher *monitor.Pusher
// Scheduler is the calendar scheduler behind the calendar_* tools and
// the restart_status heartbeat report.
Scheduler *calendar.Scheduler
// Host is the Plexum host API handle.
// NOTE(review): not referenced by any handler visible in this file —
// confirm where it is consumed.
Host sdkplugin.HostAPI
// AgentIDFromCtx returns the agent id the call belongs to. Plexum
// host injects this via the tool dispatch context; main.go's
// CallTool reads it from the ctx and stashes here. An empty return
// value is treated by the calendar mutation tools as "no agent
// context" and turned into an error result.
AgentIDFromCtx func(ctx context.Context) string
}
// Dispatch routes a tool call to its implementation by tool name. It is
// the entry point main.go's ToolPlugin.CallTool calls.
//
// ctx and input are forwarded only to the tools that need them; deps is
// handed to every tool. The returned ToolResult carries the canonical
// text response. Tool-level failures, including an unrecognized name,
// are reported as is_error=true results with a nil error rather than as
// RPC errors, so the model sees human-readable detail.
func Dispatch(ctx context.Context, deps Deps, name string, input json.RawMessage) (sdkplugin.ToolResult, error) {
	// Read-only tools: they need nothing beyond the dependency bundle.
	stateless := map[string]func(Deps) (sdkplugin.ToolResult, error){
		"harborforge_status":            toolStatus,
		"harborforge_telemetry":         toolTelemetry,
		"harborforge_monitor_telemetry": toolMonitorTelemetry,
		"harborforge_calendar_status":   toolCalendarStatus,
		"harborforge_restart_status":    toolRestartStatus,
	}
	if handler, found := stateless[name]; found {
		return handler(deps)
	}

	// Agent-scoped calendar tools: they need the call context and, except
	// for resume, the raw tool input.
	switch name {
	case "harborforge_calendar_complete":
		return toolCalendarComplete(ctx, deps, input)
	case "harborforge_calendar_abort":
		return toolCalendarAbort(ctx, deps, input)
	case "harborforge_calendar_pause":
		return toolCalendarPause(ctx, deps, input)
	case "harborforge_calendar_resume":
		return toolCalendarResume(ctx, deps)
	default:
		unknown := sdkplugin.ContentBlock{Type: "text", Text: "unknown tool: " + name}
		return sdkplugin.ToolResult{
			IsError: true,
			Content: []sdkplugin.ContentBlock{unknown},
		}, nil
	}
}
// toolStatus implements harborforge_status. It assembles one JSON report
// with five sections: plugin identity, selected configuration values,
// monitor bridge statistics, the monitor push summary, and the calendar
// scheduler status.
func toolStatus(deps Deps) (sdkplugin.ToolResult, error) {
	bridgeStats := deps.Bridge.Stats()
	calendarStatus := deps.Scheduler.Status()
	cfg := deps.Config

	report := make(map[string]any, 5)
	report["plugin"] = map[string]any{
		"name":    "harbor-forge",
		"version": deps.Version,
		"backend": "plexum",
	}
	report["config"] = map[string]any{
		"backend_url":         cfg.BackendURL,
		"identifier":          cfg.Identifier,
		"monitor_port":        cfg.MonitorPort,
		"calendar_enabled":    cfg.CalendarEnabled,
		"calendar_backendurl": cfg.CalendarBackendURL,
	}
	report["monitor_bridge"] = map[string]any{
		"listening":  bridgeStats.Listening,
		"port":       bridgeStats.Port,
		"queries":    bridgeStats.Queries,
		"last_query": bridgeStats.LastQuery,
	}
	report["monitor_push"] = monitorPushSummary(deps)
	report["calendar"] = calendarStatus
	return jsonResult(report)
}
// monitorPushSummary describes the monitor push loop in the JSON layout
// shared by the status and monitor_telemetry tools.
//
// The enabled, interval_seconds and endpoint keys always come from the
// configuration. The last_sent_at, last_status, last_err, success_count
// and error_count keys are added only when a pusher is wired. With a nil
// deps.Pusher (testing, push disabled) the function is still safe to
// call and returns just the configuration-derived keys.
func monitorPushSummary(deps Deps) map[string]any {
	cfg := deps.Config
	summary := map[string]any{
		"enabled":          cfg.MonitorPushEnabled,
		"interval_seconds": cfg.MonitorPushIntervalSeconds,
		"endpoint":         cfg.BackendURL + "/monitor/server/heartbeat",
	}
	if deps.Pusher == nil {
		// No push loop to interrogate; report configuration only.
		return summary
	}

	stats := deps.Pusher.Stats()
	summary["last_sent_at"] = stats.LastSentAt
	summary["last_status"] = stats.LastStatus
	summary["last_err"] = stats.LastErr
	summary["success_count"] = stats.SuccessCount
	summary["error_count"] = stats.ErrorCount
	return summary
}
// toolTelemetry implements harborforge_telemetry: it calls deps.Collect
// and returns the resulting snapshot encoded as JSON.
func toolTelemetry(deps Deps) (sdkplugin.ToolResult, error) {
	snapshot := deps.Collect()
	return jsonResult(snapshot)
}
// toolMonitorTelemetry implements harborforge_monitor_telemetry. The
// report has two sections: "bridge", holding the monitor bridge's
// statistics including its last snapshot, and "push", holding the
// monitor push summary.
func toolMonitorTelemetry(deps Deps) (sdkplugin.ToolResult, error) {
	bridgeStats := deps.Bridge.Stats()
	bridgeSection := map[string]any{
		"port":          bridgeStats.Port,
		"listening":     bridgeStats.Listening,
		"queries":       bridgeStats.Queries,
		"last_query":    bridgeStats.LastQuery,
		"last_snapshot": bridgeStats.LastSnap,
	}
	report := map[string]any{
		"bridge": bridgeSection,
		"push":   monitorPushSummary(deps),
	}
	return jsonResult(report)
}
// toolCalendarStatus implements harborforge_calendar_status: it returns
// the calendar scheduler's current status encoded as JSON.
func toolCalendarStatus(deps Deps) (sdkplugin.ToolResult, error) {
	status := deps.Scheduler.Status()
	return jsonResult(status)
}
// toolCalendarComplete implements harborforge_calendar_complete: it asks
// the scheduler to mark the calling agent's active slot as completed.
//
// input is the raw tool-argument JSON; its optional "summary" string is
// forwarded to the scheduler. Empty input is accepted and means "no
// summary". Input that fails to decode is reported as an error result
// instead of being silently dropped, so the caller can correct its
// arguments before the slot is changed.
//
// Every failure (bad arguments, missing agent context, no active slot,
// scheduler error) is returned as an is_error tool result with a nil
// error.
func toolCalendarComplete(ctx context.Context, deps Deps, input json.RawMessage) (sdkplugin.ToolResult, error) {
	var args struct {
		Summary string `json:"summary"`
	}
	// The tool may be invoked without arguments; only decode when there
	// is something to decode.
	if len(input) > 0 {
		if err := json.Unmarshal(input, &args); err != nil {
			return errResult("calendar_complete: invalid arguments: " + err.Error())
		}
	}

	agentID := deps.AgentIDFromCtx(ctx)
	if agentID == "" {
		return errResult("calendar_complete: no agent context")
	}

	if err := deps.Scheduler.CompleteForAgent(ctx, agentID, args.Summary); err != nil {
		if errors.Is(err, calendar.ErrNoActiveSlot) {
			return errResult("no active slot for agent " + agentID)
		}
		return errResult("complete failed: " + err.Error())
	}
	return okResult("slot marked completed")
}
// toolCalendarAbort implements harborforge_calendar_abort: it asks the
// scheduler to abort the calling agent's active slot.
//
// input is the raw tool-argument JSON; its optional "reason" string is
// forwarded to the scheduler. Empty input is accepted and means "no
// reason". Input that fails to decode is reported as an error result
// instead of being silently dropped, so a slot is never aborted on the
// basis of arguments that could not be read.
//
// Every failure (bad arguments, missing agent context, no active slot,
// scheduler error) is returned as an is_error tool result with a nil
// error.
func toolCalendarAbort(ctx context.Context, deps Deps, input json.RawMessage) (sdkplugin.ToolResult, error) {
	var args struct {
		Reason string `json:"reason"`
	}
	// The tool may be invoked without arguments; only decode when there
	// is something to decode.
	if len(input) > 0 {
		if err := json.Unmarshal(input, &args); err != nil {
			return errResult("calendar_abort: invalid arguments: " + err.Error())
		}
	}

	agentID := deps.AgentIDFromCtx(ctx)
	if agentID == "" {
		return errResult("calendar_abort: no agent context")
	}

	if err := deps.Scheduler.AbortForAgent(ctx, agentID, args.Reason); err != nil {
		if errors.Is(err, calendar.ErrNoActiveSlot) {
			return errResult("no active slot for agent " + agentID)
		}
		return errResult("abort failed: " + err.Error())
	}
	return okResult("slot aborted")
}
// toolCalendarPause implements harborforge_calendar_pause: it asks the
// scheduler to pause the calling agent's active slot.
//
// input is the raw tool-argument JSON; its optional "reason" string is
// forwarded to the scheduler. Empty input is accepted and means "no
// reason". Input that fails to decode is reported as an error result
// instead of being silently dropped, so the caller can correct its
// arguments before the slot is changed.
//
// Every failure (bad arguments, missing agent context, no active slot,
// scheduler error) is returned as an is_error tool result with a nil
// error.
func toolCalendarPause(ctx context.Context, deps Deps, input json.RawMessage) (sdkplugin.ToolResult, error) {
	var args struct {
		Reason string `json:"reason"`
	}
	// The tool may be invoked without arguments; only decode when there
	// is something to decode.
	if len(input) > 0 {
		if err := json.Unmarshal(input, &args); err != nil {
			return errResult("calendar_pause: invalid arguments: " + err.Error())
		}
	}

	agentID := deps.AgentIDFromCtx(ctx)
	if agentID == "" {
		return errResult("calendar_pause: no agent context")
	}

	if err := deps.Scheduler.PauseForAgent(ctx, agentID, args.Reason); err != nil {
		if errors.Is(err, calendar.ErrNoActiveSlot) {
			return errResult("no active slot for agent " + agentID)
		}
		return errResult("pause failed: " + err.Error())
	}
	return okResult("slot paused")
}
// toolCalendarResume implements harborforge_calendar_resume: it asks the
// scheduler to resume the calling agent's slot. The tool takes no
// arguments; the agent is identified from ctx alone.
//
// Every failure (missing agent context, no active slot, scheduler error)
// is returned as an is_error tool result with a nil error.
func toolCalendarResume(ctx context.Context, deps Deps) (sdkplugin.ToolResult, error) {
	agentID := deps.AgentIDFromCtx(ctx)
	if agentID == "" {
		return errResult("calendar_resume: no agent context")
	}

	err := deps.Scheduler.ResumeForAgent(ctx, agentID)
	switch {
	case err == nil:
		return okResult("slot resumed")
	case errors.Is(err, calendar.ErrNoActiveSlot):
		return errResult("no active slot for agent " + agentID)
	default:
		return errResult("resume failed: " + err.Error())
	}
}
// toolRestartStatus implements harborforge_restart_status.
//
// The HarborForge backend exposes no restart-pending endpoint (verified
// via /openapi.json), so "pending" is always false. The report carries
// the scheduler's most recent heartbeats instead, stamped with the
// current UTC time, which lets operators sanity-check that the plugin's
// calendar loop is still alive.
func toolRestartStatus(deps Deps) (sdkplugin.ToolResult, error) {
	heartbeats := deps.Scheduler.Status().LastHeartbeats
	report := map[string]any{
		"pending":         false,
		"last_heartbeats": heartbeats,
		"observed_at":     time.Now().UTC(),
	}
	return jsonResult(report)
}
// ---- result helpers ----
// jsonResult encodes v as indented JSON and wraps it in a successful
// tool result holding a single text content block. If v cannot be
// encoded, it returns a zero ToolResult and a wrapped error.
func jsonResult(v any) (sdkplugin.ToolResult, error) {
	var result sdkplugin.ToolResult

	encoded, err := json.MarshalIndent(v, "", " ")
	if err != nil {
		return result, fmt.Errorf("encode tool result: %w", err)
	}

	result.Content = []sdkplugin.ContentBlock{{Type: "text", Text: string(encoded)}}
	return result, nil
}
// okResult wraps text in a successful tool result holding a single text
// content block. The returned error is always nil.
func okResult(text string) (sdkplugin.ToolResult, error) {
	block := sdkplugin.ContentBlock{Type: "text", Text: text}
	result := sdkplugin.ToolResult{Content: []sdkplugin.ContentBlock{block}}
	return result, nil
}
// errResult wraps text in an is_error tool result holding a single text
// content block. Messages that do not already start with "harborforge_"
// are prefixed with "harborforge: " so the origin of the failure is
// visible. The returned error is always nil: tool failures are reported
// through the result, not as RPC errors.
func errResult(text string) (sdkplugin.ToolResult, error) {
	message := text
	if !strings.HasPrefix(message, "harborforge_") {
		message = "harborforge: " + message
	}
	block := sdkplugin.ContentBlock{Type: "text", Text: message}
	return sdkplugin.ToolResult{
		IsError: true,
		Content: []sdkplugin.ContentBlock{block},
	}, nil
}