Adds a periodic POST loop to <backend>/monitor/server/heartbeat so the HF plugin can take over the standalone harborforge-monitor daemon's job — same X-API-Key header, same flat telemetry shape (cpu_pct / mem_pct / disk_pct / swap_pct / load_avg / uptime_seconds / plugin_version / agents[]). The HF backend stays unchanged. Config: monitor_push_enabled (default false; opt-in to avoid surprise heartbeats from existing deployments), monitor_push_interval_seconds (default 30); the loop reuses apiKey for the X-API-Key header. To migrate: lift the container's HF_MONITER_API_KEY into config.apiKey, flip monitor_push_enabled to true, then docker rm -f the container — the DB's last_seen_at keeps advancing under the plugin's loop. The collector grew swap + CPU sampling (two reads of /proc/stat over a 1-second window when SampleCPU=true). The bridge endpoint stays cheap (SampleCPU=false on demand); the push loop is the only caller paying the sampling cost. E2E in sim: monitor_push_enabled=true + apiKey from an injected MonitoredServer row → server_states.last_seen_at advances exactly every interval_seconds (10s configured, 10s observed). cpu/mem/disk/swap_pct all populate correctly.
243 lines
7.9 KiB
Go
243 lines
7.9 KiB
Go
// Package tools wires the 9 harborforge_* tool implementations to
// the plugin's runtime state (config, telemetry collector, monitor
// bridge and pusher, calendar scheduler). Each tool is a CallTool
// dispatch branch in main.go's plugin; this package holds the shared
// logic.
|
|
|
|
package tools
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
sdkplugin "git.hangman-lab.top/hzhang/Plexum-sdk-go/plugin"
|
|
|
|
"git.hangman-lab.top/zhi/HarborForge.PlexumPlugin/internal/calendar"
|
|
"git.hangman-lab.top/zhi/HarborForge.PlexumPlugin/internal/config"
|
|
"git.hangman-lab.top/zhi/HarborForge.PlexumPlugin/internal/monitor"
|
|
"git.hangman-lab.top/zhi/HarborForge.PlexumPlugin/internal/telemetry"
|
|
)
|
|
|
|
// Deps is the bundle main.go passes when constructing the tool router.
|
|
type Deps struct {
|
|
Config config.Resolved
|
|
Version string
|
|
Collect func() telemetry.Snapshot
|
|
Bridge *monitor.Bridge
|
|
Pusher *monitor.Pusher
|
|
Scheduler *calendar.Scheduler
|
|
Host sdkplugin.HostAPI
|
|
|
|
// AgentIDFromCtx returns the agent id the call belongs to. Plexum
|
|
// host injects this via the tool dispatch context; main.go's
|
|
// CallTool reads it from the ctx and stashes here.
|
|
AgentIDFromCtx func(ctx context.Context) string
|
|
}
|
|
|
|
// Dispatch is the entry point main.go's ToolPlugin.CallTool calls.
|
|
// Returns the canonical text response. Errors come through as
|
|
// is_error=true ToolResult rather than RPC errors so the model sees
|
|
// human-readable detail.
|
|
func Dispatch(ctx context.Context, deps Deps, name string, input json.RawMessage) (sdkplugin.ToolResult, error) {
|
|
switch name {
|
|
case "harborforge_status":
|
|
return toolStatus(deps)
|
|
case "harborforge_telemetry":
|
|
return toolTelemetry(deps)
|
|
case "harborforge_monitor_telemetry":
|
|
return toolMonitorTelemetry(deps)
|
|
case "harborforge_calendar_status":
|
|
return toolCalendarStatus(deps)
|
|
case "harborforge_calendar_complete":
|
|
return toolCalendarComplete(ctx, deps, input)
|
|
case "harborforge_calendar_abort":
|
|
return toolCalendarAbort(ctx, deps, input)
|
|
case "harborforge_calendar_pause":
|
|
return toolCalendarPause(ctx, deps, input)
|
|
case "harborforge_calendar_resume":
|
|
return toolCalendarResume(ctx, deps)
|
|
case "harborforge_restart_status":
|
|
return toolRestartStatus(deps)
|
|
}
|
|
return sdkplugin.ToolResult{
|
|
IsError: true,
|
|
Content: []sdkplugin.ContentBlock{{Type: "text", Text: "unknown tool: " + name}},
|
|
}, nil
|
|
}
|
|
|
|
func toolStatus(deps Deps) (sdkplugin.ToolResult, error) {
|
|
bs := deps.Bridge.Stats()
|
|
sch := deps.Scheduler.Status()
|
|
out := map[string]any{
|
|
"plugin": map[string]any{
|
|
"name": "harbor-forge",
|
|
"version": deps.Version,
|
|
"backend": "plexum",
|
|
},
|
|
"config": map[string]any{
|
|
"backend_url": deps.Config.BackendURL,
|
|
"identifier": deps.Config.Identifier,
|
|
"monitor_port": deps.Config.MonitorPort,
|
|
"calendar_enabled": deps.Config.CalendarEnabled,
|
|
"calendar_backendurl": deps.Config.CalendarBackendURL,
|
|
},
|
|
"monitor_bridge": map[string]any{
|
|
"listening": bs.Listening,
|
|
"port": bs.Port,
|
|
"queries": bs.Queries,
|
|
"last_query": bs.LastQuery,
|
|
},
|
|
"monitor_push": monitorPushSummary(deps),
|
|
"calendar": sch,
|
|
}
|
|
return jsonResult(out)
|
|
}
|
|
|
|
// monitorPushSummary returns the pusher's last-known state in the same
|
|
// JSON layout the status/monitor_telemetry tools surface. Nil-safe: if
|
|
// no pusher is wired (testing, push disabled), reports enabled=false.
|
|
func monitorPushSummary(deps Deps) map[string]any {
|
|
out := map[string]any{
|
|
"enabled": deps.Config.MonitorPushEnabled,
|
|
"interval_seconds": deps.Config.MonitorPushIntervalSeconds,
|
|
"endpoint": deps.Config.BackendURL + "/monitor/server/heartbeat",
|
|
}
|
|
if deps.Pusher != nil {
|
|
st := deps.Pusher.Stats()
|
|
out["last_sent_at"] = st.LastSentAt
|
|
out["last_status"] = st.LastStatus
|
|
out["last_err"] = st.LastErr
|
|
out["success_count"] = st.SuccessCount
|
|
out["error_count"] = st.ErrorCount
|
|
}
|
|
return out
|
|
}
|
|
|
|
func toolTelemetry(deps Deps) (sdkplugin.ToolResult, error) {
|
|
return jsonResult(deps.Collect())
|
|
}
|
|
|
|
func toolMonitorTelemetry(deps Deps) (sdkplugin.ToolResult, error) {
|
|
bs := deps.Bridge.Stats()
|
|
return jsonResult(map[string]any{
|
|
"bridge": map[string]any{
|
|
"port": bs.Port,
|
|
"listening": bs.Listening,
|
|
"queries": bs.Queries,
|
|
"last_query": bs.LastQuery,
|
|
"last_snapshot": bs.LastSnap,
|
|
},
|
|
"push": monitorPushSummary(deps),
|
|
})
|
|
}
|
|
|
|
func toolCalendarStatus(deps Deps) (sdkplugin.ToolResult, error) {
|
|
return jsonResult(deps.Scheduler.Status())
|
|
}
|
|
|
|
func toolCalendarComplete(ctx context.Context, deps Deps, input json.RawMessage) (sdkplugin.ToolResult, error) {
|
|
var args struct{ Summary string `json:"summary"` }
|
|
_ = json.Unmarshal(input, &args)
|
|
agentID := deps.AgentIDFromCtx(ctx)
|
|
if agentID == "" {
|
|
return errResult("calendar_complete: no agent context")
|
|
}
|
|
if err := deps.Scheduler.CompleteForAgent(ctx, agentID, args.Summary); err != nil {
|
|
if errors.Is(err, calendar.ErrNoActiveSlot) {
|
|
return errResult("no active slot for agent " + agentID)
|
|
}
|
|
return errResult("complete failed: " + err.Error())
|
|
}
|
|
return okResult("slot marked completed")
|
|
}
|
|
|
|
func toolCalendarAbort(ctx context.Context, deps Deps, input json.RawMessage) (sdkplugin.ToolResult, error) {
|
|
var args struct{ Reason string `json:"reason"` }
|
|
_ = json.Unmarshal(input, &args)
|
|
agentID := deps.AgentIDFromCtx(ctx)
|
|
if agentID == "" {
|
|
return errResult("calendar_abort: no agent context")
|
|
}
|
|
if err := deps.Scheduler.AbortForAgent(ctx, agentID, args.Reason); err != nil {
|
|
if errors.Is(err, calendar.ErrNoActiveSlot) {
|
|
return errResult("no active slot for agent " + agentID)
|
|
}
|
|
return errResult("abort failed: " + err.Error())
|
|
}
|
|
return okResult("slot aborted")
|
|
}
|
|
|
|
func toolCalendarPause(ctx context.Context, deps Deps, input json.RawMessage) (sdkplugin.ToolResult, error) {
|
|
var args struct{ Reason string `json:"reason"` }
|
|
_ = json.Unmarshal(input, &args)
|
|
agentID := deps.AgentIDFromCtx(ctx)
|
|
if agentID == "" {
|
|
return errResult("calendar_pause: no agent context")
|
|
}
|
|
if err := deps.Scheduler.PauseForAgent(ctx, agentID, args.Reason); err != nil {
|
|
if errors.Is(err, calendar.ErrNoActiveSlot) {
|
|
return errResult("no active slot for agent " + agentID)
|
|
}
|
|
return errResult("pause failed: " + err.Error())
|
|
}
|
|
return okResult("slot paused")
|
|
}
|
|
|
|
func toolCalendarResume(ctx context.Context, deps Deps) (sdkplugin.ToolResult, error) {
|
|
agentID := deps.AgentIDFromCtx(ctx)
|
|
if agentID == "" {
|
|
return errResult("calendar_resume: no agent context")
|
|
}
|
|
if err := deps.Scheduler.ResumeForAgent(ctx, agentID); err != nil {
|
|
if errors.Is(err, calendar.ErrNoActiveSlot) {
|
|
return errResult("no active slot for agent " + agentID)
|
|
}
|
|
return errResult("resume failed: " + err.Error())
|
|
}
|
|
return okResult("slot resumed")
|
|
}
|
|
|
|
func toolRestartStatus(deps Deps) (sdkplugin.ToolResult, error) {
|
|
// HarborForge backend doesn't expose a restart-pending endpoint
|
|
// (verified via /openapi.json) so we report the most recent
|
|
// heartbeat freshness instead. Useful for operators sanity-
|
|
// checking that the plugin's calendar loop is still alive.
|
|
sch := deps.Scheduler.Status()
|
|
return jsonResult(map[string]any{
|
|
"pending": false,
|
|
"last_heartbeats": sch.LastHeartbeats,
|
|
"observed_at": time.Now().UTC(),
|
|
})
|
|
}
|
|
|
|
// ---- result helpers ----
|
|
|
|
func jsonResult(v any) (sdkplugin.ToolResult, error) {
|
|
raw, err := json.MarshalIndent(v, "", " ")
|
|
if err != nil {
|
|
return sdkplugin.ToolResult{}, fmt.Errorf("encode tool result: %w", err)
|
|
}
|
|
return sdkplugin.ToolResult{
|
|
Content: []sdkplugin.ContentBlock{{Type: "text", Text: string(raw)}},
|
|
}, nil
|
|
}
|
|
|
|
func okResult(text string) (sdkplugin.ToolResult, error) {
|
|
return sdkplugin.ToolResult{
|
|
Content: []sdkplugin.ContentBlock{{Type: "text", Text: text}},
|
|
}, nil
|
|
}
|
|
|
|
func errResult(text string) (sdkplugin.ToolResult, error) {
|
|
if !strings.HasPrefix(text, "harborforge_") {
|
|
text = "harborforge: " + text
|
|
}
|
|
return sdkplugin.ToolResult{
|
|
IsError: true,
|
|
Content: []sdkplugin.ContentBlock{{Type: "text", Text: text}},
|
|
}, nil
|
|
}
|