Compare commits

..

5 Commits

Author SHA1 Message Date
zhi
e136f1b290 fix: correct telemetry identifier and visibility when containerized
Three related fixes for running Monitor inside a container with
/:/host:ro bind-mounted and network_mode: host.

* config: when HF_MONITER_ROOTFS is set, read the default identifier
  from <rootFS>/etc/hostname instead of os.Hostname(). Under
  network_mode: host the UTS namespace is not shared, so os.Hostname()
  returns a random docker-assigned string that changes across
  recreations, causing the backend to treat each restart as a new
  server.

* telemetry: log gopsutil errors from BuildPayload instead of silently
  swallowing them. Previously a missing /host mount would send a
  payload full of zeroed fields with no indication of failure.

* docker-compose: drop the 'ports:' block. It is silently ignored
  under network_mode: host (the bridge server binds directly on the
  host's 127.0.0.1:MONITOR_PORT).
2026-04-15 23:02:44 +00:00
758d3d1c59 refactor: use heartbeat endpoint consistently 2026-04-04 08:05:49 +00:00
65f521dce0 feat: support monitor cli override flags 2026-04-04 07:53:34 +00:00
6e60fae559 Merge pull request 'Merge dev-2026-03-21 into main' (#1) from dev-2026-03-21 into main
Reviewed-on: #1
2026-03-22 14:16:01 +00:00
zhi
dc05fa01d1 feat: add POST /openclaw endpoint and enrich heartbeats with OpenClaw metadata
- Bridge server now accepts POST /openclaw from OpenClaw plugin
- OpenClawMeta struct stores version, plugin_version, and agents
- Heartbeat sendOnce() enriches payload with plugin metadata when available
- Telemetry Payload adds optional openclaw_version field
- README updated to document /openclaw endpoint and metadata enrichment
- All communication remains optional — Monitor functions without plugin data
2026-03-22 01:37:15 +00:00
6 changed files with 206 additions and 29 deletions

View File

@@ -19,7 +19,7 @@
客户端调用: 客户端调用:
- `POST /monitor/server/heartbeat-v2` - `POST /monitor/server/heartbeat`
- Header: `X-API-Key` - Header: `X-API-Key`
## 项目结构 ## 项目结构
@@ -66,7 +66,7 @@ HarborForge.Monitor/
### MONITOR_PORT — 插件桥接端口 ### MONITOR_PORT — 插件桥接端口
`MONITOR_PORT` (或 `monitorPort`) 设置为大于 0 的值时,Monitor 会在 `127.0.0.1:<MONITOR_PORT>` 上启动一个本地 HTTP 服务,供 HarborForge OpenClaw 插件查询遥测数据。 `MONITOR_PORT` 设置为大于 0 的值时,Monitor 会在 `127.0.0.1:<MONITOR_PORT>` 上启动一个本地 HTTP 服务,供 HarborForge OpenClaw 插件查询遥测数据。
支持的端点: 支持的端点:
@@ -74,6 +74,17 @@ HarborForge.Monitor/
|------|------| |------|------|
| `GET /health` | 健康检查,返回 Monitor 版本和标识符 | | `GET /health` | 健康检查,返回 Monitor 版本和标识符 |
| `GET /telemetry` | 返回最新的遥测数据快照 | | `GET /telemetry` | 返回最新的遥测数据快照 |
| `POST /openclaw` | 接收 OpenClaw 插件推送的元数据(版本、代理等) |
### OpenClaw 元数据增强
当 OpenClaw 插件通过 `POST /openclaw` 推送元数据后,Monitor 会在后续的心跳上报中自动将这些信息附加到遥测数据中:
- `openclaw_version` — OpenClaw 运行时版本
- `plugin_version` — 插件版本
- `agents` — 代理列表
如果插件从未推送过元数据,这些字段会被省略,心跳上报完全不受影响。
**重要**:桥接端口是可选的。如果 `MONITOR_PORT` 为 0 或未设置,桥接服务不会启动,Monitor 的心跳上报功能完全不受影响。即使桥接服务启动失败,心跳上报也会继续正常工作。 **重要**:桥接端口是可选的。如果 `MONITOR_PORT` 为 0 或未设置,桥接服务不会启动,Monitor 的心跳上报功能完全不受影响。即使桥接服务启动失败,心跳上报也会继续正常工作。

View File

@@ -24,12 +24,26 @@ func main() {
printPayload bool printPayload bool
dryRun bool dryRun bool
showVersion bool showVersion bool
backendURL string
identifier string
apiKey string
reportInt int
logLevel string
rootFS string
monitorPort int
) )
flag.StringVar(&configPath, "config", "/etc/harborforge-monitor/config.json", "Path to config file") flag.StringVar(&configPath, "config", "/etc/harborforge-monitor/config.json", "Path to config file")
flag.BoolVar(&runOnce, "once", false, "Collect and send telemetry once, then exit") flag.BoolVar(&runOnce, "once", false, "Collect and send telemetry once, then exit")
flag.BoolVar(&printPayload, "print-payload", false, "Print payload JSON before sending") flag.BoolVar(&printPayload, "print-payload", false, "Print payload JSON before sending")
flag.BoolVar(&dryRun, "dry-run", false, "Collect telemetry but do not send it") flag.BoolVar(&dryRun, "dry-run", false, "Collect telemetry but do not send it")
flag.BoolVar(&showVersion, "version", false, "Print version and exit") flag.BoolVar(&showVersion, "version", false, "Print version and exit")
flag.StringVar(&backendURL, "backend-url", "", "Override backend URL")
flag.StringVar(&identifier, "identifier", "", "Override identifier")
flag.StringVar(&apiKey, "api-key", "", "Override API key")
flag.IntVar(&reportInt, "report-interval", 0, "Override report interval in seconds")
flag.StringVar(&logLevel, "log-level", "", "Override log level")
flag.StringVar(&rootFS, "rootfs", "", "Override root filesystem path")
flag.IntVar(&monitorPort, "monitor-port", 0, "Override monitor bridge port")
flag.Parse() flag.Parse()
if showVersion { if showVersion {
@@ -37,7 +51,15 @@ func main() {
return return
} }
cfg, err := config.Load(configPath) cfg, err := config.LoadWithOverrides(configPath, config.Overrides{
BackendURL: backendURL,
Identifier: identifier,
APIKey: apiKey,
ReportIntervalSec: reportInt,
LogLevel: logLevel,
RootFS: rootFS,
MonitorPort: monitorPort,
})
if err != nil { if err != nil {
log.Fatalf("load config: %v", err) log.Fatalf("load config: %v", err)
} }
@@ -73,6 +95,19 @@ func main() {
// Update bridge with latest telemetry // Update bridge with latest telemetry
if bridgeSrv != nil { if bridgeSrv != nil {
bridgeSrv.UpdatePayload(payload) bridgeSrv.UpdatePayload(payload)
// Enrich payload with OpenClaw metadata if available
if meta := bridgeSrv.GetOpenClawMeta(); meta != nil {
if meta.Version != "" {
payload.OpenClawVersion = meta.Version
}
if meta.PluginVersion != "" {
payload.PluginVersion = meta.PluginVersion
}
if len(meta.Agents) > 0 {
payload.Agents = meta.Agents
}
}
} }
if printPayload || dryRun { if printPayload || dryRun {

View File

@@ -15,8 +15,8 @@ services:
- MONITOR_PORT=${MONITOR_PORT:-0} - MONITOR_PORT=${MONITOR_PORT:-0}
volumes: volumes:
- /:/host:ro - /:/host:ro
ports: # network_mode: host shares the host network namespace, so the bridge
# Expose MONITOR_PORT on 127.0.0.1 only for plugin communication. # server (if MONITOR_PORT > 0) listens directly on the host's
# Only active when MONITOR_PORT > 0. # 127.0.0.1:<MONITOR_PORT>. `ports:` is ignored under network_mode:
- "127.0.0.1:${MONITOR_PORT:-9100}:${MONITOR_PORT:-9100}" # host, so it is intentionally omitted.
network_mode: host network_mode: host

View File

@@ -1,15 +1,19 @@
// Package bridge provides a local HTTP server on MONITOR_PORT for // Package bridge provides a local HTTP server on MONITOR_PORT for
// communication between the HarborForge OpenClaw plugin and Monitor. // communication between the HarborForge OpenClaw plugin and Monitor.
// //
// The plugin queries this endpoint to enrich its telemetry with // The bridge serves two purposes:
// host/hardware data. The bridge is optional: if monitorPort is 0 // 1. Expose hardware telemetry to the plugin via GET /telemetry
// or not set, the bridge is not started and Monitor operates normally. // 2. Receive OpenClaw metadata from the plugin via POST /openclaw
//
// The bridge is optional: if monitorPort is 0 or not set, the bridge
// is not started and Monitor operates normally.
package bridge package bridge
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"log" "log"
"net" "net"
"net/http" "net/http"
@@ -20,6 +24,14 @@ import (
"git.hangman-lab.top/zhi/HarborForge.Monitor/internal/telemetry" "git.hangman-lab.top/zhi/HarborForge.Monitor/internal/telemetry"
) )
// OpenClawMeta holds metadata received from the OpenClaw plugin.
// This data is optional enrichment for heartbeat uploads.
//
// It is the JSON shape that the POST /openclaw handler decodes the
// request body into; the decoded value is stored on the Server and
// handed back by GetOpenClawMeta.
type OpenClawMeta struct {
// Version is read from the "version" key. The heartbeat loop copies
// it into the payload's OpenClawVersion when it is non-empty.
Version string `json:"version"`
// PluginVersion is read from the "plugin_version" key.
PluginVersion string `json:"plugin_version"`
// Agents is read from the "agents" key and dropped from encoded
// output when empty. Elements are untyped (any), so their shape is
// whatever the plugin sent — NOTE(review): no schema is enforced here.
Agents []any `json:"agents,omitempty"`
}
// Server is the local bridge HTTP server. // Server is the local bridge HTTP server.
type Server struct { type Server struct {
cfg config.Config cfg config.Config
@@ -29,6 +41,9 @@ type Server struct {
mu sync.RWMutex mu sync.RWMutex
lastPayload *telemetry.Payload lastPayload *telemetry.Payload
lastUpdated time.Time lastUpdated time.Time
openclawMeta *OpenClawMeta
openclawUpdated time.Time
} }
// New creates a bridge server. It does not start listening. // New creates a bridge server. It does not start listening.
@@ -57,6 +72,14 @@ type bridgeResponse struct {
LastUpdated *time.Time `json:"last_updated,omitempty"` LastUpdated *time.Time `json:"last_updated,omitempty"`
} }
// GetOpenClawMeta reports the most recent OpenClaw metadata stored on
// the server. The result is nil when nothing has been stored yet.
//
// The pointer is read under the server's read lock and returned as-is;
// no copy of the pointed-to value is made.
func (s *Server) GetOpenClawMeta() *OpenClawMeta {
	s.mu.RLock()
	current := s.openclawMeta
	s.mu.RUnlock()
	return current
}
func (s *Server) handler() http.Handler { func (s *Server) handler() http.Handler {
mux := http.NewServeMux() mux := http.NewServeMux()
@@ -91,6 +114,40 @@ func (s *Server) handler() http.Handler {
json.NewEncoder(w).Encode(resp) json.NewEncoder(w).Encode(resp)
}) })
// OpenClaw metadata endpoint — plugin POSTs its metadata here
mux.HandleFunc("/openclaw", func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost {
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
return
}
body, err := io.ReadAll(io.LimitReader(r.Body, 64*1024))
if err != nil {
http.Error(w, "read error", http.StatusBadRequest)
return
}
defer r.Body.Close()
var meta OpenClawMeta
if err := json.Unmarshal(body, &meta); err != nil {
http.Error(w, "invalid json", http.StatusBadRequest)
return
}
s.mu.Lock()
s.openclawMeta = &meta
s.openclawUpdated = time.Now()
s.mu.Unlock()
s.logger.Printf("received OpenClaw metadata: version=%s plugin=%s agents=%d",
meta.Version, meta.PluginVersion, len(meta.Agents))
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(map[string]string{
"status": "ok",
})
})
return mux return mux
} }

View File

@@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"strings"
) )
type Config struct { type Config struct {
@@ -17,10 +18,34 @@ type Config struct {
MonitorPort int `json:"monitorPort"` MonitorPort int `json:"monitorPort"`
} }
// Overrides carries per-invocation values that LoadWithOverrides applies
// last, after the defaults, config file and environment have been
// resolved. A field left at its zero value means "no override":
// string fields are applied only when non-empty, and integer fields
// only when greater than zero.
type Overrides struct {
// BackendURL replaces Config.BackendURL when non-empty.
BackendURL string
// Identifier replaces Config.Identifier when non-empty.
Identifier string
// APIKey replaces Config.APIKey when non-empty.
APIKey string
// ReportIntervalSec replaces Config.ReportIntervalSec when > 0.
ReportIntervalSec int
// LogLevel replaces Config.LogLevel when non-empty.
LogLevel string
// RootFS replaces Config.RootFS when non-empty.
RootFS string
// MonitorPort replaces Config.MonitorPort when > 0; because 0 means
// "no override", it cannot be used to force the port to 0.
MonitorPort int
}
func Load(path string) (Config, error) { func Load(path string) (Config, error) {
return LoadWithOverrides(path, Overrides{})
}
func LoadWithOverrides(path string, overrides Overrides) (Config, error) {
// If running inside a container with the host FS bind-mounted, prefer
// the host's /etc/hostname for the default identifier. The container's
// own os.Hostname() is a docker-assigned random string under
// network_mode: host (UTS namespace is not shared).
rootFSEarly := getenvAny([]string{"HF_MONITER_ROOTFS", "HF_MONITOR_ROOTFS"}, "")
defaultIdentifier := hostHostname(rootFSEarly)
if defaultIdentifier == "" {
defaultIdentifier = hostnameOr("unknown-host")
}
cfg := Config{ cfg := Config{
BackendURL: getenvAny([]string{"HF_MONITER_BACKEND_URL", "HF_MONITOR_BACKEND_URL"}, "https://monitor.hangman-lab.top"), BackendURL: getenvAny([]string{"HF_MONITER_BACKEND_URL", "HF_MONITOR_BACKEND_URL"}, "https://monitor.hangman-lab.top"),
Identifier: getenvAny([]string{"HF_MONITER_IDENTIFIER", "HF_MONITOR_IDENTIFIER"}, hostnameOr("unknown-host")), Identifier: getenvAny([]string{"HF_MONITER_IDENTIFIER", "HF_MONITOR_IDENTIFIER"}, defaultIdentifier),
APIKey: getenvAny([]string{"HF_MONITER_API_KEY", "HF_MONITOR_API_KEY"}, ""), APIKey: getenvAny([]string{"HF_MONITER_API_KEY", "HF_MONITOR_API_KEY"}, ""),
ReportIntervalSec: getenvIntAny([]string{"HF_MONITER_REPORT_INTERVAL", "HF_MONITOR_REPORT_INTERVAL"}, 30), ReportIntervalSec: getenvIntAny([]string{"HF_MONITER_REPORT_INTERVAL", "HF_MONITOR_REPORT_INTERVAL"}, 30),
LogLevel: getenvAny([]string{"HF_MONITER_LOG_LEVEL", "HF_MONITOR_LOG_LEVEL"}, "info"), LogLevel: getenvAny([]string{"HF_MONITER_LOG_LEVEL", "HF_MONITOR_LOG_LEVEL"}, "info"),
@@ -46,6 +71,28 @@ func Load(path string) (Config, error) {
cfg.RootFS = getenvAny([]string{"HF_MONITER_ROOTFS", "HF_MONITOR_ROOTFS"}, cfg.RootFS) cfg.RootFS = getenvAny([]string{"HF_MONITER_ROOTFS", "HF_MONITOR_ROOTFS"}, cfg.RootFS)
cfg.MonitorPort = getenvIntAny([]string{"MONITOR_PORT", "HF_MONITOR_PORT"}, cfg.MonitorPort) cfg.MonitorPort = getenvIntAny([]string{"MONITOR_PORT", "HF_MONITOR_PORT"}, cfg.MonitorPort)
if overrides.BackendURL != "" {
cfg.BackendURL = overrides.BackendURL
}
if overrides.Identifier != "" {
cfg.Identifier = overrides.Identifier
}
if overrides.APIKey != "" {
cfg.APIKey = overrides.APIKey
}
if overrides.ReportIntervalSec > 0 {
cfg.ReportIntervalSec = overrides.ReportIntervalSec
}
if overrides.LogLevel != "" {
cfg.LogLevel = overrides.LogLevel
}
if overrides.RootFS != "" {
cfg.RootFS = overrides.RootFS
}
if overrides.MonitorPort > 0 {
cfg.MonitorPort = overrides.MonitorPort
}
if cfg.BackendURL == "" { if cfg.BackendURL == "" {
return cfg, fmt.Errorf("backendUrl is required") return cfg, fmt.Errorf("backendUrl is required")
} }
@@ -117,11 +164,25 @@ func getenvIntAny(keys []string, fallback int) int {
} }
func hostnameOr(fallback string) string { func hostnameOr(fallback string) string {
name, err := os.Hostname() if name, err := os.Hostname(); err == nil && name != "" {
if err != nil || name == "" { return name
return fallback
} }
return name return fallback
}
// hostHostname reads the hostname from <rootFS>/etc/hostname. Used when
// Monitor runs inside a container and wants the host's hostname rather
// than the container's UTS namespace hostname (which docker randomizes
// unless hostname: is set).
func hostHostname(rootFS string) string {
if rootFS == "" {
return ""
}
buf, err := os.ReadFile(filepath.Join(rootFS, "etc", "hostname"))
if err != nil {
return ""
}
return strings.TrimSpace(string(buf))
} }
func applyHostFSEnv(rootFS string) { func applyHostFSEnv(rootFS string) {

View File

@@ -4,6 +4,7 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"log"
"net/http" "net/http"
"os" "os"
"os/exec" "os/exec"
@@ -35,6 +36,10 @@ type Payload struct {
SwapPct float64 `json:"swap_pct,omitempty"` SwapPct float64 `json:"swap_pct,omitempty"`
LoadAvg []float64 `json:"load_avg,omitempty"` LoadAvg []float64 `json:"load_avg,omitempty"`
UptimeSeconds uint64 `json:"uptime_seconds,omitempty"` UptimeSeconds uint64 `json:"uptime_seconds,omitempty"`
// Optional OpenClaw metadata, enriched from plugin bridge.
// These fields are omitted if no plugin data is available.
OpenClawVersion string `json:"openclaw_version,omitempty"`
} }
func BuildPayload(ctx context.Context, cfg config.Config) (Payload, error) { func BuildPayload(ctx context.Context, cfg config.Config) (Payload, error) {
@@ -46,12 +51,15 @@ func BuildPayload(ctx context.Context, cfg config.Config) (Payload, error) {
} }
cpuPct, err := cpu.PercentWithContext(ctx, time.Second, false) cpuPct, err := cpu.PercentWithContext(ctx, time.Second, false)
if err == nil && len(cpuPct) > 0 { if err != nil {
log.Printf("telemetry: cpu.Percent failed: %v", err)
} else if len(cpuPct) > 0 {
payload.CPUPct = round1(cpuPct[0]) payload.CPUPct = round1(cpuPct[0])
} }
vm, err := mem.VirtualMemoryWithContext(ctx) if vm, err := mem.VirtualMemoryWithContext(ctx); err != nil {
if err == nil { log.Printf("telemetry: mem.VirtualMemory failed: %v", err)
} else {
payload.MemPct = round1(vm.UsedPercent) payload.MemPct = round1(vm.UsedPercent)
} }
@@ -59,28 +67,33 @@ func BuildPayload(ctx context.Context, cfg config.Config) (Payload, error) {
if diskPath == "" { if diskPath == "" {
diskPath = "/" diskPath = "/"
} }
diskUsage, err := disk.UsageWithContext(ctx, diskPath) if diskUsage, err := disk.UsageWithContext(ctx, diskPath); err != nil {
if err == nil { log.Printf("telemetry: disk.Usage(%s) failed: %v", diskPath, err)
} else {
payload.DiskPct = round1(diskUsage.UsedPercent) payload.DiskPct = round1(diskUsage.UsedPercent)
} }
swapUsage, err := mem.SwapMemoryWithContext(ctx) if swapUsage, err := mem.SwapMemoryWithContext(ctx); err != nil {
if err == nil { log.Printf("telemetry: mem.SwapMemory failed: %v", err)
} else {
payload.SwapPct = round1(swapUsage.UsedPercent) payload.SwapPct = round1(swapUsage.UsedPercent)
} }
avg, err := gopsload.AvgWithContext(ctx) if avg, err := gopsload.AvgWithContext(ctx); err != nil {
if err == nil { log.Printf("telemetry: load.Avg failed: %v", err)
} else {
payload.LoadAvg = []float64{round2(avg.Load1), round2(avg.Load5), round2(avg.Load15)} payload.LoadAvg = []float64{round2(avg.Load1), round2(avg.Load5), round2(avg.Load15)}
} }
hostInfo, err := host.InfoWithContext(ctx) if hostInfo, err := host.InfoWithContext(ctx); err != nil {
if err == nil { log.Printf("telemetry: host.Info failed: %v", err)
} else {
payload.UptimeSeconds = hostInfo.Uptime payload.UptimeSeconds = hostInfo.Uptime
} }
nginxInstalled, nginxSites, err := detectNginx(cfg.RootFS) if nginxInstalled, nginxSites, err := detectNginx(cfg.RootFS); err != nil {
if err == nil { log.Printf("telemetry: detectNginx failed: %v", err)
} else {
payload.NginxInstalled = nginxInstalled payload.NginxInstalled = nginxInstalled
payload.NginxSites = nginxSites payload.NginxSites = nginxSites
} }
@@ -94,7 +107,7 @@ func Send(ctx context.Context, client *http.Client, cfg config.Config, payload P
return fmt.Errorf("marshal payload: %w", err) return fmt.Errorf("marshal payload: %w", err)
} }
req, err := http.NewRequestWithContext(ctx, http.MethodPost, strings.TrimRight(cfg.BackendURL, "/")+"/monitor/server/heartbeat-v2", strings.NewReader(string(body))) req, err := http.NewRequestWithContext(ctx, http.MethodPost, strings.TrimRight(cfg.BackendURL, "/")+"/monitor/server/heartbeat", strings.NewReader(string(body)))
if err != nil { if err != nil {
return fmt.Errorf("build request: %w", err) return fmt.Errorf("build request: %w", err)
} }