fix: correct telemetry identifier, error logging, and compose port config when containerized
Three related fixes for running Monitor inside a container with /:/host:ro bind-mounted and network_mode: host. * config: when HF_MONITER_ROOTFS is set, read the default identifier from <rootFS>/etc/hostname instead of os.Hostname(). Under network_mode: host the UTS namespace is not shared, so os.Hostname() returns a random docker-assigned string that changes across recreations, causing the backend to treat each restart as a new server. * telemetry: log gopsutil errors from BuildPayload instead of silently swallowing them. Previously a missing /host mount would send a payload full of zeroed fields with no indication of failure. * docker-compose: drop the 'ports:' block. It is silently ignored under network_mode: host (the bridge server binds directly on the host's 127.0.0.1:MONITOR_PORT).
This commit is contained in:
@@ -15,8 +15,8 @@ services:
|
|||||||
- MONITOR_PORT=${MONITOR_PORT:-0}
|
- MONITOR_PORT=${MONITOR_PORT:-0}
|
||||||
volumes:
|
volumes:
|
||||||
- /:/host:ro
|
- /:/host:ro
|
||||||
ports:
|
# network_mode: host shares the host network namespace, so the bridge
|
||||||
# Expose MONITOR_PORT on 127.0.0.1 only for plugin communication.
|
# server (if MONITOR_PORT > 0) listens directly on the host's
|
||||||
# Only active when MONITOR_PORT > 0.
|
# 127.0.0.1:<MONITOR_PORT>. `ports:` is ignored under network_mode:
|
||||||
- "127.0.0.1:${MONITOR_PORT:-9100}:${MONITOR_PORT:-9100}"
|
# host, so it is intentionally omitted.
|
||||||
network_mode: host
|
network_mode: host
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
@@ -32,9 +33,19 @@ func Load(path string) (Config, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func LoadWithOverrides(path string, overrides Overrides) (Config, error) {
|
func LoadWithOverrides(path string, overrides Overrides) (Config, error) {
|
||||||
|
// If running inside a container with the host FS bind-mounted, prefer
|
||||||
|
// the host's /etc/hostname for the default identifier. The container's
|
||||||
|
// own os.Hostname() is a docker-assigned random string under
|
||||||
|
// network_mode: host (UTS namespace is not shared).
|
||||||
|
rootFSEarly := getenvAny([]string{"HF_MONITER_ROOTFS", "HF_MONITOR_ROOTFS"}, "")
|
||||||
|
defaultIdentifier := hostHostname(rootFSEarly)
|
||||||
|
if defaultIdentifier == "" {
|
||||||
|
defaultIdentifier = hostnameOr("unknown-host")
|
||||||
|
}
|
||||||
|
|
||||||
cfg := Config{
|
cfg := Config{
|
||||||
BackendURL: getenvAny([]string{"HF_MONITER_BACKEND_URL", "HF_MONITOR_BACKEND_URL"}, "https://monitor.hangman-lab.top"),
|
BackendURL: getenvAny([]string{"HF_MONITER_BACKEND_URL", "HF_MONITOR_BACKEND_URL"}, "https://monitor.hangman-lab.top"),
|
||||||
Identifier: getenvAny([]string{"HF_MONITER_IDENTIFIER", "HF_MONITOR_IDENTIFIER"}, hostnameOr("unknown-host")),
|
Identifier: getenvAny([]string{"HF_MONITER_IDENTIFIER", "HF_MONITOR_IDENTIFIER"}, defaultIdentifier),
|
||||||
APIKey: getenvAny([]string{"HF_MONITER_API_KEY", "HF_MONITOR_API_KEY"}, ""),
|
APIKey: getenvAny([]string{"HF_MONITER_API_KEY", "HF_MONITOR_API_KEY"}, ""),
|
||||||
ReportIntervalSec: getenvIntAny([]string{"HF_MONITER_REPORT_INTERVAL", "HF_MONITOR_REPORT_INTERVAL"}, 30),
|
ReportIntervalSec: getenvIntAny([]string{"HF_MONITER_REPORT_INTERVAL", "HF_MONITOR_REPORT_INTERVAL"}, 30),
|
||||||
LogLevel: getenvAny([]string{"HF_MONITER_LOG_LEVEL", "HF_MONITOR_LOG_LEVEL"}, "info"),
|
LogLevel: getenvAny([]string{"HF_MONITER_LOG_LEVEL", "HF_MONITOR_LOG_LEVEL"}, "info"),
|
||||||
@@ -153,11 +164,25 @@ func getenvIntAny(keys []string, fallback int) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func hostnameOr(fallback string) string {
|
func hostnameOr(fallback string) string {
|
||||||
name, err := os.Hostname()
|
if name, err := os.Hostname(); err == nil && name != "" {
|
||||||
if err != nil || name == "" {
|
|
||||||
return fallback
|
|
||||||
}
|
|
||||||
return name
|
return name
|
||||||
|
}
|
||||||
|
return fallback
|
||||||
|
}
|
||||||
|
|
||||||
|
// hostHostname reads the hostname from <rootFS>/etc/hostname. Used when
|
||||||
|
// Monitor runs inside a container and wants the host's hostname rather
|
||||||
|
// than the container's UTS namespace hostname (which docker randomizes
|
||||||
|
// unless hostname: is set).
|
||||||
|
func hostHostname(rootFS string) string {
|
||||||
|
if rootFS == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
buf, err := os.ReadFile(filepath.Join(rootFS, "etc", "hostname"))
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(string(buf))
|
||||||
}
|
}
|
||||||
|
|
||||||
func applyHostFSEnv(rootFS string) {
|
func applyHostFSEnv(rootFS string) {
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
@@ -50,12 +51,15 @@ func BuildPayload(ctx context.Context, cfg config.Config) (Payload, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
cpuPct, err := cpu.PercentWithContext(ctx, time.Second, false)
|
cpuPct, err := cpu.PercentWithContext(ctx, time.Second, false)
|
||||||
if err == nil && len(cpuPct) > 0 {
|
if err != nil {
|
||||||
|
log.Printf("telemetry: cpu.Percent failed: %v", err)
|
||||||
|
} else if len(cpuPct) > 0 {
|
||||||
payload.CPUPct = round1(cpuPct[0])
|
payload.CPUPct = round1(cpuPct[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
vm, err := mem.VirtualMemoryWithContext(ctx)
|
if vm, err := mem.VirtualMemoryWithContext(ctx); err != nil {
|
||||||
if err == nil {
|
log.Printf("telemetry: mem.VirtualMemory failed: %v", err)
|
||||||
|
} else {
|
||||||
payload.MemPct = round1(vm.UsedPercent)
|
payload.MemPct = round1(vm.UsedPercent)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -63,28 +67,33 @@ func BuildPayload(ctx context.Context, cfg config.Config) (Payload, error) {
|
|||||||
if diskPath == "" {
|
if diskPath == "" {
|
||||||
diskPath = "/"
|
diskPath = "/"
|
||||||
}
|
}
|
||||||
diskUsage, err := disk.UsageWithContext(ctx, diskPath)
|
if diskUsage, err := disk.UsageWithContext(ctx, diskPath); err != nil {
|
||||||
if err == nil {
|
log.Printf("telemetry: disk.Usage(%s) failed: %v", diskPath, err)
|
||||||
|
} else {
|
||||||
payload.DiskPct = round1(diskUsage.UsedPercent)
|
payload.DiskPct = round1(diskUsage.UsedPercent)
|
||||||
}
|
}
|
||||||
|
|
||||||
swapUsage, err := mem.SwapMemoryWithContext(ctx)
|
if swapUsage, err := mem.SwapMemoryWithContext(ctx); err != nil {
|
||||||
if err == nil {
|
log.Printf("telemetry: mem.SwapMemory failed: %v", err)
|
||||||
|
} else {
|
||||||
payload.SwapPct = round1(swapUsage.UsedPercent)
|
payload.SwapPct = round1(swapUsage.UsedPercent)
|
||||||
}
|
}
|
||||||
|
|
||||||
avg, err := gopsload.AvgWithContext(ctx)
|
if avg, err := gopsload.AvgWithContext(ctx); err != nil {
|
||||||
if err == nil {
|
log.Printf("telemetry: load.Avg failed: %v", err)
|
||||||
|
} else {
|
||||||
payload.LoadAvg = []float64{round2(avg.Load1), round2(avg.Load5), round2(avg.Load15)}
|
payload.LoadAvg = []float64{round2(avg.Load1), round2(avg.Load5), round2(avg.Load15)}
|
||||||
}
|
}
|
||||||
|
|
||||||
hostInfo, err := host.InfoWithContext(ctx)
|
if hostInfo, err := host.InfoWithContext(ctx); err != nil {
|
||||||
if err == nil {
|
log.Printf("telemetry: host.Info failed: %v", err)
|
||||||
|
} else {
|
||||||
payload.UptimeSeconds = hostInfo.Uptime
|
payload.UptimeSeconds = hostInfo.Uptime
|
||||||
}
|
}
|
||||||
|
|
||||||
nginxInstalled, nginxSites, err := detectNginx(cfg.RootFS)
|
if nginxInstalled, nginxSites, err := detectNginx(cfg.RootFS); err != nil {
|
||||||
if err == nil {
|
log.Printf("telemetry: detectNginx failed: %v", err)
|
||||||
|
} else {
|
||||||
payload.NginxInstalled = nginxInstalled
|
payload.NginxInstalled = nginxInstalled
|
||||||
payload.NginxSites = nginxSites
|
payload.NginxSites = nginxSites
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user