fix(monitor): harden server delete and remove challenge docs
- Delete server state before monitored server to avoid FK 500s
- Keep legacy cleanup for obsolete challenge tables
- Rewrite monitor docs to API key-only flow
This commit is contained in:
@@ -5,6 +5,7 @@ from typing import List
|
||||
|
||||
from fastapi import APIRouter, Depends, Header, HTTPException, status
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import get_db
|
||||
@@ -132,9 +133,20 @@ def delete_server(server_id: int, db: Session = Depends(get_db), _: models.User
|
||||
obj = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
|
||||
if not obj:
|
||||
raise HTTPException(status_code=404, detail='Server not found')
|
||||
state = db.query(ServerState).filter(ServerState.server_id == server_id).first()
|
||||
if state:
|
||||
db.delete(state)
|
||||
|
||||
# Delete dependent rows first to avoid FK errors.
|
||||
db.query(ServerState).filter(ServerState.server_id == server_id).delete(synchronize_session=False)
|
||||
|
||||
# Backward-compatible cleanup for deprecated challenge tables that may still exist in older DBs.
|
||||
try:
|
||||
db.execute(text('DELETE FROM server_handshake_nonces WHERE server_id = :server_id'), {'server_id': server_id})
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
db.execute(text('DELETE FROM server_challenges WHERE server_id = :server_id'), {'server_id': server_id})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
db.delete(obj)
|
||||
db.commit()
|
||||
return None
|
||||
|
||||
@@ -1,494 +1,76 @@
|
||||
# OpenClaw Plugin 开发计划
|
||||
# OpenClaw Plugin 开发计划(当前版)
|
||||
|
||||
**文档版本**: 0.1.0
|
||||
**日期**: 2026-03-19
|
||||
**状态**: 开发中
|
||||
**状态**: API Key 方案已落地,challenge / WebSocket 旧方案已废弃。
|
||||
|
||||
---
|
||||
## 当前架构
|
||||
|
||||
## 1. 概述
|
||||
- HarborForge Monitor Backend 提供服务器注册与遥测接收接口
|
||||
- OpenClaw Gateway 加载 `harborforge-monitor` 插件
|
||||
- 插件在 `gateway_start` 时启动 sidecar (`server/telemetry.mjs`)
|
||||
- sidecar 通过 **HTTP + X-API-Key** 向 Backend 上报遥测
|
||||
|
||||
本文档定义 HarborForge.OpenclawPlugin 的开发计划,以及 Backend 需要提供的接口支持。
|
||||
## 当前后端接口
|
||||
|
||||
### 1.1 目标
|
||||
### 公开接口
|
||||
- `GET /monitor/public/overview`
|
||||
|
||||
开发一个 OpenClaw 插件,将服务器遥测数据(系统指标 + OpenClaw 状态)实时传输到 HarborForge Monitor。
|
||||
### 管理接口
|
||||
- `GET /monitor/admin/servers`
|
||||
- `POST /monitor/admin/servers`
|
||||
- `DELETE /monitor/admin/servers/{id}`
|
||||
- `POST /monitor/admin/servers/{id}/api-key`
|
||||
- `DELETE /monitor/admin/servers/{id}/api-key`
|
||||
|
||||
### 1.2 架构关系
|
||||
### 插件上报接口
|
||||
- `POST /monitor/server/heartbeat-v2`
|
||||
- Header: `X-API-Key`
|
||||
- Body:
|
||||
- `identifier`
|
||||
- `openclaw_version`
|
||||
- `plugin_version`
|
||||
- `agents`
|
||||
- `cpu_pct`
|
||||
- `mem_pct`
|
||||
- `disk_pct`
|
||||
- `swap_pct`
|
||||
- `load_avg`
|
||||
- `uptime_seconds`
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ 远程服务器 (VPS) │
|
||||
│ ┌──────────────────────────────────────────────────────┐ │
|
||||
│ │ OpenClaw Gateway │ │
|
||||
│ │ ┌────────────────────────────────────────────────┐ │ │
|
||||
│ │ │ HarborForge.OpenclawPlugin │ │ │
|
||||
│ │ │ - 生命周期管理 (随 Gateway 启动/停止) │ │ │
|
||||
│ │ │ - 启动 sidecar 进程 │ │ │
|
||||
│ │ └────────────────────────────────────────────────┘ │ │
|
||||
│ │ │ │ │
|
||||
│ │ ▼ 启动/管理 │ │
|
||||
│ │ ┌────────────────────────────────────────────────┐ │ │
|
||||
│ │ │ Sidecar (独立 Node 进程) │ │ │
|
||||
│ │ │ - 收集系统指标 (CPU/内存/磁盘) │ │ │
|
||||
│ │ │ - 读取 OpenClaw 状态 (agents) │ │ │
|
||||
│ │ │ - HTTP/WebSocket 上报到 Monitor │ │ │
|
||||
│ │ └────────────────────────────────────────────────┘ │ │
|
||||
│ └──────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
│
|
||||
│ HTTP / WebSocket
|
||||
▼
|
||||
┌─────────────────────────┐
|
||||
│ HarborForge.Backend │
|
||||
│ - /monitor/* 接口 │
|
||||
│ - 数据存储 │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
## 数据语义
|
||||
|
||||
---
|
||||
- `openclaw_version`: 远程服务器上的 OpenClaw 版本
|
||||
- `plugin_version`: 远程服务器上的 harborforge-monitor 插件版本
|
||||
|
||||
## 2. Backend 当前能力评估
|
||||
## 已废弃内容
|
||||
|
||||
### 2.1 已实现接口 ✅
|
||||
以下旧方案已经废弃,不再作为实现路径:
|
||||
|
||||
| 接口 | 功能 | 完整度 | 说明 |
|
||||
|------|------|--------|------|
|
||||
| `GET /monitor/public/server-public-key` | 获取 RSA 公钥 | ✅ 100% | 用于插件加密 |
|
||||
| `POST /admin/servers` | 注册服务器 | ✅ 100% | 返回 server_id |
|
||||
| `POST /admin/servers/{id}/challenge` | 生成 challenge | ✅ 100% | 10分钟有效期 |
|
||||
| `WS /monitor/server/ws` | WebSocket 连接 | ✅ 100% | 完整验证逻辑 |
|
||||
| `POST /monitor/server/heartbeat` | HTTP 心跳 | ⚠️ 50% | 缺少安全验证 |
|
||||
- challenge UUID
|
||||
- `GET /monitor/public/server-public-key`
|
||||
- `POST /monitor/admin/servers/{id}/challenge`
|
||||
- `WS /monitor/server/ws`
|
||||
- challenge / nonce 握手逻辑
|
||||
|
||||
### 2.2 当前 HTTP Heartbeat 问题 🔴
|
||||
## 前端管理页要求
|
||||
|
||||
```python
|
||||
# 当前实现 (app/api/routers/monitor.py:191-207)
|
||||
@router.post('/server/heartbeat')
|
||||
def server_heartbeat(payload: ServerHeartbeat, db: Session = Depends(get_db)):
|
||||
server = db.query(MonitoredServer).filter(
|
||||
MonitoredServer.identifier == payload.identifier
|
||||
).first()
|
||||
# 问题:只验证 identifier 存在,不验证 challenge!
|
||||
# 任何人知道 identifier 就可以伪造数据
|
||||
```
|
||||
Monitor 管理页应提供:
|
||||
|
||||
**对比 WebSocket 实现**:
|
||||
```python
|
||||
# WebSocket 有完整验证
|
||||
ch = db.query(ServerChallenge).filter(
|
||||
ServerChallenge.challenge_uuid == challenge_uuid,
|
||||
ServerChallenge.server_id == server.id
|
||||
).first()
|
||||
if not ch or ch.used_at is not None or ch.expires_at < now():
|
||||
await websocket.close(code=4401) # 验证失败
|
||||
```
|
||||
- Add Server
|
||||
- Generate API Key
|
||||
- Revoke API Key
|
||||
- Delete Server
|
||||
|
||||
---
|
||||
不再提供 `Generate Challenge`。
|
||||
|
||||
## 3. Backend 需要补充的接口
|
||||
## 运行流程
|
||||
|
||||
### 3.1 方案 A:增强 HTTP Heartbeat(推荐短期方案)
|
||||
1. 管理员在 Monitor 中注册服务器
|
||||
2. 管理员为服务器生成 API Key
|
||||
3. 将 API Key 写入 `~/.openclaw/openclaw.json`
|
||||
4. 重启 OpenClaw Gateway
|
||||
5. 插件启动 sidecar
|
||||
6. sidecar 定时向 `/monitor/server/heartbeat-v2` 上报
|
||||
|
||||
添加 challenge_uuid 验证:
|
||||
## 备注
|
||||
|
||||
```python
|
||||
@router.post('/server/heartbeat')
|
||||
def server_heartbeat(
|
||||
payload: ServerHeartbeatSecure, # 包含 challenge_uuid
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
# 1. 验证服务器
|
||||
server = db.query(MonitoredServer).filter(...).first()
|
||||
if not server:
|
||||
raise HTTPException(404, 'unknown server')
|
||||
|
||||
# 2. 验证 challenge
|
||||
ch = db.query(ServerChallenge).filter(
|
||||
ServerChallenge.challenge_uuid == payload.challenge_uuid,
|
||||
ServerChallenge.server_id == server.id
|
||||
).first()
|
||||
|
||||
if not ch or ch.expires_at < now():
|
||||
raise HTTPException(401, 'invalid or expired challenge')
|
||||
|
||||
# 3. 存储数据...
|
||||
```
|
||||
|
||||
**优点**: 与现有 WebSocket 验证逻辑一致
|
||||
**缺点**: Challenge 10分钟过期,需要定期重新注册
|
||||
|
||||
### 3.2 方案 B:API Key 模式(推荐长期方案)
|
||||
|
||||
添加长期有效的 API Key:
|
||||
|
||||
```python
|
||||
# 1. 模型添加 api_key 字段
|
||||
class MonitoredServer(Base):
|
||||
...
|
||||
api_key = Column(String(64), nullable=True, unique=True, index=True)
|
||||
|
||||
# 2. 新增接口:生成/重置 API Key
|
||||
@router.post('/admin/servers/{id}/api-key')
|
||||
def generate_api_key(server_id: int, ...):
|
||||
api_key = secrets.token_urlsafe(32)
|
||||
# 存储并返回 (仅显示一次)
|
||||
|
||||
# 3. 心跳接口验证 API Key
|
||||
@router.post('/server/heartbeat-v2')
|
||||
def server_heartbeat_v2(
|
||||
payload: ServerHeartbeat,
|
||||
x_api_key: str = Header(...),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
server = db.query(MonitoredServer).filter(
|
||||
MonitoredServer.identifier == payload.identifier,
|
||||
MonitoredServer.api_key == x_api_key
|
||||
).first()
|
||||
if not server:
|
||||
raise HTTPException(401, 'invalid credentials')
|
||||
```
|
||||
|
||||
**优点**: 长期有效,适合自动化 Agent
|
||||
**缺点**: 需要新增数据库字段和接口
|
||||
|
||||
### 3.3 方案 C:加密 Payload(最高安全)
|
||||
|
||||
参考 WebSocket 的 encrypted_payload:
|
||||
|
||||
```python
|
||||
@router.post('/server/heartbeat')
|
||||
def server_heartbeat(
|
||||
encrypted_payload: str = Body(...), # RSA-OAEP 加密
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
# 1. 解密
|
||||
data = decrypt_payload_b64(encrypted_payload)
|
||||
|
||||
# 2. 验证时间戳 (防重放)
|
||||
if not ts_within(data['ts'], max_minutes=10):
|
||||
raise HTTPException(401, 'expired')
|
||||
|
||||
# 3. 验证 challenge
|
||||
ch = db.query(ServerChallenge).filter(
|
||||
challenge_uuid=data['challenge_uuid']
|
||||
).first()
|
||||
...
|
||||
```
|
||||
|
||||
**优点**: 最高安全性
|
||||
**缺点**: 客户端实现复杂,需要 RSA 加密
|
||||
|
||||
---
|
||||
|
||||
## 4. OpenclawPlugin 开发计划
|
||||
|
||||
### Phase 1: 基础功能开发(2-3天)
|
||||
|
||||
**目标**: 可运行的基础版本(开发环境)
|
||||
|
||||
| 任务 | 说明 | 依赖 |
|
||||
|------|------|------|
|
||||
| 1.1 Sidecar 基础架构 | Node.js 项目结构,配置加载 | 无 |
|
||||
| 1.2 系统指标收集 | CPU/内存/磁盘/运行时间 | 无 |
|
||||
| 1.3 OpenClaw 状态读取 | 读取 agents.json,版本信息 | 无 |
|
||||
| 1.4 HTTP 心跳上报 | 使用当前 /heartbeat 接口 | ⚠️ 不安全,仅开发 |
|
||||
| 1.5 Plugin 生命周期 | 随 Gateway 启动/停止 Sidecar | 无 |
|
||||
|
||||
**验收标准**:
|
||||
- [ ] 可以收集系统指标
|
||||
- [ ] 可以上报到 Backend
|
||||
- [ ] 可以在 Monitor 面板看到数据
|
||||
|
||||
### Phase 2: 安全增强(2-3天)
|
||||
|
||||
**目标**: 生产环境可用的安全版本
|
||||
|
||||
| 任务 | 说明 | 依赖 |
|
||||
|------|------|------|
|
||||
| 2.1 WebSocket 支持 | 实现 WS 连接和加密握手 | Backend WS 接口 ✅ |
|
||||
| 2.2 或:等待 HTTP 增强 | Backend 添加 challenge 验证 | Backend 更新 |
|
||||
| 2.3 重试/退避逻辑 | 连接失败时指数退避 | 无 |
|
||||
| 2.4 离线缓存 | 暂时存储,恢复后批量上报 | 无 |
|
||||
|
||||
**验收标准**:
|
||||
- [ ] 连接需要验证(WebSocket 或增强 HTTP)
|
||||
- [ ] 网络中断后自动恢复
|
||||
- [ ] 数据不丢失
|
||||
|
||||
### Phase 3: 生产就绪(1-2天)
|
||||
|
||||
**目标**: 稳定可靠的监控系统
|
||||
|
||||
| 任务 | 说明 | 依赖 |
|
||||
|------|------|------|
|
||||
| 3.1 日志和诊断 | 结构化日志,调试接口 | 无 |
|
||||
| 3.2 性能优化 | 减少资源占用 | 无 |
|
||||
| 3.3 安装脚本完善 | 参考 PaddedCell 格式 | 无 |
|
||||
| 3.4 文档编写 | 部署指南,故障排查 | 无 |
|
||||
|
||||
**验收标准**:
|
||||
- [ ] 长时间稳定运行(7天+)
|
||||
- [ ] 资源占用 < 1% CPU,< 50MB 内存
|
||||
- [ ] 安装脚本一键部署
|
||||
|
||||
---
|
||||
|
||||
## 5. 接口规格详细定义
|
||||
|
||||
### 5.1 当前可用接口
|
||||
|
||||
#### GET /monitor/public/server-public-key
|
||||
```yaml
|
||||
Response:
|
||||
public_key_pem: string # RSA 公钥 (PEM 格式)
|
||||
key_id: string # 公钥指纹
|
||||
```
|
||||
|
||||
#### POST /admin/servers
|
||||
```yaml
|
||||
Headers:
|
||||
Authorization: Bearer {admin_token}
|
||||
Body:
|
||||
identifier: string # 唯一标识 (如 "vps.t1")
|
||||
display_name: string # 显示名称
|
||||
Response:
|
||||
id: int
|
||||
identifier: string
|
||||
challenge_uuid: string # 10分钟有效
|
||||
expires_at: ISO8601
|
||||
```
|
||||
|
||||
#### WS /monitor/server/ws
|
||||
```yaml
|
||||
连接流程:
|
||||
1. Client -> Server: GET /monitor/server/ws (Upgrade)
|
||||
2. Client -> Server: {
|
||||
"encrypted_payload": "base64_rsa_encrypted_json"
|
||||
}
|
||||
# 或明文(向后兼容):
|
||||
# {
|
||||
# "identifier": "vps.t1",
|
||||
# "challenge_uuid": "...",
|
||||
# "nonce": "...",
|
||||
# "ts": "2026-03-19T14:00:00Z"
|
||||
# }
|
||||
3. Server -> Client: { "ok": true, "server_id": 1 }
|
||||
4. Client -> Server: {
|
||||
"event": "server.metrics",
|
||||
"payload": { "cpu_pct": 12.5, "mem_pct": 41.2, ... }
|
||||
}
|
||||
```
|
||||
|
||||
#### POST /monitor/server/heartbeat(当前版本,不安全)
|
||||
```yaml
|
||||
Body:
|
||||
identifier: string
|
||||
openclaw_version: string
|
||||
agents: [{id, name, status}]
|
||||
cpu_pct: float
|
||||
mem_pct: float
|
||||
disk_pct: float
|
||||
swap_pct: float
|
||||
Response:
|
||||
ok: true
|
||||
server_id: int
|
||||
last_seen_at: ISO8601
|
||||
```
|
||||
|
||||
### 5.2 建议新增接口
|
||||
|
||||
#### POST /server/heartbeat-secure(增强版)
|
||||
```yaml
|
||||
Body:
|
||||
identifier: string
|
||||
challenge_uuid: string # 新增:必填
|
||||
openclaw_version: string
|
||||
agents: [...]
|
||||
cpu_pct: float
|
||||
mem_pct: float
|
||||
disk_pct: float
|
||||
swap_pct: float
|
||||
timestamp: ISO8601 # 可选:防重放
|
||||
Response:
|
||||
ok: true
|
||||
server_id: int
|
||||
last_seen_at: ISO8601
|
||||
challenge_expires_at: ISO8601
|
||||
Error:
|
||||
401: { detail: "invalid or expired challenge" }
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. 数据模型
|
||||
|
||||
### 6.1 当前 Backend 模型
|
||||
|
||||
```python
|
||||
# app/models/monitor.py
|
||||
|
||||
class MonitoredServer:
|
||||
id: int
|
||||
identifier: str # 唯一标识
|
||||
display_name: str
|
||||
is_enabled: bool
|
||||
created_by: int
|
||||
created_at: datetime
|
||||
# 建议添加:
|
||||
# api_key: str # 长期有效的 API Key
|
||||
|
||||
class ServerChallenge:
|
||||
id: int
|
||||
server_id: int
|
||||
challenge_uuid: str # 10分钟有效
|
||||
expires_at: datetime
|
||||
used_at: datetime # 首次使用时间
|
||||
created_at: datetime
|
||||
|
||||
class ServerState:
|
||||
id: int
|
||||
server_id: int
|
||||
openclaw_version: str
|
||||
agents_json: str # JSON 序列化
|
||||
cpu_pct: float
|
||||
mem_pct: float
|
||||
disk_pct: float
|
||||
swap_pct: float
|
||||
last_seen_at: datetime
|
||||
updated_at: datetime
|
||||
```
|
||||
|
||||
### 6.2 Plugin 配置模型
|
||||
|
||||
```typescript
|
||||
// ~/.openclaw/openclaw.json
|
||||
{
|
||||
"plugins": {
|
||||
"harborforge-monitor": {
|
||||
"enabled": true,
|
||||
"backendUrl": "https://monitor.hangman-lab.top",
|
||||
"identifier": "vps.t1", // 服务器标识
|
||||
"challengeUuid": "uuid-here", // 从 /admin/servers/{id}/challenge 获取
|
||||
"apiKey": "key-here", // 如果使用 API Key 模式(可选)
|
||||
"reportIntervalSec": 30,
|
||||
"httpFallbackIntervalSec": 60,
|
||||
"logLevel": "info"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. 开发时序图
|
||||
|
||||
### 7.1 首次部署流程
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Admin
|
||||
participant Backend
|
||||
participant Plugin
|
||||
participant Server as Server State
|
||||
|
||||
Admin->>Backend: POST /admin/servers<br/>{identifier: "vps.t1"}
|
||||
Backend->>Admin: {id: 1, identifier: "vps.t1"}
|
||||
|
||||
Admin->>Backend: POST /admin/servers/1/challenge
|
||||
Backend->>Admin: {challenge_uuid: "abc-123", expires_at: "..."}
|
||||
|
||||
Admin->>Server: 配置 challenge_uuid
|
||||
|
||||
Note over Server: ~/.openclaw/openclaw.json
|
||||
|
||||
Server->>Backend: openclaw gateway restart
|
||||
|
||||
Plugin->>Backend: GET /monitor/public/server-public-key
|
||||
Backend->>Plugin: {public_key_pem: "..."}
|
||||
|
||||
alt WebSocket 模式
|
||||
Plugin->>Backend: WS /monitor/server/ws
|
||||
Plugin->>Backend: {challenge_uuid, nonce, ts}
|
||||
Backend->>Plugin: {ok: true}
|
||||
loop 每 30 秒
|
||||
Plugin->>Backend: {event: "server.metrics", payload: {...}}
|
||||
end
|
||||
else HTTP 模式
|
||||
loop 每 30 秒
|
||||
Plugin->>Backend: POST /server/heartbeat<br/>{challenge_uuid, ...}
|
||||
Backend->>Plugin: {ok: true}
|
||||
end
|
||||
end
|
||||
```
|
||||
|
||||
### 7.2 数据上报格式
|
||||
|
||||
```json
|
||||
{
|
||||
"identifier": "vps.t1",
|
||||
"challenge_uuid": "550e8400-e29b-41d4-a716-446655440000",
|
||||
"timestamp": "2026-03-19T14:30:00Z",
|
||||
|
||||
"cpu_pct": 12.5,
|
||||
"mem_pct": 41.2,
|
||||
"mem_used_mb": 4096,
|
||||
"mem_total_mb": 8192,
|
||||
"disk_pct": 62.0,
|
||||
"disk_used_gb": 500.5,
|
||||
"disk_total_gb": 1000.0,
|
||||
"swap_pct": 0.0,
|
||||
"uptime_sec": 86400,
|
||||
"load_avg_1m": 0.5,
|
||||
"platform": "linux",
|
||||
"hostname": "vps.t1",
|
||||
|
||||
"openclaw_version": "1.2.3",
|
||||
"openclaw_agent_count": 2,
|
||||
"openclaw_agents": [
|
||||
{"id": "dev", "name": "Developer", "status": "running"},
|
||||
{"id": "ops", "name": "Operator", "status": "idle"}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. 风险与缓解
|
||||
|
||||
| 风险 | 影响 | 缓解措施 |
|
||||
|------|------|----------|
|
||||
| HTTP Heartbeat 无验证 | 数据伪造 | 使用 WebSocket 或等待 Backend 修复 |
|
||||
| Challenge 10分钟过期 | 需要频繁更新 | Backend 添加 API Key 模式 |
|
||||
| 网络中断 | 数据丢失 | Plugin 实现离线缓存 |
|
||||
| 资源占用过高 | 影响业务 | 控制采集频率,优化实现 |
|
||||
| Sidecar 崩溃 | 监控中断 | Plugin 自动重启 Sidecar |
|
||||
|
||||
---
|
||||
|
||||
## 9. 下一步行动
|
||||
|
||||
### Backend 团队
|
||||
- [ ] 决定采用方案 A/B/C 增强 HTTP Heartbeat 安全
|
||||
- [ ] 实现 `/server/heartbeat-secure` 或增强现有接口
|
||||
- [ ] (可选)添加 API Key 支持
|
||||
|
||||
### Plugin 开发团队
|
||||
- [ ] Phase 1: 基础功能开发(使用当前不安全 HTTP,仅开发测试)
|
||||
- [ ] Phase 2: 集成 WebSocket(立即可用,最安全)
|
||||
- [ ] 等待 Backend 更新后,切换到安全 HTTP
|
||||
|
||||
---
|
||||
|
||||
## 10. 参考文档
|
||||
|
||||
- 原始设计文档: `docs/monitor-server-connector-plan.md`
|
||||
- Backend 代码: `app/api/routers/monitor.py`
|
||||
- Backend 模型: `app/models/monitor.py`
|
||||
- 加密服务: `app/services/crypto_box.py`
|
||||
- PaddedCell 安装脚本参考: `https://git.hangman-lab.top/nav/PaddedCell`
|
||||
|
||||
---
|
||||
|
||||
**文档维护者**: HarborForge Team
|
||||
**更新频率**: 随开发进度更新
|
||||
当前保留了对旧 challenge 数据表的**删除兼容清理**(仅为兼容老数据库中的遗留数据),但不再保留 challenge 功能入口与运行时逻辑。
|
||||
|
||||
@@ -1,68 +1,76 @@
|
||||
# OpenClaw Monitor Agent Plugin 开发计划(草案)
|
||||
# HarborForge Monitor / OpenClaw Plugin Connector Plan
|
||||
|
||||
## 目标
|
||||
让被监测服务器通过 WebSocket 主动接入 HarborForge Backend,并持续上报:
|
||||
- OpenClaw 版本
|
||||
- agent 列表
|
||||
- 每 5 分钟主机指标(CPU/MEM/DISK/SWAP)
|
||||
- agent 状态变更事件
|
||||
|
||||
## 握手流程
|
||||
1. Admin 在 HarborForge 后台添加 server identifier
|
||||
2. Admin 生成 challenge(10 分钟有效)
|
||||
3. 插件请求 `GET /monitor/public/server-public-key` 获取公钥
|
||||
4. 插件构造 payload:
|
||||
- `identifier`
|
||||
- `challenge_uuid`
|
||||
- `nonce`(随机)
|
||||
- `ts`(ISO8601)
|
||||
5. 使用 RSA-OAEP(SHA256) 公钥加密,base64 后作为 `encrypted_payload` 发给 `WS /monitor/server/ws`
|
||||
6. 握手成功后进入事件上报通道
|
||||
使用 **API Key + HTTP heartbeat** 连接 HarborForge Monitor 与远程 OpenClaw 节点。
|
||||
|
||||
## 插件事件协议
|
||||
### server.hello
|
||||
## 认证方式
|
||||
|
||||
- 管理员为服务器生成 API Key
|
||||
- 插件通过 `X-API-Key` 调用 heartbeat 接口
|
||||
- 不再使用 challenge / RSA 公钥 / WebSocket 握手
|
||||
|
||||
## 上报接口
|
||||
|
||||
`POST /monitor/server/heartbeat-v2`
|
||||
|
||||
### Headers
|
||||
- `X-API-Key: <server-api-key>`
|
||||
|
||||
### Payload
|
||||
```json
|
||||
{
|
||||
"event": "server.hello",
|
||||
"payload": {
|
||||
"openclaw_version": "x.y.z",
|
||||
"agents": [{"id": "a1", "name": "agent-1", "status": "idle"}]
|
||||
"identifier": "vps.t1",
|
||||
"openclaw_version": "OpenClaw 2026.3.13 (61d171a)",
|
||||
"plugin_version": "0.1.0",
|
||||
"agents": [
|
||||
{ "id": "agent-bot1", "name": "agent-bot1", "status": "configured" }
|
||||
],
|
||||
"cpu_pct": 12.3,
|
||||
"mem_pct": 45.6,
|
||||
"disk_pct": 78.9,
|
||||
"swap_pct": 0,
|
||||
"load_avg": [0.12, 0.08, 0.03],
|
||||
"uptime_seconds": 12345
|
||||
}
|
||||
```
|
||||
|
||||
## 语义
|
||||
|
||||
- `openclaw_version`: 远程主机上的 OpenClaw 版本
|
||||
- `plugin_version`: harborforge-monitor 插件版本
|
||||
|
||||
## 插件生命周期
|
||||
|
||||
- 插件注册到 Gateway
|
||||
- 在 `gateway_start` 启动 `server/telemetry.mjs`
|
||||
- 在 `gateway_stop` 停止 sidecar
|
||||
|
||||
## 配置位置
|
||||
|
||||
`~/.openclaw/openclaw.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"plugins": {
|
||||
"entries": {
|
||||
"harborforge-monitor": {
|
||||
"enabled": true,
|
||||
"config": {
|
||||
"enabled": true,
|
||||
"backendUrl": "http://127.0.0.1:8000",
|
||||
"identifier": "vps.t1",
|
||||
"apiKey": "your-api-key"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### server.metrics(每 5 分钟)
|
||||
```json
|
||||
{
|
||||
"event": "server.metrics",
|
||||
"payload": {
|
||||
"cpu_pct": 21.3,
|
||||
"mem_pct": 42.1,
|
||||
"disk_pct": 55.9,
|
||||
"swap_pct": 0.0,
|
||||
"agents": [{"id": "a1", "name": "agent-1", "status": "busy"}]
|
||||
}
|
||||
}
|
||||
```
|
||||
## 已废弃
|
||||
|
||||
### agent.status_changed(可选)
|
||||
```json
|
||||
{
|
||||
"event": "agent.status_changed",
|
||||
"payload": {
|
||||
"agents": [{"id": "a1", "name": "agent-1", "status": "focus"}]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 实施里程碑
|
||||
- M1: Node/Python CLI 插件最小握手联通
|
||||
- M2: 指标采集 + 周期上报
|
||||
- M3: agent 状态采集与变更事件
|
||||
- M4: 守护化(systemd)+ 断线重连 + 本地日志
|
||||
|
||||
## 风险与注意事项
|
||||
- 时钟漂移会导致 `ts` 校验失败(建议 NTP)
|
||||
- challenge 仅一次可用,重复使用会被拒绝
|
||||
- nonce 重放会被拒绝
|
||||
- 需要保证插件本地安全保存 identifier/challenge(短期)
|
||||
- challenge UUID
|
||||
- server public key
|
||||
- WebSocket telemetry
|
||||
- encrypted handshake payload
|
||||
|
||||
Reference in New Issue
Block a user