Compare commits
6 Commits
67c648d6d8
...
9b5e2dc15c
| Author | SHA1 | Date | |
|---|---|---|---|
| 9b5e2dc15c | |||
| 8e0f158266 | |||
| 97f12cac7a | |||
| a0d0c7b3a1 | |||
| c70f90cb52 | |||
| 929a722c66 |
@@ -1,21 +1,20 @@
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
import uuid
|
||||
from typing import List, Dict
|
||||
import secrets
|
||||
from typing import List
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, WebSocket, WebSocketDisconnect
|
||||
from fastapi import APIRouter, Depends, Header, HTTPException, status
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import get_db, SessionLocal
|
||||
from app.core.config import get_db
|
||||
from app.api.deps import get_current_user_or_apikey
|
||||
from app.models import models
|
||||
from app.models.monitor import (
|
||||
ProviderAccount,
|
||||
MonitoredServer,
|
||||
ServerState,
|
||||
ServerChallenge,
|
||||
ServerHandshakeNonce,
|
||||
)
|
||||
from app.services.monitoring import (
|
||||
get_task_stats_cached,
|
||||
@@ -23,11 +22,8 @@ from app.services.monitoring import (
|
||||
get_server_states_view,
|
||||
test_provider_connection,
|
||||
)
|
||||
from app.services.crypto_box import get_public_key_info, decrypt_payload_b64, ts_within
|
||||
|
||||
router = APIRouter(prefix='/monitor', tags=['Monitor'])
|
||||
SUPPORTED_PROVIDERS = {'anthropic', 'openai', 'minimax', 'kimi', 'qwen'}
|
||||
ACTIVE_WS: Dict[int, WebSocket] = {}
|
||||
|
||||
|
||||
class ProviderAccountCreate(BaseModel):
|
||||
@@ -46,23 +42,12 @@ class MonitoredServerCreate(BaseModel):
|
||||
display_name: str | None = None
|
||||
|
||||
|
||||
class ChallengeResponse(BaseModel):
|
||||
identifier: str
|
||||
challenge_uuid: str
|
||||
expires_at: str
|
||||
|
||||
|
||||
def require_admin(current_user: models.User = Depends(get_current_user_or_apikey)):
|
||||
if not current_user.is_admin:
|
||||
raise HTTPException(status_code=403, detail='Admin required')
|
||||
return current_user
|
||||
|
||||
|
||||
@router.get('/public/server-public-key')
|
||||
def monitor_public_key():
|
||||
return get_public_key_info()
|
||||
|
||||
|
||||
@router.get('/public/overview')
|
||||
def public_overview(db: Session = Depends(get_db)):
|
||||
return {
|
||||
@@ -143,37 +128,57 @@ def add_server(payload: MonitoredServerCreate, db: Session = Depends(get_db), us
|
||||
return {'id': obj.id, 'identifier': obj.identifier, 'display_name': obj.display_name, 'is_enabled': obj.is_enabled}
|
||||
|
||||
|
||||
@router.post('/admin/servers/{server_id}/challenge', response_model=ChallengeResponse)
|
||||
def issue_server_challenge(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
|
||||
server = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
|
||||
if not server:
|
||||
raise HTTPException(status_code=404, detail='Server not found')
|
||||
challenge_uuid = str(uuid.uuid4())
|
||||
expires_at = datetime.now(timezone.utc) + timedelta(minutes=10)
|
||||
ch = ServerChallenge(server_id=server_id, challenge_uuid=challenge_uuid, expires_at=expires_at)
|
||||
db.add(ch)
|
||||
db.commit()
|
||||
return ChallengeResponse(identifier=server.identifier, challenge_uuid=challenge_uuid, expires_at=expires_at.isoformat())
|
||||
|
||||
|
||||
@router.delete('/admin/servers/{server_id}', status_code=status.HTTP_204_NO_CONTENT)
|
||||
def delete_server(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
|
||||
obj = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
|
||||
if not obj:
|
||||
raise HTTPException(status_code=404, detail='Server not found')
|
||||
state = db.query(ServerState).filter(ServerState.server_id == server_id).first()
|
||||
if state:
|
||||
db.delete(state)
|
||||
db.query(ServerChallenge).filter(ServerChallenge.server_id == server_id).delete()
|
||||
db.query(ServerHandshakeNonce).filter(ServerHandshakeNonce.server_id == server_id).delete()
|
||||
|
||||
# Delete dependent rows first to avoid FK errors.
|
||||
db.query(ServerState).filter(ServerState.server_id == server_id).delete(synchronize_session=False)
|
||||
|
||||
# Backward-compatible cleanup for deprecated challenge tables that may still exist in older DBs.
|
||||
try:
|
||||
db.execute(text('DELETE FROM server_handshake_nonces WHERE server_id = :server_id'), {'server_id': server_id})
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
db.execute(text('DELETE FROM server_challenges WHERE server_id = :server_id'), {'server_id': server_id})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
db.delete(obj)
|
||||
db.commit()
|
||||
return None
|
||||
|
||||
|
||||
@router.post('/admin/servers/{server_id}/api-key')
|
||||
def generate_api_key(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
|
||||
"""Generate or regenerate API Key for a server (heartbeat v2)"""
|
||||
server = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
|
||||
if not server:
|
||||
raise HTTPException(status_code=404, detail='Server not found')
|
||||
api_key = secrets.token_urlsafe(32)
|
||||
server.api_key = api_key
|
||||
db.commit()
|
||||
return {'server_id': server.id, 'api_key': api_key, 'message': 'Store this key securely - it will not be shown again'}
|
||||
|
||||
|
||||
@router.delete('/admin/servers/{server_id}/api-key', status_code=status.HTTP_204_NO_CONTENT)
|
||||
def revoke_api_key(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
|
||||
"""Revoke API Key for a server"""
|
||||
server = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
|
||||
if not server:
|
||||
raise HTTPException(status_code=404, detail='Server not found')
|
||||
server.api_key = None
|
||||
db.commit()
|
||||
return None
|
||||
|
||||
|
||||
class ServerHeartbeat(BaseModel):
|
||||
identifier: str
|
||||
openclaw_version: str | None = None
|
||||
plugin_version: str | None = None
|
||||
agents: List[dict] = []
|
||||
cpu_pct: float | None = None
|
||||
mem_pct: float | None = None
|
||||
@@ -191,6 +196,7 @@ def server_heartbeat(payload: ServerHeartbeat, db: Session = Depends(get_db)):
|
||||
st = ServerState(server_id=server.id)
|
||||
db.add(st)
|
||||
st.openclaw_version = payload.openclaw_version
|
||||
st.plugin_version = payload.plugin_version
|
||||
st.agents_json = json.dumps(payload.agents, ensure_ascii=False)
|
||||
st.cpu_pct = payload.cpu_pct
|
||||
st.mem_pct = payload.mem_pct
|
||||
@@ -201,87 +207,45 @@ def server_heartbeat(payload: ServerHeartbeat, db: Session = Depends(get_db)):
|
||||
return {'ok': True, 'server_id': server.id, 'last_seen_at': st.last_seen_at}
|
||||
|
||||
|
||||
@router.websocket('/server/ws')
|
||||
async def server_ws(websocket: WebSocket):
|
||||
await websocket.accept()
|
||||
db = SessionLocal()
|
||||
server_id = None
|
||||
try:
|
||||
hello = await websocket.receive_json()
|
||||
# Heartbeat v2 with API Key authentication
|
||||
class TelemetryPayload(BaseModel):
|
||||
identifier: str
|
||||
openclaw_version: str | None = None
|
||||
plugin_version: str | None = None
|
||||
agents: List[dict] = []
|
||||
cpu_pct: float | None = None
|
||||
mem_pct: float | None = None
|
||||
disk_pct: float | None = None
|
||||
swap_pct: float | None = None
|
||||
load_avg: list[float] | None = None
|
||||
uptime_seconds: int | None = None
|
||||
|
||||
encrypted_payload = (hello.get('encrypted_payload') or '').strip()
|
||||
if encrypted_payload:
|
||||
data = decrypt_payload_b64(encrypted_payload)
|
||||
identifier = (data.get('identifier') or '').strip()
|
||||
challenge_uuid = (data.get('challenge_uuid') or '').strip()
|
||||
nonce = (data.get('nonce') or '').strip()
|
||||
ts = data.get('ts')
|
||||
if not ts_within(ts, max_minutes=10):
|
||||
await websocket.close(code=4401)
|
||||
return
|
||||
else:
|
||||
# backward compatible mode
|
||||
identifier = (hello.get('identifier') or '').strip()
|
||||
challenge_uuid = (hello.get('challenge_uuid') or '').strip()
|
||||
nonce = (hello.get('nonce') or '').strip()
|
||||
|
||||
if not identifier or not challenge_uuid or not nonce:
|
||||
await websocket.close(code=4400)
|
||||
return
|
||||
@router.post('/server/heartbeat-v2')
|
||||
def server_heartbeat_v2(
|
||||
payload: TelemetryPayload,
|
||||
x_api_key: str = Header(..., alias='X-API-Key', description='API Key from /admin/servers/{id}/api-key'),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""Server heartbeat using API Key authentication (no challenge_uuid required)"""
|
||||
server = db.query(MonitoredServer).filter(
|
||||
MonitoredServer.api_key == x_api_key,
|
||||
MonitoredServer.is_enabled == True
|
||||
).first()
|
||||
if not server:
|
||||
raise HTTPException(status_code=401, detail='Invalid or missing API Key')
|
||||
st = db.query(ServerState).filter(ServerState.server_id == server.id).first()
|
||||
if not st:
|
||||
st = ServerState(server_id=server.id)
|
||||
db.add(st)
|
||||
st.openclaw_version = payload.openclaw_version
|
||||
st.plugin_version = payload.plugin_version
|
||||
st.agents_json = json.dumps(payload.agents, ensure_ascii=False)
|
||||
st.cpu_pct = payload.cpu_pct
|
||||
st.mem_pct = payload.mem_pct
|
||||
st.disk_pct = payload.disk_pct
|
||||
st.swap_pct = payload.swap_pct
|
||||
st.last_seen_at = datetime.now(timezone.utc)
|
||||
db.commit()
|
||||
return {'ok': True, 'server_id': server.id, 'identifier': server.identifier, 'last_seen_at': st.last_seen_at}
|
||||
|
||||
server = db.query(MonitoredServer).filter(MonitoredServer.identifier == identifier, MonitoredServer.is_enabled == True).first()
|
||||
if not server:
|
||||
await websocket.close(code=4404)
|
||||
return
|
||||
|
||||
ch = db.query(ServerChallenge).filter(ServerChallenge.challenge_uuid == challenge_uuid, ServerChallenge.server_id == server.id).first()
|
||||
if not ch or ch.used_at is not None or ch.expires_at < datetime.now(timezone.utc):
|
||||
await websocket.close(code=4401)
|
||||
return
|
||||
|
||||
nonce_used = db.query(ServerHandshakeNonce).filter(ServerHandshakeNonce.server_id == server.id, ServerHandshakeNonce.nonce == nonce).first()
|
||||
if nonce_used:
|
||||
await websocket.close(code=4409)
|
||||
return
|
||||
|
||||
db.add(ServerHandshakeNonce(server_id=server.id, nonce=nonce))
|
||||
ch.used_at = datetime.now(timezone.utc)
|
||||
db.commit()
|
||||
|
||||
server_id = server.id
|
||||
ACTIVE_WS[server.id] = websocket
|
||||
await websocket.send_json({'ok': True, 'server_id': server.id, 'message': 'connected'})
|
||||
|
||||
while True:
|
||||
msg = await websocket.receive_json()
|
||||
event = msg.get('event')
|
||||
payload = msg.get('payload') or {}
|
||||
st = db.query(ServerState).filter(ServerState.server_id == server.id).first()
|
||||
if not st:
|
||||
st = ServerState(server_id=server.id)
|
||||
db.add(st)
|
||||
|
||||
if event == 'server.hello':
|
||||
st.openclaw_version = payload.get('openclaw_version')
|
||||
st.agents_json = json.dumps(payload.get('agents') or [], ensure_ascii=False)
|
||||
elif event in {'server.metrics', 'agent.status_changed'}:
|
||||
st.cpu_pct = payload.get('cpu_pct', st.cpu_pct)
|
||||
st.mem_pct = payload.get('mem_pct', st.mem_pct)
|
||||
st.disk_pct = payload.get('disk_pct', st.disk_pct)
|
||||
st.swap_pct = payload.get('swap_pct', st.swap_pct)
|
||||
if 'agents' in payload:
|
||||
st.agents_json = json.dumps(payload.get('agents') or [], ensure_ascii=False)
|
||||
|
||||
st.last_seen_at = datetime.now(timezone.utc)
|
||||
db.commit()
|
||||
except WebSocketDisconnect:
|
||||
pass
|
||||
except Exception:
|
||||
try:
|
||||
await websocket.close(code=1011)
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
if server_id and ACTIVE_WS.get(server_id) is websocket:
|
||||
ACTIVE_WS.pop(server_id, None)
|
||||
db.close()
|
||||
|
||||
@@ -215,6 +215,15 @@ def _migrate_schema():
|
||||
"DEFAULT 'open'"
|
||||
))
|
||||
|
||||
# --- monitored_servers.api_key for heartbeat v2 ---
|
||||
if _has_table(db, "monitored_servers") and not _has_column(db, "monitored_servers", "api_key"):
|
||||
db.execute(text("ALTER TABLE monitored_servers ADD COLUMN api_key VARCHAR(64) NULL"))
|
||||
db.execute(text("CREATE UNIQUE INDEX idx_monitored_servers_api_key ON monitored_servers (api_key)"))
|
||||
|
||||
# --- server_states.plugin_version for monitor plugin telemetry ---
|
||||
if _has_table(db, "server_states") and not _has_column(db, "server_states", "plugin_version"):
|
||||
db.execute(text("ALTER TABLE server_states ADD COLUMN plugin_version VARCHAR(64) NULL"))
|
||||
|
||||
db.commit()
|
||||
except Exception as e:
|
||||
db.rollback()
|
||||
|
||||
@@ -39,6 +39,7 @@ class MonitoredServer(Base):
|
||||
identifier = Column(String(128), nullable=False, unique=True)
|
||||
display_name = Column(String(128), nullable=True)
|
||||
is_enabled = Column(Boolean, default=True)
|
||||
api_key = Column(String(64), nullable=True, unique=True, index=True) # API Key for server heartbeat v2
|
||||
created_by = Column(Integer, nullable=True)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
|
||||
@@ -49,6 +50,7 @@ class ServerState(Base):
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
server_id = Column(Integer, ForeignKey('monitored_servers.id'), nullable=False, unique=True)
|
||||
openclaw_version = Column(String(64), nullable=True)
|
||||
plugin_version = Column(String(64), nullable=True)
|
||||
agents_json = Column(Text, nullable=True) # json list
|
||||
cpu_pct = Column(Float, nullable=True)
|
||||
mem_pct = Column(Float, nullable=True)
|
||||
@@ -57,22 +59,3 @@ class ServerState(Base):
|
||||
last_seen_at = Column(DateTime(timezone=True), nullable=True)
|
||||
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
|
||||
|
||||
|
||||
class ServerChallenge(Base):
|
||||
__tablename__ = 'server_challenges'
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
server_id = Column(Integer, ForeignKey('monitored_servers.id'), nullable=False, index=True)
|
||||
challenge_uuid = Column(String(64), nullable=False, unique=True, index=True)
|
||||
expires_at = Column(DateTime(timezone=True), nullable=False)
|
||||
used_at = Column(DateTime(timezone=True), nullable=True)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
|
||||
|
||||
class ServerHandshakeNonce(Base):
|
||||
__tablename__ = 'server_handshake_nonces'
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
server_id = Column(Integer, ForeignKey('monitored_servers.id'), nullable=False, index=True)
|
||||
nonce = Column(String(128), nullable=False, index=True)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||
|
||||
@@ -288,6 +288,9 @@ def get_server_states_view(db: Session, offline_after_minutes: int = 7):
|
||||
for s in servers:
|
||||
st = db.query(ServerState).filter(ServerState.server_id == s.id).first()
|
||||
last_seen = st.last_seen_at if st else None
|
||||
# Handle timezone-naive datetimes from database
|
||||
if last_seen and last_seen.tzinfo is None:
|
||||
last_seen = last_seen.replace(tzinfo=timezone.utc)
|
||||
online = bool(last_seen and (now - last_seen).total_seconds() <= offline_after_minutes * 60)
|
||||
out.append({
|
||||
'server_id': s.id,
|
||||
@@ -295,6 +298,7 @@ def get_server_states_view(db: Session, offline_after_minutes: int = 7):
|
||||
'display_name': s.display_name or s.identifier,
|
||||
'online': online,
|
||||
'openclaw_version': st.openclaw_version if st else None,
|
||||
'plugin_version': st.plugin_version if st else None,
|
||||
'cpu_pct': st.cpu_pct if st else None,
|
||||
'mem_pct': st.mem_pct if st else None,
|
||||
'disk_pct': st.disk_pct if st else None,
|
||||
|
||||
76
docs/OPENCLAW_PLUGIN_DEV_PLAN.md
Normal file
76
docs/OPENCLAW_PLUGIN_DEV_PLAN.md
Normal file
@@ -0,0 +1,76 @@
|
||||
# OpenClaw Plugin 开发计划(当前版)
|
||||
|
||||
**状态**: API Key 方案已落地,challenge / WebSocket 旧方案已废弃。
|
||||
|
||||
## 当前架构
|
||||
|
||||
- HarborForge Monitor Backend 提供服务器注册与遥测接收接口
|
||||
- OpenClaw Gateway 加载 `harborforge-monitor` 插件
|
||||
- 插件在 `gateway_start` 时启动 sidecar (`server/telemetry.mjs`)
|
||||
- sidecar 通过 **HTTP + X-API-Key** 向 Backend 上报遥测
|
||||
|
||||
## 当前后端接口
|
||||
|
||||
### 公开接口
|
||||
- `GET /monitor/public/overview`
|
||||
|
||||
### 管理接口
|
||||
- `GET /monitor/admin/servers`
|
||||
- `POST /monitor/admin/servers`
|
||||
- `DELETE /monitor/admin/servers/{id}`
|
||||
- `POST /monitor/admin/servers/{id}/api-key`
|
||||
- `DELETE /monitor/admin/servers/{id}/api-key`
|
||||
|
||||
### 插件上报接口
|
||||
- `POST /monitor/server/heartbeat-v2`
|
||||
- Header: `X-API-Key`
|
||||
- Body:
|
||||
- `identifier`
|
||||
- `openclaw_version`
|
||||
- `plugin_version`
|
||||
- `agents`
|
||||
- `cpu_pct`
|
||||
- `mem_pct`
|
||||
- `disk_pct`
|
||||
- `swap_pct`
|
||||
- `load_avg`
|
||||
- `uptime_seconds`
|
||||
|
||||
## 数据语义
|
||||
|
||||
- `openclaw_version`: 远程服务器上的 OpenClaw 版本
|
||||
- `plugin_version`: 远程服务器上的 harborforge-monitor 插件版本
|
||||
|
||||
## 已废弃内容
|
||||
|
||||
以下旧方案已经废弃,不再作为实现路径:
|
||||
|
||||
- challenge UUID
|
||||
- `GET /monitor/public/server-public-key`
|
||||
- `POST /monitor/admin/servers/{id}/challenge`
|
||||
- `WS /monitor/server/ws`
|
||||
- challenge / nonce 握手逻辑
|
||||
|
||||
## 前端管理页要求
|
||||
|
||||
Monitor 管理页应提供:
|
||||
|
||||
- Add Server
|
||||
- Generate API Key
|
||||
- Revoke API Key
|
||||
- Delete Server
|
||||
|
||||
不再提供 `Generate Challenge`。
|
||||
|
||||
## 运行流程
|
||||
|
||||
1. 管理员在 Monitor 中注册服务器
|
||||
2. 管理员为服务器生成 API Key
|
||||
3. 将 API Key 写入 `~/.openclaw/openclaw.json`
|
||||
4. 重启 OpenClaw Gateway
|
||||
5. 插件启动 sidecar
|
||||
6. sidecar 定时向 `/monitor/server/heartbeat-v2` 上报
|
||||
|
||||
## 备注
|
||||
|
||||
当前保留了对旧 challenge 数据表的**删除兼容清理**(仅为兼容老数据库中的遗留数据),但不再保留 challenge 功能入口与运行时逻辑。
|
||||
108
docs/examples/monitor_heartbeat_secure.py
Normal file
108
docs/examples/monitor_heartbeat_secure.py
Normal file
@@ -0,0 +1,108 @@
|
||||
"""
|
||||
Backend 监控接口需要补充的安全验证代码
|
||||
添加到 app/api/routers/monitor.py
|
||||
"""
|
||||
|
||||
from fastapi import Header
|
||||
|
||||
class ServerHeartbeatSecure(BaseModel):
|
||||
identifier: str
|
||||
challenge_uuid: str # 新增:必须提供 challenge
|
||||
openclaw_version: str | None = None
|
||||
agents: List[dict] = []
|
||||
cpu_pct: float | None = None
|
||||
mem_pct: float | None = None
|
||||
disk_pct: float | None = None
|
||||
swap_pct: float | None = None
|
||||
|
||||
|
||||
@router.post('/server/heartbeat')
|
||||
def server_heartbeat(
|
||||
payload: ServerHeartbeatSecure,
|
||||
x_challenge_uuid: str = Header(..., description='Challenge UUID from registration'),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
安全版本的心跳接口,验证 challenge_uuid
|
||||
"""
|
||||
# 1. 验证服务器存在且启用
|
||||
server = db.query(MonitoredServer).filter(
|
||||
MonitoredServer.identifier == payload.identifier,
|
||||
MonitoredServer.is_enabled == True
|
||||
).first()
|
||||
if not server:
|
||||
raise HTTPException(status_code=404, detail='unknown server identifier')
|
||||
|
||||
# 2. 验证 challenge_uuid 存在且有效
|
||||
ch = db.query(ServerChallenge).filter(
|
||||
ServerChallenge.challenge_uuid == x_challenge_uuid,
|
||||
ServerChallenge.server_id == server.id
|
||||
).first()
|
||||
|
||||
if not ch:
|
||||
raise HTTPException(status_code=401, detail='invalid challenge')
|
||||
|
||||
if ch.expires_at < datetime.now(timezone.utc):
|
||||
raise HTTPException(status_code=401, detail='challenge expired')
|
||||
|
||||
# 3. 可选:检查 challenge 是否已被使用过
|
||||
# 如果是首次验证,标记为已使用
|
||||
if ch.used_at is None:
|
||||
ch.used_at = datetime.now(timezone.utc)
|
||||
|
||||
# 4. 存储状态
|
||||
st = db.query(ServerState).filter(ServerState.server_id == server.id).first()
|
||||
if not st:
|
||||
st = ServerState(server_id=server.id)
|
||||
db.add(st)
|
||||
|
||||
st.openclaw_version = payload.openclaw_version
|
||||
st.agents_json = json.dumps(payload.agents, ensure_ascii=False)
|
||||
st.cpu_pct = payload.cpu_pct
|
||||
st.mem_pct = payload.mem_pct
|
||||
st.disk_pct = payload.disk_pct
|
||||
st.swap_pct = payload.swap_pct
|
||||
st.last_seen_at = datetime.now(timezone.utc)
|
||||
|
||||
db.commit()
|
||||
|
||||
return {
|
||||
'ok': True,
|
||||
'server_id': server.id,
|
||||
'last_seen_at': st.last_seen_at,
|
||||
'challenge_valid_until': ch.expires_at.isoformat()
|
||||
}
|
||||
|
||||
|
||||
# 或者,如果需要长期有效的 API Key 方式:
|
||||
|
||||
class ServerHeartbeatApiKey(BaseModel):
|
||||
identifier: str
|
||||
openclaw_version: str | None = None
|
||||
agents: List[dict] = []
|
||||
cpu_pct: float | None = None
|
||||
mem_pct: float | None = None
|
||||
disk_pct: float | None = None
|
||||
swap_pct: float | None = None
|
||||
|
||||
|
||||
@router.post('/server/heartbeat-v2')
|
||||
def server_heartbeat_v2(
|
||||
payload: ServerHeartbeatApiKey,
|
||||
x_api_key: str = Header(..., description='Server API Key'),
|
||||
db: Session = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
使用 API Key 的心跳接口(长期有效,不需要 challenge)
|
||||
需要在 MonitoredServer 模型中添加 api_key 字段
|
||||
"""
|
||||
server = db.query(MonitoredServer).filter(
|
||||
MonitoredServer.identifier == payload.identifier,
|
||||
MonitoredServer.is_enabled == True,
|
||||
MonitoredServer.api_key == x_api_key # 需要添加 api_key 字段
|
||||
).first()
|
||||
|
||||
if not server:
|
||||
raise HTTPException(status_code=401, detail='invalid identifier or api key')
|
||||
|
||||
# ... 存储状态 ...
|
||||
@@ -1,68 +1,76 @@
|
||||
# OpenClaw Monitor Agent Plugin 开发计划(草案)
|
||||
# HarborForge Monitor / OpenClaw Plugin Connector Plan
|
||||
|
||||
## 目标
|
||||
让被监测服务器通过 WebSocket 主动接入 HarborForge Backend,并持续上报:
|
||||
- OpenClaw 版本
|
||||
- agent 列表
|
||||
- 每 5 分钟主机指标(CPU/MEM/DISK/SWAP)
|
||||
- agent 状态变更事件
|
||||
|
||||
## 握手流程
|
||||
1. Admin 在 HarborForge 后台添加 server identifier
|
||||
2. Admin 生成 challenge(10 分钟有效)
|
||||
3. 插件请求 `GET /monitor/public/server-public-key` 获取公钥
|
||||
4. 插件构造 payload:
|
||||
- `identifier`
|
||||
- `challenge_uuid`
|
||||
- `nonce`(随机)
|
||||
- `ts`(ISO8601)
|
||||
5. 使用 RSA-OAEP(SHA256) 公钥加密,base64 后作为 `encrypted_payload` 发给 `WS /monitor/server/ws`
|
||||
6. 握手成功后进入事件上报通道
|
||||
使用 **API Key + HTTP heartbeat** 连接 HarborForge Monitor 与远程 OpenClaw 节点。
|
||||
|
||||
## 插件事件协议
|
||||
### server.hello
|
||||
## 认证方式
|
||||
|
||||
- 管理员为服务器生成 API Key
|
||||
- 插件通过 `X-API-Key` 调用 heartbeat 接口
|
||||
- 不再使用 challenge / RSA 公钥 / WebSocket 握手
|
||||
|
||||
## 上报接口
|
||||
|
||||
`POST /monitor/server/heartbeat-v2`
|
||||
|
||||
### Headers
|
||||
- `X-API-Key: <server-api-key>`
|
||||
|
||||
### Payload
|
||||
```json
|
||||
{
|
||||
"event": "server.hello",
|
||||
"payload": {
|
||||
"openclaw_version": "x.y.z",
|
||||
"agents": [{"id": "a1", "name": "agent-1", "status": "idle"}]
|
||||
"identifier": "vps.t1",
|
||||
"openclaw_version": "OpenClaw 2026.3.13 (61d171a)",
|
||||
"plugin_version": "0.1.0",
|
||||
"agents": [
|
||||
{ "id": "agent-bot1", "name": "agent-bot1", "status": "configured" }
|
||||
],
|
||||
"cpu_pct": 12.3,
|
||||
"mem_pct": 45.6,
|
||||
"disk_pct": 78.9,
|
||||
"swap_pct": 0,
|
||||
"load_avg": [0.12, 0.08, 0.03],
|
||||
"uptime_seconds": 12345
|
||||
}
|
||||
```
|
||||
|
||||
## 语义
|
||||
|
||||
- `openclaw_version`: 远程主机上的 OpenClaw 版本
|
||||
- `plugin_version`: harborforge-monitor 插件版本
|
||||
|
||||
## 插件生命周期
|
||||
|
||||
- 插件注册到 Gateway
|
||||
- 在 `gateway_start` 启动 `server/telemetry.mjs`
|
||||
- 在 `gateway_stop` 停止 sidecar
|
||||
|
||||
## 配置位置
|
||||
|
||||
`~/.openclaw/openclaw.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"plugins": {
|
||||
"entries": {
|
||||
"harborforge-monitor": {
|
||||
"enabled": true,
|
||||
"config": {
|
||||
"enabled": true,
|
||||
"backendUrl": "http://127.0.0.1:8000",
|
||||
"identifier": "vps.t1",
|
||||
"apiKey": "your-api-key"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### server.metrics(每 5 分钟)
|
||||
```json
|
||||
{
|
||||
"event": "server.metrics",
|
||||
"payload": {
|
||||
"cpu_pct": 21.3,
|
||||
"mem_pct": 42.1,
|
||||
"disk_pct": 55.9,
|
||||
"swap_pct": 0.0,
|
||||
"agents": [{"id": "a1", "name": "agent-1", "status": "busy"}]
|
||||
}
|
||||
}
|
||||
```
|
||||
## 已废弃
|
||||
|
||||
### agent.status_changed(可选)
|
||||
```json
|
||||
{
|
||||
"event": "agent.status_changed",
|
||||
"payload": {
|
||||
"agents": [{"id": "a1", "name": "agent-1", "status": "focus"}]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 实施里程碑
|
||||
- M1: Node/Python CLI 插件最小握手联通
|
||||
- M2: 指标采集 + 周期上报
|
||||
- M3: agent 状态采集与变更事件
|
||||
- M4: 守护化(systemd)+ 断线重连 + 本地日志
|
||||
|
||||
## 风险与注意事项
|
||||
- 时钟漂移会导致 `ts` 校验失败(建议 NTP)
|
||||
- challenge 仅一次可用,重复使用会被拒绝
|
||||
- nonce 重放会被拒绝
|
||||
- 需要保证插件本地安全保存 identifier/challenge(短期)
|
||||
- challenge UUID
|
||||
- server public key
|
||||
- WebSocket telemetry
|
||||
- encrypted handshake payload
|
||||
|
||||
Reference in New Issue
Block a user