Compare commits
6 Commits
67c648d6d8
...
9b5e2dc15c
| Author | SHA1 | Date | |
|---|---|---|---|
| 9b5e2dc15c | |||
| 8e0f158266 | |||
| 97f12cac7a | |||
| a0d0c7b3a1 | |||
| c70f90cb52 | |||
| 929a722c66 |
@@ -1,21 +1,20 @@
|
|||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timezone
|
||||||
import json
|
import json
|
||||||
import uuid
|
import secrets
|
||||||
from typing import List, Dict
|
from typing import List
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, status, WebSocket, WebSocketDisconnect
|
from fastapi import APIRouter, Depends, Header, HTTPException, status
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
from sqlalchemy import text
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from app.core.config import get_db, SessionLocal
|
from app.core.config import get_db
|
||||||
from app.api.deps import get_current_user_or_apikey
|
from app.api.deps import get_current_user_or_apikey
|
||||||
from app.models import models
|
from app.models import models
|
||||||
from app.models.monitor import (
|
from app.models.monitor import (
|
||||||
ProviderAccount,
|
ProviderAccount,
|
||||||
MonitoredServer,
|
MonitoredServer,
|
||||||
ServerState,
|
ServerState,
|
||||||
ServerChallenge,
|
|
||||||
ServerHandshakeNonce,
|
|
||||||
)
|
)
|
||||||
from app.services.monitoring import (
|
from app.services.monitoring import (
|
||||||
get_task_stats_cached,
|
get_task_stats_cached,
|
||||||
@@ -23,11 +22,8 @@ from app.services.monitoring import (
|
|||||||
get_server_states_view,
|
get_server_states_view,
|
||||||
test_provider_connection,
|
test_provider_connection,
|
||||||
)
|
)
|
||||||
from app.services.crypto_box import get_public_key_info, decrypt_payload_b64, ts_within
|
|
||||||
|
|
||||||
router = APIRouter(prefix='/monitor', tags=['Monitor'])
|
router = APIRouter(prefix='/monitor', tags=['Monitor'])
|
||||||
SUPPORTED_PROVIDERS = {'anthropic', 'openai', 'minimax', 'kimi', 'qwen'}
|
SUPPORTED_PROVIDERS = {'anthropic', 'openai', 'minimax', 'kimi', 'qwen'}
|
||||||
ACTIVE_WS: Dict[int, WebSocket] = {}
|
|
||||||
|
|
||||||
|
|
||||||
class ProviderAccountCreate(BaseModel):
|
class ProviderAccountCreate(BaseModel):
|
||||||
@@ -46,23 +42,12 @@ class MonitoredServerCreate(BaseModel):
|
|||||||
display_name: str | None = None
|
display_name: str | None = None
|
||||||
|
|
||||||
|
|
||||||
class ChallengeResponse(BaseModel):
|
|
||||||
identifier: str
|
|
||||||
challenge_uuid: str
|
|
||||||
expires_at: str
|
|
||||||
|
|
||||||
|
|
||||||
def require_admin(current_user: models.User = Depends(get_current_user_or_apikey)):
|
def require_admin(current_user: models.User = Depends(get_current_user_or_apikey)):
|
||||||
if not current_user.is_admin:
|
if not current_user.is_admin:
|
||||||
raise HTTPException(status_code=403, detail='Admin required')
|
raise HTTPException(status_code=403, detail='Admin required')
|
||||||
return current_user
|
return current_user
|
||||||
|
|
||||||
|
|
||||||
@router.get('/public/server-public-key')
|
|
||||||
def monitor_public_key():
|
|
||||||
return get_public_key_info()
|
|
||||||
|
|
||||||
|
|
||||||
@router.get('/public/overview')
|
@router.get('/public/overview')
|
||||||
def public_overview(db: Session = Depends(get_db)):
|
def public_overview(db: Session = Depends(get_db)):
|
||||||
return {
|
return {
|
||||||
@@ -143,37 +128,57 @@ def add_server(payload: MonitoredServerCreate, db: Session = Depends(get_db), us
|
|||||||
return {'id': obj.id, 'identifier': obj.identifier, 'display_name': obj.display_name, 'is_enabled': obj.is_enabled}
|
return {'id': obj.id, 'identifier': obj.identifier, 'display_name': obj.display_name, 'is_enabled': obj.is_enabled}
|
||||||
|
|
||||||
|
|
||||||
@router.post('/admin/servers/{server_id}/challenge', response_model=ChallengeResponse)
|
|
||||||
def issue_server_challenge(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
|
|
||||||
server = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
|
|
||||||
if not server:
|
|
||||||
raise HTTPException(status_code=404, detail='Server not found')
|
|
||||||
challenge_uuid = str(uuid.uuid4())
|
|
||||||
expires_at = datetime.now(timezone.utc) + timedelta(minutes=10)
|
|
||||||
ch = ServerChallenge(server_id=server_id, challenge_uuid=challenge_uuid, expires_at=expires_at)
|
|
||||||
db.add(ch)
|
|
||||||
db.commit()
|
|
||||||
return ChallengeResponse(identifier=server.identifier, challenge_uuid=challenge_uuid, expires_at=expires_at.isoformat())
|
|
||||||
|
|
||||||
|
|
||||||
@router.delete('/admin/servers/{server_id}', status_code=status.HTTP_204_NO_CONTENT)
|
@router.delete('/admin/servers/{server_id}', status_code=status.HTTP_204_NO_CONTENT)
|
||||||
def delete_server(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
|
def delete_server(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
|
||||||
obj = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
|
obj = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
|
||||||
if not obj:
|
if not obj:
|
||||||
raise HTTPException(status_code=404, detail='Server not found')
|
raise HTTPException(status_code=404, detail='Server not found')
|
||||||
state = db.query(ServerState).filter(ServerState.server_id == server_id).first()
|
|
||||||
if state:
|
# Delete dependent rows first to avoid FK errors.
|
||||||
db.delete(state)
|
db.query(ServerState).filter(ServerState.server_id == server_id).delete(synchronize_session=False)
|
||||||
db.query(ServerChallenge).filter(ServerChallenge.server_id == server_id).delete()
|
|
||||||
db.query(ServerHandshakeNonce).filter(ServerHandshakeNonce.server_id == server_id).delete()
|
# Backward-compatible cleanup for deprecated challenge tables that may still exist in older DBs.
|
||||||
|
try:
|
||||||
|
db.execute(text('DELETE FROM server_handshake_nonces WHERE server_id = :server_id'), {'server_id': server_id})
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
db.execute(text('DELETE FROM server_challenges WHERE server_id = :server_id'), {'server_id': server_id})
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
db.delete(obj)
|
db.delete(obj)
|
||||||
db.commit()
|
db.commit()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@router.post('/admin/servers/{server_id}/api-key')
|
||||||
|
def generate_api_key(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
|
||||||
|
"""Generate or regenerate API Key for a server (heartbeat v2)"""
|
||||||
|
server = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
|
||||||
|
if not server:
|
||||||
|
raise HTTPException(status_code=404, detail='Server not found')
|
||||||
|
api_key = secrets.token_urlsafe(32)
|
||||||
|
server.api_key = api_key
|
||||||
|
db.commit()
|
||||||
|
return {'server_id': server.id, 'api_key': api_key, 'message': 'Store this key securely - it will not be shown again'}
|
||||||
|
|
||||||
|
|
||||||
|
@router.delete('/admin/servers/{server_id}/api-key', status_code=status.HTTP_204_NO_CONTENT)
|
||||||
|
def revoke_api_key(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
|
||||||
|
"""Revoke API Key for a server"""
|
||||||
|
server = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
|
||||||
|
if not server:
|
||||||
|
raise HTTPException(status_code=404, detail='Server not found')
|
||||||
|
server.api_key = None
|
||||||
|
db.commit()
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
class ServerHeartbeat(BaseModel):
|
class ServerHeartbeat(BaseModel):
|
||||||
identifier: str
|
identifier: str
|
||||||
openclaw_version: str | None = None
|
openclaw_version: str | None = None
|
||||||
|
plugin_version: str | None = None
|
||||||
agents: List[dict] = []
|
agents: List[dict] = []
|
||||||
cpu_pct: float | None = None
|
cpu_pct: float | None = None
|
||||||
mem_pct: float | None = None
|
mem_pct: float | None = None
|
||||||
@@ -191,6 +196,7 @@ def server_heartbeat(payload: ServerHeartbeat, db: Session = Depends(get_db)):
|
|||||||
st = ServerState(server_id=server.id)
|
st = ServerState(server_id=server.id)
|
||||||
db.add(st)
|
db.add(st)
|
||||||
st.openclaw_version = payload.openclaw_version
|
st.openclaw_version = payload.openclaw_version
|
||||||
|
st.plugin_version = payload.plugin_version
|
||||||
st.agents_json = json.dumps(payload.agents, ensure_ascii=False)
|
st.agents_json = json.dumps(payload.agents, ensure_ascii=False)
|
||||||
st.cpu_pct = payload.cpu_pct
|
st.cpu_pct = payload.cpu_pct
|
||||||
st.mem_pct = payload.mem_pct
|
st.mem_pct = payload.mem_pct
|
||||||
@@ -201,87 +207,45 @@ def server_heartbeat(payload: ServerHeartbeat, db: Session = Depends(get_db)):
|
|||||||
return {'ok': True, 'server_id': server.id, 'last_seen_at': st.last_seen_at}
|
return {'ok': True, 'server_id': server.id, 'last_seen_at': st.last_seen_at}
|
||||||
|
|
||||||
|
|
||||||
@router.websocket('/server/ws')
|
# Heartbeat v2 with API Key authentication
|
||||||
async def server_ws(websocket: WebSocket):
|
class TelemetryPayload(BaseModel):
|
||||||
await websocket.accept()
|
identifier: str
|
||||||
db = SessionLocal()
|
openclaw_version: str | None = None
|
||||||
server_id = None
|
plugin_version: str | None = None
|
||||||
try:
|
agents: List[dict] = []
|
||||||
hello = await websocket.receive_json()
|
cpu_pct: float | None = None
|
||||||
|
mem_pct: float | None = None
|
||||||
|
disk_pct: float | None = None
|
||||||
|
swap_pct: float | None = None
|
||||||
|
load_avg: list[float] | None = None
|
||||||
|
uptime_seconds: int | None = None
|
||||||
|
|
||||||
encrypted_payload = (hello.get('encrypted_payload') or '').strip()
|
|
||||||
if encrypted_payload:
|
|
||||||
data = decrypt_payload_b64(encrypted_payload)
|
|
||||||
identifier = (data.get('identifier') or '').strip()
|
|
||||||
challenge_uuid = (data.get('challenge_uuid') or '').strip()
|
|
||||||
nonce = (data.get('nonce') or '').strip()
|
|
||||||
ts = data.get('ts')
|
|
||||||
if not ts_within(ts, max_minutes=10):
|
|
||||||
await websocket.close(code=4401)
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
# backward compatible mode
|
|
||||||
identifier = (hello.get('identifier') or '').strip()
|
|
||||||
challenge_uuid = (hello.get('challenge_uuid') or '').strip()
|
|
||||||
nonce = (hello.get('nonce') or '').strip()
|
|
||||||
|
|
||||||
if not identifier or not challenge_uuid or not nonce:
|
@router.post('/server/heartbeat-v2')
|
||||||
await websocket.close(code=4400)
|
def server_heartbeat_v2(
|
||||||
return
|
payload: TelemetryPayload,
|
||||||
|
x_api_key: str = Header(..., alias='X-API-Key', description='API Key from /admin/servers/{id}/api-key'),
|
||||||
|
db: Session = Depends(get_db)
|
||||||
|
):
|
||||||
|
"""Server heartbeat using API Key authentication (no challenge_uuid required)"""
|
||||||
|
server = db.query(MonitoredServer).filter(
|
||||||
|
MonitoredServer.api_key == x_api_key,
|
||||||
|
MonitoredServer.is_enabled == True
|
||||||
|
).first()
|
||||||
|
if not server:
|
||||||
|
raise HTTPException(status_code=401, detail='Invalid or missing API Key')
|
||||||
|
st = db.query(ServerState).filter(ServerState.server_id == server.id).first()
|
||||||
|
if not st:
|
||||||
|
st = ServerState(server_id=server.id)
|
||||||
|
db.add(st)
|
||||||
|
st.openclaw_version = payload.openclaw_version
|
||||||
|
st.plugin_version = payload.plugin_version
|
||||||
|
st.agents_json = json.dumps(payload.agents, ensure_ascii=False)
|
||||||
|
st.cpu_pct = payload.cpu_pct
|
||||||
|
st.mem_pct = payload.mem_pct
|
||||||
|
st.disk_pct = payload.disk_pct
|
||||||
|
st.swap_pct = payload.swap_pct
|
||||||
|
st.last_seen_at = datetime.now(timezone.utc)
|
||||||
|
db.commit()
|
||||||
|
return {'ok': True, 'server_id': server.id, 'identifier': server.identifier, 'last_seen_at': st.last_seen_at}
|
||||||
|
|
||||||
server = db.query(MonitoredServer).filter(MonitoredServer.identifier == identifier, MonitoredServer.is_enabled == True).first()
|
|
||||||
if not server:
|
|
||||||
await websocket.close(code=4404)
|
|
||||||
return
|
|
||||||
|
|
||||||
ch = db.query(ServerChallenge).filter(ServerChallenge.challenge_uuid == challenge_uuid, ServerChallenge.server_id == server.id).first()
|
|
||||||
if not ch or ch.used_at is not None or ch.expires_at < datetime.now(timezone.utc):
|
|
||||||
await websocket.close(code=4401)
|
|
||||||
return
|
|
||||||
|
|
||||||
nonce_used = db.query(ServerHandshakeNonce).filter(ServerHandshakeNonce.server_id == server.id, ServerHandshakeNonce.nonce == nonce).first()
|
|
||||||
if nonce_used:
|
|
||||||
await websocket.close(code=4409)
|
|
||||||
return
|
|
||||||
|
|
||||||
db.add(ServerHandshakeNonce(server_id=server.id, nonce=nonce))
|
|
||||||
ch.used_at = datetime.now(timezone.utc)
|
|
||||||
db.commit()
|
|
||||||
|
|
||||||
server_id = server.id
|
|
||||||
ACTIVE_WS[server.id] = websocket
|
|
||||||
await websocket.send_json({'ok': True, 'server_id': server.id, 'message': 'connected'})
|
|
||||||
|
|
||||||
while True:
|
|
||||||
msg = await websocket.receive_json()
|
|
||||||
event = msg.get('event')
|
|
||||||
payload = msg.get('payload') or {}
|
|
||||||
st = db.query(ServerState).filter(ServerState.server_id == server.id).first()
|
|
||||||
if not st:
|
|
||||||
st = ServerState(server_id=server.id)
|
|
||||||
db.add(st)
|
|
||||||
|
|
||||||
if event == 'server.hello':
|
|
||||||
st.openclaw_version = payload.get('openclaw_version')
|
|
||||||
st.agents_json = json.dumps(payload.get('agents') or [], ensure_ascii=False)
|
|
||||||
elif event in {'server.metrics', 'agent.status_changed'}:
|
|
||||||
st.cpu_pct = payload.get('cpu_pct', st.cpu_pct)
|
|
||||||
st.mem_pct = payload.get('mem_pct', st.mem_pct)
|
|
||||||
st.disk_pct = payload.get('disk_pct', st.disk_pct)
|
|
||||||
st.swap_pct = payload.get('swap_pct', st.swap_pct)
|
|
||||||
if 'agents' in payload:
|
|
||||||
st.agents_json = json.dumps(payload.get('agents') or [], ensure_ascii=False)
|
|
||||||
|
|
||||||
st.last_seen_at = datetime.now(timezone.utc)
|
|
||||||
db.commit()
|
|
||||||
except WebSocketDisconnect:
|
|
||||||
pass
|
|
||||||
except Exception:
|
|
||||||
try:
|
|
||||||
await websocket.close(code=1011)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
finally:
|
|
||||||
if server_id and ACTIVE_WS.get(server_id) is websocket:
|
|
||||||
ACTIVE_WS.pop(server_id, None)
|
|
||||||
db.close()
|
|
||||||
|
|||||||
@@ -215,6 +215,15 @@ def _migrate_schema():
|
|||||||
"DEFAULT 'open'"
|
"DEFAULT 'open'"
|
||||||
))
|
))
|
||||||
|
|
||||||
|
# --- monitored_servers.api_key for heartbeat v2 ---
|
||||||
|
if _has_table(db, "monitored_servers") and not _has_column(db, "monitored_servers", "api_key"):
|
||||||
|
db.execute(text("ALTER TABLE monitored_servers ADD COLUMN api_key VARCHAR(64) NULL"))
|
||||||
|
db.execute(text("CREATE UNIQUE INDEX idx_monitored_servers_api_key ON monitored_servers (api_key)"))
|
||||||
|
|
||||||
|
# --- server_states.plugin_version for monitor plugin telemetry ---
|
||||||
|
if _has_table(db, "server_states") and not _has_column(db, "server_states", "plugin_version"):
|
||||||
|
db.execute(text("ALTER TABLE server_states ADD COLUMN plugin_version VARCHAR(64) NULL"))
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
db.rollback()
|
db.rollback()
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ class MonitoredServer(Base):
|
|||||||
identifier = Column(String(128), nullable=False, unique=True)
|
identifier = Column(String(128), nullable=False, unique=True)
|
||||||
display_name = Column(String(128), nullable=True)
|
display_name = Column(String(128), nullable=True)
|
||||||
is_enabled = Column(Boolean, default=True)
|
is_enabled = Column(Boolean, default=True)
|
||||||
|
api_key = Column(String(64), nullable=True, unique=True, index=True) # API Key for server heartbeat v2
|
||||||
created_by = Column(Integer, nullable=True)
|
created_by = Column(Integer, nullable=True)
|
||||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||||
|
|
||||||
@@ -49,6 +50,7 @@ class ServerState(Base):
|
|||||||
id = Column(Integer, primary_key=True, index=True)
|
id = Column(Integer, primary_key=True, index=True)
|
||||||
server_id = Column(Integer, ForeignKey('monitored_servers.id'), nullable=False, unique=True)
|
server_id = Column(Integer, ForeignKey('monitored_servers.id'), nullable=False, unique=True)
|
||||||
openclaw_version = Column(String(64), nullable=True)
|
openclaw_version = Column(String(64), nullable=True)
|
||||||
|
plugin_version = Column(String(64), nullable=True)
|
||||||
agents_json = Column(Text, nullable=True) # json list
|
agents_json = Column(Text, nullable=True) # json list
|
||||||
cpu_pct = Column(Float, nullable=True)
|
cpu_pct = Column(Float, nullable=True)
|
||||||
mem_pct = Column(Float, nullable=True)
|
mem_pct = Column(Float, nullable=True)
|
||||||
@@ -57,22 +59,3 @@ class ServerState(Base):
|
|||||||
last_seen_at = Column(DateTime(timezone=True), nullable=True)
|
last_seen_at = Column(DateTime(timezone=True), nullable=True)
|
||||||
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
|
updated_at = Column(DateTime(timezone=True), onupdate=func.now())
|
||||||
|
|
||||||
|
|
||||||
class ServerChallenge(Base):
|
|
||||||
__tablename__ = 'server_challenges'
|
|
||||||
|
|
||||||
id = Column(Integer, primary_key=True, index=True)
|
|
||||||
server_id = Column(Integer, ForeignKey('monitored_servers.id'), nullable=False, index=True)
|
|
||||||
challenge_uuid = Column(String(64), nullable=False, unique=True, index=True)
|
|
||||||
expires_at = Column(DateTime(timezone=True), nullable=False)
|
|
||||||
used_at = Column(DateTime(timezone=True), nullable=True)
|
|
||||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
|
||||||
|
|
||||||
|
|
||||||
class ServerHandshakeNonce(Base):
|
|
||||||
__tablename__ = 'server_handshake_nonces'
|
|
||||||
|
|
||||||
id = Column(Integer, primary_key=True, index=True)
|
|
||||||
server_id = Column(Integer, ForeignKey('monitored_servers.id'), nullable=False, index=True)
|
|
||||||
nonce = Column(String(128), nullable=False, index=True)
|
|
||||||
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
|
||||||
|
|||||||
@@ -288,6 +288,9 @@ def get_server_states_view(db: Session, offline_after_minutes: int = 7):
|
|||||||
for s in servers:
|
for s in servers:
|
||||||
st = db.query(ServerState).filter(ServerState.server_id == s.id).first()
|
st = db.query(ServerState).filter(ServerState.server_id == s.id).first()
|
||||||
last_seen = st.last_seen_at if st else None
|
last_seen = st.last_seen_at if st else None
|
||||||
|
# Handle timezone-naive datetimes from database
|
||||||
|
if last_seen and last_seen.tzinfo is None:
|
||||||
|
last_seen = last_seen.replace(tzinfo=timezone.utc)
|
||||||
online = bool(last_seen and (now - last_seen).total_seconds() <= offline_after_minutes * 60)
|
online = bool(last_seen and (now - last_seen).total_seconds() <= offline_after_minutes * 60)
|
||||||
out.append({
|
out.append({
|
||||||
'server_id': s.id,
|
'server_id': s.id,
|
||||||
@@ -295,6 +298,7 @@ def get_server_states_view(db: Session, offline_after_minutes: int = 7):
|
|||||||
'display_name': s.display_name or s.identifier,
|
'display_name': s.display_name or s.identifier,
|
||||||
'online': online,
|
'online': online,
|
||||||
'openclaw_version': st.openclaw_version if st else None,
|
'openclaw_version': st.openclaw_version if st else None,
|
||||||
|
'plugin_version': st.plugin_version if st else None,
|
||||||
'cpu_pct': st.cpu_pct if st else None,
|
'cpu_pct': st.cpu_pct if st else None,
|
||||||
'mem_pct': st.mem_pct if st else None,
|
'mem_pct': st.mem_pct if st else None,
|
||||||
'disk_pct': st.disk_pct if st else None,
|
'disk_pct': st.disk_pct if st else None,
|
||||||
|
|||||||
76
docs/OPENCLAW_PLUGIN_DEV_PLAN.md
Normal file
76
docs/OPENCLAW_PLUGIN_DEV_PLAN.md
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
# OpenClaw Plugin 开发计划(当前版)
|
||||||
|
|
||||||
|
**状态**: API Key 方案已落地,challenge / WebSocket 旧方案已废弃。
|
||||||
|
|
||||||
|
## 当前架构
|
||||||
|
|
||||||
|
- HarborForge Monitor Backend 提供服务器注册与遥测接收接口
|
||||||
|
- OpenClaw Gateway 加载 `harborforge-monitor` 插件
|
||||||
|
- 插件在 `gateway_start` 时启动 sidecar (`server/telemetry.mjs`)
|
||||||
|
- sidecar 通过 **HTTP + X-API-Key** 向 Backend 上报遥测
|
||||||
|
|
||||||
|
## 当前后端接口
|
||||||
|
|
||||||
|
### 公开接口
|
||||||
|
- `GET /monitor/public/overview`
|
||||||
|
|
||||||
|
### 管理接口
|
||||||
|
- `GET /monitor/admin/servers`
|
||||||
|
- `POST /monitor/admin/servers`
|
||||||
|
- `DELETE /monitor/admin/servers/{id}`
|
||||||
|
- `POST /monitor/admin/servers/{id}/api-key`
|
||||||
|
- `DELETE /monitor/admin/servers/{id}/api-key`
|
||||||
|
|
||||||
|
### 插件上报接口
|
||||||
|
- `POST /monitor/server/heartbeat-v2`
|
||||||
|
- Header: `X-API-Key`
|
||||||
|
- Body:
|
||||||
|
- `identifier`
|
||||||
|
- `openclaw_version`
|
||||||
|
- `plugin_version`
|
||||||
|
- `agents`
|
||||||
|
- `cpu_pct`
|
||||||
|
- `mem_pct`
|
||||||
|
- `disk_pct`
|
||||||
|
- `swap_pct`
|
||||||
|
- `load_avg`
|
||||||
|
- `uptime_seconds`
|
||||||
|
|
||||||
|
## 数据语义
|
||||||
|
|
||||||
|
- `openclaw_version`: 远程服务器上的 OpenClaw 版本
|
||||||
|
- `plugin_version`: 远程服务器上的 harborforge-monitor 插件版本
|
||||||
|
|
||||||
|
## 已废弃内容
|
||||||
|
|
||||||
|
以下旧方案已经废弃,不再作为实现路径:
|
||||||
|
|
||||||
|
- challenge UUID
|
||||||
|
- `GET /monitor/public/server-public-key`
|
||||||
|
- `POST /monitor/admin/servers/{id}/challenge`
|
||||||
|
- `WS /monitor/server/ws`
|
||||||
|
- challenge / nonce 握手逻辑
|
||||||
|
|
||||||
|
## 前端管理页要求
|
||||||
|
|
||||||
|
Monitor 管理页应提供:
|
||||||
|
|
||||||
|
- Add Server
|
||||||
|
- Generate API Key
|
||||||
|
- Revoke API Key
|
||||||
|
- Delete Server
|
||||||
|
|
||||||
|
不再提供 `Generate Challenge`。
|
||||||
|
|
||||||
|
## 运行流程
|
||||||
|
|
||||||
|
1. 管理员在 Monitor 中注册服务器
|
||||||
|
2. 管理员为服务器生成 API Key
|
||||||
|
3. 将 API Key 写入 `~/.openclaw/openclaw.json`
|
||||||
|
4. 重启 OpenClaw Gateway
|
||||||
|
5. 插件启动 sidecar
|
||||||
|
6. sidecar 定时向 `/monitor/server/heartbeat-v2` 上报
|
||||||
|
|
||||||
|
## 备注
|
||||||
|
|
||||||
|
当前保留了对旧 challenge 数据表的**删除兼容清理**(仅为兼容老数据库中的遗留数据),但不再保留 challenge 功能入口与运行时逻辑。
|
||||||
108
docs/examples/monitor_heartbeat_secure.py
Normal file
108
docs/examples/monitor_heartbeat_secure.py
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
"""
|
||||||
|
Backend 监控接口需要补充的安全验证代码
|
||||||
|
添加到 app/api/routers/monitor.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
from fastapi import Header
|
||||||
|
|
||||||
|
class ServerHeartbeatSecure(BaseModel):
|
||||||
|
identifier: str
|
||||||
|
challenge_uuid: str # 新增:必须提供 challenge
|
||||||
|
openclaw_version: str | None = None
|
||||||
|
agents: List[dict] = []
|
||||||
|
cpu_pct: float | None = None
|
||||||
|
mem_pct: float | None = None
|
||||||
|
disk_pct: float | None = None
|
||||||
|
swap_pct: float | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@router.post('/server/heartbeat')
|
||||||
|
def server_heartbeat(
|
||||||
|
payload: ServerHeartbeatSecure,
|
||||||
|
x_challenge_uuid: str = Header(..., description='Challenge UUID from registration'),
|
||||||
|
db: Session = Depends(get_db)
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
安全版本的心跳接口,验证 challenge_uuid
|
||||||
|
"""
|
||||||
|
# 1. 验证服务器存在且启用
|
||||||
|
server = db.query(MonitoredServer).filter(
|
||||||
|
MonitoredServer.identifier == payload.identifier,
|
||||||
|
MonitoredServer.is_enabled == True
|
||||||
|
).first()
|
||||||
|
if not server:
|
||||||
|
raise HTTPException(status_code=404, detail='unknown server identifier')
|
||||||
|
|
||||||
|
# 2. 验证 challenge_uuid 存在且有效
|
||||||
|
ch = db.query(ServerChallenge).filter(
|
||||||
|
ServerChallenge.challenge_uuid == x_challenge_uuid,
|
||||||
|
ServerChallenge.server_id == server.id
|
||||||
|
).first()
|
||||||
|
|
||||||
|
if not ch:
|
||||||
|
raise HTTPException(status_code=401, detail='invalid challenge')
|
||||||
|
|
||||||
|
if ch.expires_at < datetime.now(timezone.utc):
|
||||||
|
raise HTTPException(status_code=401, detail='challenge expired')
|
||||||
|
|
||||||
|
# 3. 可选:检查 challenge 是否已被使用过
|
||||||
|
# 如果是首次验证,标记为已使用
|
||||||
|
if ch.used_at is None:
|
||||||
|
ch.used_at = datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
# 4. 存储状态
|
||||||
|
st = db.query(ServerState).filter(ServerState.server_id == server.id).first()
|
||||||
|
if not st:
|
||||||
|
st = ServerState(server_id=server.id)
|
||||||
|
db.add(st)
|
||||||
|
|
||||||
|
st.openclaw_version = payload.openclaw_version
|
||||||
|
st.agents_json = json.dumps(payload.agents, ensure_ascii=False)
|
||||||
|
st.cpu_pct = payload.cpu_pct
|
||||||
|
st.mem_pct = payload.mem_pct
|
||||||
|
st.disk_pct = payload.disk_pct
|
||||||
|
st.swap_pct = payload.swap_pct
|
||||||
|
st.last_seen_at = datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
db.commit()
|
||||||
|
|
||||||
|
return {
|
||||||
|
'ok': True,
|
||||||
|
'server_id': server.id,
|
||||||
|
'last_seen_at': st.last_seen_at,
|
||||||
|
'challenge_valid_until': ch.expires_at.isoformat()
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# 或者,如果需要长期有效的 API Key 方式:
|
||||||
|
|
||||||
|
class ServerHeartbeatApiKey(BaseModel):
|
||||||
|
identifier: str
|
||||||
|
openclaw_version: str | None = None
|
||||||
|
agents: List[dict] = []
|
||||||
|
cpu_pct: float | None = None
|
||||||
|
mem_pct: float | None = None
|
||||||
|
disk_pct: float | None = None
|
||||||
|
swap_pct: float | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@router.post('/server/heartbeat-v2')
|
||||||
|
def server_heartbeat_v2(
|
||||||
|
payload: ServerHeartbeatApiKey,
|
||||||
|
x_api_key: str = Header(..., description='Server API Key'),
|
||||||
|
db: Session = Depends(get_db)
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
使用 API Key 的心跳接口(长期有效,不需要 challenge)
|
||||||
|
需要在 MonitoredServer 模型中添加 api_key 字段
|
||||||
|
"""
|
||||||
|
server = db.query(MonitoredServer).filter(
|
||||||
|
MonitoredServer.identifier == payload.identifier,
|
||||||
|
MonitoredServer.is_enabled == True,
|
||||||
|
MonitoredServer.api_key == x_api_key # 需要添加 api_key 字段
|
||||||
|
).first()
|
||||||
|
|
||||||
|
if not server:
|
||||||
|
raise HTTPException(status_code=401, detail='invalid identifier or api key')
|
||||||
|
|
||||||
|
# ... 存储状态 ...
|
||||||
@@ -1,68 +1,76 @@
|
|||||||
# OpenClaw Monitor Agent Plugin 开发计划(草案)
|
# HarborForge Monitor / OpenClaw Plugin Connector Plan
|
||||||
|
|
||||||
## 目标
|
## 目标
|
||||||
让被监测服务器通过 WebSocket 主动接入 HarborForge Backend,并持续上报:
|
|
||||||
- OpenClaw 版本
|
|
||||||
- agent 列表
|
|
||||||
- 每 5 分钟主机指标(CPU/MEM/DISK/SWAP)
|
|
||||||
- agent 状态变更事件
|
|
||||||
|
|
||||||
## 握手流程
|
使用 **API Key + HTTP heartbeat** 连接 HarborForge Monitor 与远程 OpenClaw 节点。
|
||||||
1. Admin 在 HarborForge 后台添加 server identifier
|
|
||||||
2. Admin 生成 challenge(10 分钟有效)
|
|
||||||
3. 插件请求 `GET /monitor/public/server-public-key` 获取公钥
|
|
||||||
4. 插件构造 payload:
|
|
||||||
- `identifier`
|
|
||||||
- `challenge_uuid`
|
|
||||||
- `nonce`(随机)
|
|
||||||
- `ts`(ISO8601)
|
|
||||||
5. 使用 RSA-OAEP(SHA256) 公钥加密,base64 后作为 `encrypted_payload` 发给 `WS /monitor/server/ws`
|
|
||||||
6. 握手成功后进入事件上报通道
|
|
||||||
|
|
||||||
## 插件事件协议
|
## 认证方式
|
||||||
### server.hello
|
|
||||||
|
- 管理员为服务器生成 API Key
|
||||||
|
- 插件通过 `X-API-Key` 调用 heartbeat 接口
|
||||||
|
- 不再使用 challenge / RSA 公钥 / WebSocket 握手
|
||||||
|
|
||||||
|
## 上报接口
|
||||||
|
|
||||||
|
`POST /monitor/server/heartbeat-v2`
|
||||||
|
|
||||||
|
### Headers
|
||||||
|
- `X-API-Key: <server-api-key>`
|
||||||
|
|
||||||
|
### Payload
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"event": "server.hello",
|
"identifier": "vps.t1",
|
||||||
"payload": {
|
"openclaw_version": "OpenClaw 2026.3.13 (61d171a)",
|
||||||
"openclaw_version": "x.y.z",
|
"plugin_version": "0.1.0",
|
||||||
"agents": [{"id": "a1", "name": "agent-1", "status": "idle"}]
|
"agents": [
|
||||||
|
{ "id": "agent-bot1", "name": "agent-bot1", "status": "configured" }
|
||||||
|
],
|
||||||
|
"cpu_pct": 12.3,
|
||||||
|
"mem_pct": 45.6,
|
||||||
|
"disk_pct": 78.9,
|
||||||
|
"swap_pct": 0,
|
||||||
|
"load_avg": [0.12, 0.08, 0.03],
|
||||||
|
"uptime_seconds": 12345
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 语义
|
||||||
|
|
||||||
|
- `openclaw_version`: 远程主机上的 OpenClaw 版本
|
||||||
|
- `plugin_version`: harborforge-monitor 插件版本
|
||||||
|
|
||||||
|
## 插件生命周期
|
||||||
|
|
||||||
|
- 插件注册到 Gateway
|
||||||
|
- 在 `gateway_start` 启动 `server/telemetry.mjs`
|
||||||
|
- 在 `gateway_stop` 停止 sidecar
|
||||||
|
|
||||||
|
## 配置位置
|
||||||
|
|
||||||
|
`~/.openclaw/openclaw.json`
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"plugins": {
|
||||||
|
"entries": {
|
||||||
|
"harborforge-monitor": {
|
||||||
|
"enabled": true,
|
||||||
|
"config": {
|
||||||
|
"enabled": true,
|
||||||
|
"backendUrl": "http://127.0.0.1:8000",
|
||||||
|
"identifier": "vps.t1",
|
||||||
|
"apiKey": "your-api-key"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
### server.metrics(每 5 分钟)
|
## 已废弃
|
||||||
```json
|
|
||||||
{
|
|
||||||
"event": "server.metrics",
|
|
||||||
"payload": {
|
|
||||||
"cpu_pct": 21.3,
|
|
||||||
"mem_pct": 42.1,
|
|
||||||
"disk_pct": 55.9,
|
|
||||||
"swap_pct": 0.0,
|
|
||||||
"agents": [{"id": "a1", "name": "agent-1", "status": "busy"}]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### agent.status_changed(可选)
|
- challenge UUID
|
||||||
```json
|
- server public key
|
||||||
{
|
- WebSocket telemetry
|
||||||
"event": "agent.status_changed",
|
- encrypted handshake payload
|
||||||
"payload": {
|
|
||||||
"agents": [{"id": "a1", "name": "agent-1", "status": "focus"}]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## 实施里程碑
|
|
||||||
- M1: Node/Python CLI 插件最小握手联通
|
|
||||||
- M2: 指标采集 + 周期上报
|
|
||||||
- M3: agent 状态采集与变更事件
|
|
||||||
- M4: 守护化(systemd)+ 断线重连 + 本地日志
|
|
||||||
|
|
||||||
## 风险与注意事项
|
|
||||||
- 时钟漂移会导致 `ts` 校验失败(建议 NTP)
|
|
||||||
- challenge 仅一次可用,重复使用会被拒绝
|
|
||||||
- nonce 重放会被拒绝
|
|
||||||
- 需要保证插件本地安全保存 identifier/challenge(短期)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user