- Add server_states.plugin_version column
- Keep openclaw_version for remote OpenClaw runtime version
- Expose plugin_version in monitor server view
- Accept and persist plugin_version in heartbeat payloads
362 lines
14 KiB
Python
362 lines
14 KiB
Python
from datetime import datetime, timedelta, timezone
|
|
import json
|
|
import secrets
|
|
import uuid
|
|
from typing import List, Dict
|
|
|
|
from fastapi import APIRouter, Depends, Header, HTTPException, status, WebSocket, WebSocketDisconnect
|
|
from pydantic import BaseModel
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.core.config import get_db, SessionLocal
|
|
from app.api.deps import get_current_user_or_apikey
|
|
from app.models import models
|
|
from app.models.monitor import (
|
|
ProviderAccount,
|
|
MonitoredServer,
|
|
ServerState,
|
|
ServerChallenge,
|
|
ServerHandshakeNonce,
|
|
)
|
|
from app.services.monitoring import (
|
|
get_task_stats_cached,
|
|
get_provider_usage_view,
|
|
get_server_states_view,
|
|
test_provider_connection,
|
|
)
|
|
from app.services.crypto_box import get_public_key_info, decrypt_payload_b64, ts_within
|
|
|
|
# Every route declared in this module is mounted under the /monitor prefix.
router = APIRouter(prefix='/monitor', tags=['Monitor'])

# Provider names accepted by the provider-account creation endpoint
# (compared after lower-casing and stripping the submitted value).
SUPPORTED_PROVIDERS = {
    'anthropic',
    'openai',
    'minimax',
    'kimi',
    'qwen',
}

# Currently connected server websockets, keyed by MonitoredServer id.
# Entries are added after a successful handshake and removed on disconnect.
ACTIVE_WS: Dict[int, WebSocket] = {}
|
|
|
|
|
|
class ProviderAccountCreate(BaseModel):
    # Request body for POST /monitor/admin/providers/accounts.

    # Provider name; the create endpoint lower-cases and strips it, then
    # checks it against SUPPORTED_PROVIDERS.
    provider: str

    # Human-readable name for the account (stored stripped).
    label: str

    # Secret used to talk to the provider (stored stripped).
    credential: str
|
|
|
|
|
|
class ProviderTestRequest(BaseModel):
    # Request body for POST /monitor/admin/providers/test: a provider name and
    # a credential to try, without persisting anything.

    provider: str

    credential: str
|
|
|
|
|
|
class MonitoredServerCreate(BaseModel):
    # Request body for POST /monitor/admin/servers.

    # Unique name the server reports itself under (stripped by the endpoint).
    identifier: str

    # Optional label; stored exactly as submitted.
    display_name: str | None = None
|
|
|
|
|
|
class ChallengeResponse(BaseModel):
    # Response body of the challenge-issuing endpoint.

    # Identifier of the server the challenge was issued for.
    identifier: str

    # One-time token the server presents during the websocket handshake.
    challenge_uuid: str

    # Expiry instant, serialised with datetime.isoformat().
    expires_at: str
|
|
|
|
|
|
def require_admin(current_user: models.User = Depends(get_current_user_or_apikey)):
    """FastAPI dependency that only lets admin users through.

    Returns the authenticated user when ``current_user.is_admin`` is truthy;
    otherwise raises ``HTTPException`` with status 403.
    """
    if current_user.is_admin:
        return current_user
    raise HTTPException(status_code=403, detail='Admin required')
|
|
|
|
|
|
@router.get('/public/server-public-key')
def monitor_public_key():
    # No auth dependency is declared on this route; it simply relays whatever
    # get_public_key_info() reports.
    public_key_info = get_public_key_info()
    return public_key_info
|
|
|
|
|
|
@router.get('/public/overview')
def public_overview(db: Session = Depends(get_db)):
    # Aggregated dashboard snapshot; no auth dependency is declared here.
    # Task stats are requested with a 1800-second TTL and the server view is
    # built with a 7-minute offline threshold.
    task_stats = get_task_stats_cached(db, ttl_seconds=1800)
    provider_usage = get_provider_usage_view(db)
    server_states = get_server_states_view(db, offline_after_minutes=7)
    generated_at = datetime.now(timezone.utc).isoformat()

    overview = {
        'tasks': task_stats,
        'providers': provider_usage,
        'servers': server_states,
        'generated_at': generated_at,
    }
    return overview
|
|
|
|
|
|
def _mask_credential(credential):
    """Return a display-safe placeholder for a stored credential.

    The last four characters are revealed only when at least as many
    characters stay hidden. Shorter, empty or ``None`` credentials are fully
    masked, so a short secret is never echoed back in full.
    """
    visible = 4
    if credential and len(credential) >= 2 * visible:
        return '***' + credential[-visible:]
    return '***'


@router.get('/admin/providers/accounts')
def list_provider_accounts(db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
    """List every provider account, newest first, with the credential masked.

    Admin only (enforced by the ``require_admin`` dependency).

    Returns:
        A list of dicts with ``id``, ``provider``, ``label``, ``is_enabled``,
        ``created_at`` and ``credential_masked``. The raw credential is never
        included in the response.
    """
    accounts = db.query(ProviderAccount).order_by(ProviderAccount.created_at.desc()).all()
    return [
        {
            'id': account.id,
            'provider': account.provider,
            'label': account.label,
            'is_enabled': account.is_enabled,
            'created_at': account.created_at,
            'credential_masked': _mask_credential(account.credential),
        }
        for account in accounts
    ]
|
|
|
|
|
|
@router.post('/admin/providers/accounts', status_code=status.HTTP_201_CREATED)
def create_provider_account(payload: ProviderAccountCreate, db: Session = Depends(get_db), user: models.User = Depends(require_admin)):
    """Register a new provider account (admin only).

    The provider name is lower-cased and stripped; label and credential are
    stripped. The new account is created enabled and attributed to the calling
    admin via ``created_by``.

    Returns:
        A dict with the new account's ``id``, ``provider``, ``label`` and
        ``is_enabled``. The credential is not echoed back.

    Raises:
        HTTPException: 400 when the provider is not in ``SUPPORTED_PROVIDERS``
            or when the credential is empty after stripping.
    """
    provider = payload.provider.lower().strip()
    if provider not in SUPPORTED_PROVIDERS:
        raise HTTPException(status_code=400, detail=f'Unsupported provider: {provider}')

    # A blank credential can never authenticate against a provider, so refuse
    # it up front instead of persisting an unusable account.
    credential = payload.credential.strip()
    if not credential:
        raise HTTPException(status_code=400, detail='credential required')

    account = ProviderAccount(
        provider=provider,
        label=payload.label.strip(),
        credential=credential,
        is_enabled=True,
        created_by=user.id,
    )
    db.add(account)
    db.commit()
    # Reload so database-generated values (e.g. the primary key) are populated.
    db.refresh(account)
    return {
        'id': account.id,
        'provider': account.provider,
        'label': account.label,
        'is_enabled': account.is_enabled,
    }
|
|
|
|
|
|
@router.post('/admin/providers/test')
def test_provider(payload: ProviderTestRequest, _: models.User = Depends(require_admin)):
    # Admin-only connectivity probe: normalises the inputs the same way the
    # create endpoint does and reports what test_provider_connection returns.
    # Nothing is written to the database here.
    provider_name = payload.provider.lower().strip()
    credential = payload.credential.strip()
    ok, message = test_provider_connection(provider_name, credential)
    return {'ok': ok, 'message': message}
|
|
|
|
|
|
@router.delete('/admin/providers/accounts/{account_id}', status_code=status.HTTP_204_NO_CONTENT)
def delete_provider_account(account_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
    # Admin-only removal of one provider account; answers 404 when the id is
    # unknown and 204 with no body on success.
    account = (
        db.query(ProviderAccount)
        .filter(ProviderAccount.id == account_id)
        .first()
    )
    if not account:
        raise HTTPException(status_code=404, detail='Provider account not found')

    db.delete(account)
    db.commit()
    return None
|
|
|
|
|
|
@router.get('/admin/servers')
def list_servers(db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
    # Admin-only view of all monitored servers, built with the same 7-minute
    # offline threshold that the public overview uses.
    server_view = get_server_states_view(db, offline_after_minutes=7)
    return server_view
|
|
|
|
|
|
@router.post('/admin/servers', status_code=status.HTTP_201_CREATED)
def add_server(payload: MonitoredServerCreate, db: Session = Depends(get_db), user: models.User = Depends(require_admin)):
    # Admin-only registration of a server to monitor. The identifier is
    # stripped and must be non-empty and not already registered; both
    # failures answer 400. The new server starts enabled.
    identifier = payload.identifier.strip()
    if not identifier:
        raise HTTPException(status_code=400, detail='identifier required')

    duplicate = (
        db.query(MonitoredServer)
        .filter(MonitoredServer.identifier == identifier)
        .first()
    )
    if duplicate:
        raise HTTPException(status_code=400, detail='identifier already exists')

    server = MonitoredServer(
        identifier=identifier,
        display_name=payload.display_name,
        is_enabled=True,
        created_by=user.id,
    )
    db.add(server)
    db.commit()
    db.refresh(server)
    return {
        'id': server.id,
        'identifier': server.identifier,
        'display_name': server.display_name,
        'is_enabled': server.is_enabled,
    }
|
|
|
|
|
|
@router.post('/admin/servers/{server_id}/challenge', response_model=ChallengeResponse)
def issue_server_challenge(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
    # Admin-only: mint a random challenge UUID for the given server, valid for
    # 10 minutes from now (UTC), persist it and hand it back. 404 when the
    # server id is unknown.
    target = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
    if not target:
        raise HTTPException(status_code=404, detail='Server not found')

    token = str(uuid.uuid4())
    valid_until = datetime.now(timezone.utc) + timedelta(minutes=10)

    challenge = ServerChallenge(
        server_id=server_id,
        challenge_uuid=token,
        expires_at=valid_until,
    )
    db.add(challenge)
    db.commit()

    return ChallengeResponse(
        identifier=target.identifier,
        challenge_uuid=token,
        expires_at=valid_until.isoformat(),
    )
|
|
|
|
|
|
@router.delete('/admin/servers/{server_id}', status_code=status.HTTP_204_NO_CONTENT)
def delete_server(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
    # Admin-only removal of a monitored server together with its dependent
    # rows (state, challenges, handshake nonces), all in one commit.
    # 404 when the server id is unknown.
    server = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
    if not server:
        raise HTTPException(status_code=404, detail='Server not found')

    state = db.query(ServerState).filter(ServerState.server_id == server_id).first()
    if state:
        db.delete(state)

    # Bulk-delete the per-server handshake records before the server itself.
    for dependent in (ServerChallenge, ServerHandshakeNonce):
        db.query(dependent).filter(dependent.server_id == server_id).delete()

    db.delete(server)
    db.commit()
    return None
|
|
|
|
|
|
@router.post('/admin/servers/{server_id}/api-key')
def generate_api_key(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
    """Generate or regenerate API Key for a server (heartbeat v2)"""
    # Any previously issued key is overwritten; 404 when the id is unknown.
    target = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
    if not target:
        raise HTTPException(status_code=404, detail='Server not found')

    # URL-safe random token generated from 32 bytes of entropy.
    new_key = secrets.token_urlsafe(32)
    target.api_key = new_key
    db.commit()

    return {
        'server_id': target.id,
        'api_key': new_key,
        'message': 'Store this key securely - it will not be shown again',
    }
|
|
|
|
|
|
@router.delete('/admin/servers/{server_id}/api-key', status_code=status.HTTP_204_NO_CONTENT)
def revoke_api_key(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
    """Revoke API Key for a server"""
    # Clears the stored key; 404 when the server id is unknown.
    target = (
        db.query(MonitoredServer)
        .filter(MonitoredServer.id == server_id)
        .first()
    )
    if not target:
        raise HTTPException(status_code=404, detail='Server not found')

    target.api_key = None
    db.commit()
    return None
|
|
|
|
|
|
class ServerHeartbeat(BaseModel):
    # Request body for POST /monitor/server/heartbeat.

    # Identifier the server was registered under; used to look the server up.
    identifier: str

    # Version strings reported by the server; both are optional.
    openclaw_version: str | None = None
    plugin_version: str | None = None

    # Agent descriptors; stored as a JSON string by the handler.
    agents: List[dict] = []

    # Resource usage figures; presumably percentages, not range-checked here.
    cpu_pct: float | None = None
    mem_pct: float | None = None
    disk_pct: float | None = None
    swap_pct: float | None = None
|
|
|
|
|
|
@router.post('/server/heartbeat')
def server_heartbeat(payload: ServerHeartbeat, db: Session = Depends(get_db)):
    # Heartbeat v1: the server is located purely by its identifier and must be
    # enabled; unknown/disabled identifiers answer 404. The single ServerState
    # row for the server is created on first contact and overwritten with the
    # reported values on every call.
    # NOTE(review): no authentication dependency is declared on this route.
    server = (
        db.query(MonitoredServer)
        .filter(
            MonitoredServer.identifier == payload.identifier,
            MonitoredServer.is_enabled == True,  # noqa: E712 - SQL expression, not a Python bool test
        )
        .first()
    )
    if not server:
        raise HTTPException(status_code=404, detail='unknown server identifier')

    state = db.query(ServerState).filter(ServerState.server_id == server.id).first()
    if not state:
        state = ServerState(server_id=server.id)
        db.add(state)

    # Fields that are copied verbatim from the payload onto the state row.
    copied_fields = (
        'openclaw_version',
        'plugin_version',
        'cpu_pct',
        'mem_pct',
        'disk_pct',
        'swap_pct',
    )
    for field_name in copied_fields:
        setattr(state, field_name, getattr(payload, field_name))
    state.agents_json = json.dumps(payload.agents, ensure_ascii=False)
    state.last_seen_at = datetime.now(timezone.utc)

    db.commit()
    return {'ok': True, 'server_id': server.id, 'last_seen_at': state.last_seen_at}
|
|
|
|
|
|
# Heartbeat v2 with API Key authentication
|
|
class TelemetryPayload(BaseModel):
    # Request body for POST /monitor/server/heartbeat-v2.

    # Required by the schema, although the v2 handler in this module resolves
    # the server from the X-API-Key header rather than from this field.
    identifier: str

    # Version strings reported by the server; both are optional.
    openclaw_version: str | None = None
    plugin_version: str | None = None

    # Agent descriptors; stored as a JSON string by the handler.
    agents: List[dict] = []

    # Resource usage figures; presumably percentages, not range-checked here.
    cpu_pct: float | None = None
    mem_pct: float | None = None
    disk_pct: float | None = None
    swap_pct: float | None = None

    # Accepted for forward compatibility; the v2 handler in this module does
    # not persist these two fields.
    load_avg: list[float] | None = None
    uptime_seconds: int | None = None
|
|
|
|
|
|
@router.post('/server/heartbeat-v2')
def server_heartbeat_v2(
    payload: TelemetryPayload,
    x_api_key: str = Header(..., alias='X-API-Key', description='API Key from /admin/servers/{id}/api-key'),
    db: Session = Depends(get_db)
):
    """Server heartbeat using API Key authentication (no challenge_uuid required)"""
    # The server is resolved from the API key alone and must be enabled;
    # anything else answers 401. payload.identifier is not cross-checked.
    key_matches = MonitoredServer.api_key == x_api_key
    enabled = MonitoredServer.is_enabled == True  # noqa: E712 - SQL expression, not a Python bool test
    server = db.query(MonitoredServer).filter(key_matches, enabled).first()
    if not server:
        raise HTTPException(status_code=401, detail='Invalid or missing API Key')

    # One ServerState row per server, created lazily on the first heartbeat.
    state = db.query(ServerState).filter(ServerState.server_id == server.id).first()
    if not state:
        state = ServerState(server_id=server.id)
        db.add(state)

    # Fields that are copied verbatim from the payload onto the state row.
    copied_fields = (
        'openclaw_version',
        'plugin_version',
        'cpu_pct',
        'mem_pct',
        'disk_pct',
        'swap_pct',
    )
    for field_name in copied_fields:
        setattr(state, field_name, getattr(payload, field_name))
    state.agents_json = json.dumps(payload.agents, ensure_ascii=False)
    state.last_seen_at = datetime.now(timezone.utc)

    db.commit()
    return {
        'ok': True,
        'server_id': server.id,
        'identifier': server.identifier,
        'last_seen_at': state.last_seen_at,
    }
|
|
|
|
|
|
def _apply_ws_event(state, event, payload):
    """Copy the data carried by one websocket event onto ``state``.

    ``server.hello`` overwrites both version fields and the agent list.
    ``server.metrics`` and ``agent.status_changed`` update only the keys that
    are present in ``payload``. Any other event leaves ``state`` untouched.
    """
    if event == 'server.hello':
        state.openclaw_version = payload.get('openclaw_version')
        state.plugin_version = payload.get('plugin_version')
        state.agents_json = json.dumps(payload.get('agents') or [], ensure_ascii=False)
    elif event in {'server.metrics', 'agent.status_changed'}:
        # Metrics fall back to the stored value when the key is absent.
        for metric in ('cpu_pct', 'mem_pct', 'disk_pct', 'swap_pct'):
            setattr(state, metric, payload.get(metric, getattr(state, metric)))
        for version_field in ('openclaw_version', 'plugin_version'):
            if version_field in payload:
                setattr(state, version_field, payload.get(version_field))
        if 'agents' in payload:
            state.agents_json = json.dumps(payload.get('agents') or [], ensure_ascii=False)


def _challenge_expired(expires_at, now):
    """Return True when ``expires_at`` lies before the aware datetime ``now``.

    Challenges are written with UTC-aware timestamps, but some database
    backends hand datetimes back without tzinfo. Comparing a naive value with
    an aware one raises ``TypeError``, so naive values are interpreted as UTC.
    """
    if expires_at.tzinfo is None:
        expires_at = expires_at.replace(tzinfo=timezone.utc)
    return expires_at < now


@router.websocket('/server/ws')
async def server_ws(websocket: WebSocket):
    """Websocket over which a monitored server authenticates and reports state.

    Handshake (first JSON message):
      * encrypted mode - ``encrypted_payload`` is decrypted and must carry
        ``identifier``, ``challenge_uuid``, ``nonce`` and a ``ts`` no older
        than 10 minutes;
      * plaintext mode (backward compatible) - the same three fields are read
        directly from the message and no timestamp is checked.

    The challenge must belong to the server, be unused and unexpired; the
    nonce must not have been seen for that server. On success the challenge is
    marked used, the nonce is recorded, and the socket is registered in
    ``ACTIVE_WS``.

    Afterwards every JSON message ``{'event': ..., 'payload': {...}}`` updates
    the server's ``ServerState`` row and refreshes ``last_seen_at``.

    Close codes: 4400 missing handshake field, 4401 stale timestamp or invalid
    challenge, 4404 unknown/disabled server, 4409 replayed nonce, 1011
    unexpected server-side error.
    """
    import logging  # function-scope import keeps this block self-contained

    await websocket.accept()
    db = SessionLocal()
    server_id = None
    try:
        hello = await websocket.receive_json()

        encrypted_payload = (hello.get('encrypted_payload') or '').strip()
        encrypted = bool(encrypted_payload)
        # Encrypted mode carries the handshake fields inside the decrypted
        # payload; the plaintext fallback keeps older clients working.
        fields = decrypt_payload_b64(encrypted_payload) if encrypted else hello
        identifier = (fields.get('identifier') or '').strip()
        challenge_uuid = (fields.get('challenge_uuid') or '').strip()
        nonce = (fields.get('nonce') or '').strip()
        if encrypted and not ts_within(fields.get('ts'), max_minutes=10):
            await websocket.close(code=4401)
            return

        if not (identifier and challenge_uuid and nonce):
            await websocket.close(code=4400)
            return

        server = (
            db.query(MonitoredServer)
            .filter(
                MonitoredServer.identifier == identifier,
                MonitoredServer.is_enabled == True,  # noqa: E712 - SQL expression, not a Python bool test
            )
            .first()
        )
        if not server:
            await websocket.close(code=4404)
            return

        challenge = (
            db.query(ServerChallenge)
            .filter(
                ServerChallenge.challenge_uuid == challenge_uuid,
                ServerChallenge.server_id == server.id,
            )
            .first()
        )
        if (
            not challenge
            or challenge.used_at is not None
            or _challenge_expired(challenge.expires_at, datetime.now(timezone.utc))
        ):
            await websocket.close(code=4401)
            return

        nonce_used = (
            db.query(ServerHandshakeNonce)
            .filter(
                ServerHandshakeNonce.server_id == server.id,
                ServerHandshakeNonce.nonce == nonce,
            )
            .first()
        )
        if nonce_used:
            await websocket.close(code=4409)
            return

        # Burn the nonce and the challenge together so neither can be replayed.
        db.add(ServerHandshakeNonce(server_id=server.id, nonce=nonce))
        challenge.used_at = datetime.now(timezone.utc)
        db.commit()

        server_id = server.id
        ACTIVE_WS[server_id] = websocket
        await websocket.send_json({'ok': True, 'server_id': server_id, 'message': 'connected'})

        while True:
            msg = await websocket.receive_json()
            event = msg.get('event')
            payload = msg.get('payload') or {}

            state = db.query(ServerState).filter(ServerState.server_id == server_id).first()
            if not state:
                state = ServerState(server_id=server_id)
                db.add(state)

            _apply_ws_event(state, event, payload)

            # Any message, including an unrecognised event, counts as a sign of life.
            state.last_seen_at = datetime.now(timezone.utc)
            db.commit()
    except WebSocketDisconnect:
        # Normal termination: the peer went away.
        pass
    except Exception:
        # Record the failure instead of dropping it silently, then tell the
        # peer that the server hit an internal error.
        logging.getLogger(__name__).exception(
            'monitor websocket failed (server_id=%s)', server_id
        )
        try:
            await websocket.close(code=1011)
        except Exception:
            # Best effort: the socket may already be closed.
            pass
    finally:
        # Only drop the registry entry if it still points at this connection;
        # a newer connection for the same server must not be evicted.
        if server_id is not None and ACTIVE_WS.get(server_id) is websocket:
            ACTIVE_WS.pop(server_id, None)
        db.close()
|