Files
HarborForge.Backend/app/api/routers/monitor.py
zhi 14dcda3cdc feat(monitor): store nginx telemetry for generic clients
- accept nginx installation status and sites-enabled list
- persist nginx fields in server state
- expose nginx data in monitor overview/admin views
- auto-migrate new server_states columns on startup
2026-03-20 10:03:56 +00:00

260 lines
9.8 KiB
Python

from datetime import datetime, timezone
import json
import secrets
from typing import List
from fastapi import APIRouter, Depends, Header, HTTPException, status
from pydantic import BaseModel
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.core.config import get_db
from app.api.deps import get_current_user_or_apikey
from app.models import models
from app.models.monitor import (
ProviderAccount,
MonitoredServer,
ServerState,
)
from app.services.monitoring import (
get_task_stats_cached,
get_provider_usage_view,
get_server_states_view,
test_provider_connection,
)
router = APIRouter(prefix='/monitor', tags=['Monitor'])
SUPPORTED_PROVIDERS = {'anthropic', 'openai', 'minimax', 'kimi', 'qwen'}
class ProviderAccountCreate(BaseModel):
provider: str
label: str
credential: str
class ProviderTestRequest(BaseModel):
provider: str
credential: str
class MonitoredServerCreate(BaseModel):
identifier: str
display_name: str | None = None
def require_admin(current_user: models.User = Depends(get_current_user_or_apikey)):
if not current_user.is_admin:
raise HTTPException(status_code=403, detail='Admin required')
return current_user
@router.get('/public/overview')
def public_overview(db: Session = Depends(get_db)):
return {
'tasks': get_task_stats_cached(db, ttl_seconds=1800),
'providers': get_provider_usage_view(db),
'servers': get_server_states_view(db, offline_after_minutes=7),
'generated_at': datetime.now(timezone.utc).isoformat(),
}
@router.get('/admin/providers/accounts')
def list_provider_accounts(db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
accounts = db.query(ProviderAccount).order_by(ProviderAccount.created_at.desc()).all()
return [
{
'id': a.id,
'provider': a.provider,
'label': a.label,
'is_enabled': a.is_enabled,
'created_at': a.created_at,
'credential_masked': '***' + (a.credential[-4:] if a.credential else ''),
}
for a in accounts
]
@router.post('/admin/providers/accounts', status_code=status.HTTP_201_CREATED)
def create_provider_account(payload: ProviderAccountCreate, db: Session = Depends(get_db), user: models.User = Depends(require_admin)):
provider = payload.provider.lower().strip()
if provider not in SUPPORTED_PROVIDERS:
raise HTTPException(status_code=400, detail=f'Unsupported provider: {provider}')
obj = ProviderAccount(
provider=provider,
label=payload.label.strip(),
credential=payload.credential.strip(),
is_enabled=True,
created_by=user.id,
)
db.add(obj)
db.commit()
db.refresh(obj)
return {'id': obj.id, 'provider': obj.provider, 'label': obj.label, 'is_enabled': obj.is_enabled}
@router.post('/admin/providers/test')
def test_provider(payload: ProviderTestRequest, _: models.User = Depends(require_admin)):
ok, message = test_provider_connection(payload.provider.lower().strip(), payload.credential.strip())
return {'ok': ok, 'message': message}
@router.delete('/admin/providers/accounts/{account_id}', status_code=status.HTTP_204_NO_CONTENT)
def delete_provider_account(account_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
obj = db.query(ProviderAccount).filter(ProviderAccount.id == account_id).first()
if not obj:
raise HTTPException(status_code=404, detail='Provider account not found')
db.delete(obj)
db.commit()
return None
@router.get('/admin/servers')
def list_servers(db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
return get_server_states_view(db, offline_after_minutes=7)
@router.post('/admin/servers', status_code=status.HTTP_201_CREATED)
def add_server(payload: MonitoredServerCreate, db: Session = Depends(get_db), user: models.User = Depends(require_admin)):
identifier = payload.identifier.strip()
if not identifier:
raise HTTPException(status_code=400, detail='identifier required')
exists = db.query(MonitoredServer).filter(MonitoredServer.identifier == identifier).first()
if exists:
raise HTTPException(status_code=400, detail='identifier already exists')
obj = MonitoredServer(identifier=identifier, display_name=payload.display_name, is_enabled=True, created_by=user.id)
db.add(obj)
db.commit()
db.refresh(obj)
return {'id': obj.id, 'identifier': obj.identifier, 'display_name': obj.display_name, 'is_enabled': obj.is_enabled}
@router.delete('/admin/servers/{server_id}', status_code=status.HTTP_204_NO_CONTENT)
def delete_server(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
obj = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
if not obj:
raise HTTPException(status_code=404, detail='Server not found')
# Delete dependent rows first to avoid FK errors.
db.query(ServerState).filter(ServerState.server_id == server_id).delete(synchronize_session=False)
# Backward-compatible cleanup for deprecated challenge tables that may still exist in older DBs.
try:
db.execute(text('DELETE FROM server_handshake_nonces WHERE server_id = :server_id'), {'server_id': server_id})
except Exception:
pass
try:
db.execute(text('DELETE FROM server_challenges WHERE server_id = :server_id'), {'server_id': server_id})
except Exception:
pass
db.delete(obj)
db.commit()
return None
@router.post('/admin/servers/{server_id}/api-key')
def generate_api_key(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
"""Generate or regenerate API Key for a server (heartbeat v2)"""
server = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
if not server:
raise HTTPException(status_code=404, detail='Server not found')
api_key = secrets.token_urlsafe(32)
server.api_key = api_key
db.commit()
return {'server_id': server.id, 'api_key': api_key, 'message': 'Store this key securely - it will not be shown again'}
@router.delete('/admin/servers/{server_id}/api-key', status_code=status.HTTP_204_NO_CONTENT)
def revoke_api_key(server_id: int, db: Session = Depends(get_db), _: models.User = Depends(require_admin)):
"""Revoke API Key for a server"""
server = db.query(MonitoredServer).filter(MonitoredServer.id == server_id).first()
if not server:
raise HTTPException(status_code=404, detail='Server not found')
server.api_key = None
db.commit()
return None
class ServerHeartbeat(BaseModel):
identifier: str
openclaw_version: str | None = None
plugin_version: str | None = None
agents: List[dict] = []
nginx_installed: bool | None = None
nginx_sites: List[str] = []
cpu_pct: float | None = None
mem_pct: float | None = None
disk_pct: float | None = None
swap_pct: float | None = None
@router.post('/server/heartbeat')
def server_heartbeat(payload: ServerHeartbeat, db: Session = Depends(get_db)):
server = db.query(MonitoredServer).filter(MonitoredServer.identifier == payload.identifier, MonitoredServer.is_enabled == True).first()
if not server:
raise HTTPException(status_code=404, detail='unknown server identifier')
st = db.query(ServerState).filter(ServerState.server_id == server.id).first()
if not st:
st = ServerState(server_id=server.id)
db.add(st)
st.openclaw_version = payload.openclaw_version
st.plugin_version = payload.plugin_version
st.agents_json = json.dumps(payload.agents, ensure_ascii=False)
st.nginx_installed = payload.nginx_installed
st.nginx_sites_json = json.dumps(payload.nginx_sites, ensure_ascii=False)
st.cpu_pct = payload.cpu_pct
st.mem_pct = payload.mem_pct
st.disk_pct = payload.disk_pct
st.swap_pct = payload.swap_pct
st.last_seen_at = datetime.now(timezone.utc)
db.commit()
return {'ok': True, 'server_id': server.id, 'last_seen_at': st.last_seen_at}
# Heartbeat v2 with API Key authentication
class TelemetryPayload(BaseModel):
identifier: str
openclaw_version: str | None = None
plugin_version: str | None = None
agents: List[dict] = []
nginx_installed: bool | None = None
nginx_sites: List[str] = []
cpu_pct: float | None = None
mem_pct: float | None = None
disk_pct: float | None = None
swap_pct: float | None = None
load_avg: list[float] | None = None
uptime_seconds: int | None = None
@router.post('/server/heartbeat-v2')
def server_heartbeat_v2(
payload: TelemetryPayload,
x_api_key: str = Header(..., alias='X-API-Key', description='API Key from /admin/servers/{id}/api-key'),
db: Session = Depends(get_db)
):
"""Server heartbeat using API Key authentication (no challenge_uuid required)"""
server = db.query(MonitoredServer).filter(
MonitoredServer.api_key == x_api_key,
MonitoredServer.is_enabled == True
).first()
if not server:
raise HTTPException(status_code=401, detail='Invalid or missing API Key')
st = db.query(ServerState).filter(ServerState.server_id == server.id).first()
if not st:
st = ServerState(server_id=server.id)
db.add(st)
st.openclaw_version = payload.openclaw_version
st.plugin_version = payload.plugin_version
st.agents_json = json.dumps(payload.agents, ensure_ascii=False)
st.nginx_installed = payload.nginx_installed
st.nginx_sites_json = json.dumps(payload.nginx_sites, ensure_ascii=False)
st.cpu_pct = payload.cpu_pct
st.mem_pct = payload.mem_pct
st.disk_pct = payload.disk_pct
st.swap_pct = payload.swap_pct
st.last_seen_at = datetime.now(timezone.utc)
db.commit()
return {'ok': True, 'server_id': server.id, 'identifier': server.identifier, 'last_seen_at': st.last_seen_at}