Security hardening: fix RCE, auth and SSRF issues

Critical:
- backup: prevent Zip Slip path traversal and zip bombs in restore/convert
  via safe_extract(); serialize get_backup() with backup_lock and always
  restore CWD so concurrent requests can't corrupt the os.chdir state
- app: only enable the Werkzeug debugger/reloader when ENVIRONMENT=dev;
  always init rate limits (also under WSGI), not just under __main__
- apikey: fix create_key never committing (session.commit -> commit()),
  validate roles against an allowlist, and fix revoke_key/update_last_used
  operating on detached instances so revocation actually persists
- env_provider: redact DB_PASSWORD and SESSION_SECRET_KEY in summerize()

High:
- markdown: filter private/protected docs for non-admins in the listing,
  get_home, get_index and search endpoints (was an anonymous data leak);
  escape LIKE metacharacters and cap search results
- webhooks: validate target URL to block SSRF (loopback/private/link-local/
  metadata IPs), disable redirects, safely parse additional_header
- auth: validate JWT issuer and require exp/iat; add timeout to JWKS fetch;
  harden Authorization header parsing against malformed values
- log: require admin for GET /api/log and auth for POST; bound entry size

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
h z
2026-05-16 16:12:43 +01:00
parent 1f4ca52a10
commit 58f23ddcb8
8 changed files with 225 additions and 35 deletions

View File

@@ -7,10 +7,55 @@ from events import MARKDOWN_CREATED_EVENT, MARKDOWN_UPDATED_EVENT, MARKDOWN_DELE
PATH_UPDATED_EVENT, PATH_DELETED_EVENT
import abc
import importlib
import ipaddress
import json
import pkgutil
import socket
import requests
import db
from urllib.parse import urlsplit
import logging
logger = logging.getLogger(__name__)
def is_safe_webhook_url(url):
    """
    Return True only if *url* is an acceptable webhook target.

    Rejects targets that could be used for SSRF: the scheme must be http or
    https, the host must resolve, and every resolved address must be a
    globally routable unicast address. Loopback, private, link-local,
    reserved, multicast, unspecified and any other non-global range (for
    example the 100.64.0.0/10 shared address space) are refused. This
    defends internal services and cloud metadata endpoints
    (e.g. 169.254.169.254) even when the stored URL came from a backup.

    The check fails closed: a URL that cannot be parsed, a hostname that
    cannot be encoded or resolved, or an address that cannot be interpreted
    yields False instead of raising.

    NOTE(review): the hostname is resolved here only for validation. If the
    HTTP client resolves it again when sending, a DNS answer that changes
    between the two lookups is not covered by this check.
    """
    try:
        parts = urlsplit(url)
    except Exception:
        return False
    if parts.scheme not in ("http", "https") or not parts.hostname:
        return False
    try:
        infos = socket.getaddrinfo(parts.hostname, None)
    except (socket.gaierror, UnicodeError):
        # UnicodeError comes from the IDNA encoding step, which rejects
        # hostnames containing empty or over-long labels (e.g. "a..b").
        return False
    if not infos:
        # Nothing was resolved, so nothing was verified: refuse.
        return False
    for info in infos:
        # IPv6 results may carry a zone id ("fe80::1%eth0"); drop it so the
        # address parses on every supported Python version.
        literal = info[4][0].split("%", 1)[0]
        try:
            ip = ipaddress.ip_address(literal)
        except ValueError:
            return False
        # Judge an IPv4-mapped IPv6 address (::ffff:a.b.c.d) by the IPv4
        # address it actually routes to.
        mapped = getattr(ip, "ipv4_mapped", None)
        if mapped is not None:
            ip = mapped
        if (ip.is_private or ip.is_loopback or ip.is_link_local
                or ip.is_reserved or ip.is_multicast or ip.is_unspecified
                or not ip.is_global):
            return False
    return True
def parse_additional_headers(raw):
    """
    Decode the stored additional_header value into a header mapping.

    *raw* is expected to hold a JSON object. An empty value, text that is
    not valid JSON, or JSON that is not an object all produce an empty
    dict. Keys and values of a valid object are converted with str().
    """
    headers = {}
    if raw:
        try:
            decoded = json.loads(raw)
        except (ValueError, TypeError):
            logger.warning("webhook additional_header is not valid JSON; ignoring")
        else:
            # Only a JSON object maps onto headers; other JSON types are dropped.
            if isinstance(decoded, dict):
                for name, value in decoded.items():
                    headers[str(name)] = str(value)
    return headers
event_type_map = {
@@ -36,15 +81,21 @@ class WebhookEventHandler(abc.ABC):
setting = self.get_setting(session, path_id)
if setting is None:
return
webhook_url = setting["webhook_url"]
if not is_safe_webhook_url(webhook_url):
logger.warning("blocked webhook to unsafe URL: %s", webhook_url)
return
headers = {'Content-Type': 'application/json', 'x-alchegos-event': event_type_map[self.event_type]}
if setting.get("additional_header", None) is not None:
headers.update(json.loads(setting["additional_header"]))
headers.update(parse_additional_headers(setting.get("additional_header")))
body = json.dumps(payload, default=str)
try:
response = requests.post(setting["webhook_url"], data=body, headers=headers, timeout=5)
response = requests.post(
webhook_url, data=body, headers=headers,
timeout=5, allow_redirects=False,
)
response.raise_for_status()
except Exception as e:
print(e)
logger.warning("webhook delivery failed: %s", e)
def get_setting(self, session: Session, path_id):