Files
HangmanLab.Backend/events/WebhookEventHandlers/__init__.py
hzhang 58f23ddcb8 Security hardening: fix RCE, auth and SSRF issues
Critical:
- backup: prevent Zip Slip path traversal and zip bombs in restore/convert
  via safe_extract(); serialize get_backup() with backup_lock and always
  restore CWD so concurrent requests can't corrupt the os.chdir state
- app: only enable the Werkzeug debugger/reloader when ENVIRONMENT=dev;
  always init rate limits (also under WSGI), not just under __main__
- apikey: fix create_key never committing (session.commit -> commit()),
  validate roles against an allowlist, and fix revoke_key/update_last_used
  operating on detached instances so revocation actually persists
- env_provider: redact DB_PASSWORD and SESSION_SECRET_KEY in summerize()

High:
- markdown: filter private/protected docs for non-admins in the listing,
  get_home, get_index and search endpoints (was an anonymous data leak);
  escape LIKE metacharacters and cap search results
- webhooks: validate target URL to block SSRF (loopback/private/link-local/
  metadata IPs), disable redirects, safely parse additional_header
- auth: validate JWT issuer and require exp/iat; add timeout to JWKS fetch;
  harden Authorization header parsing against malformed values
- log: require admin for GET /api/log and auth for POST; bound entry size

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 16:12:43 +01:00

142 lines
4.8 KiB
Python

from sqlalchemy.orm import Session
from db.models.Path import Path
from db.models.PathSetting import PathSetting
from db.models.Webhook import Webhook
from db.models.WebhookSetting import WebhookSetting
from events import MARKDOWN_CREATED_EVENT, MARKDOWN_UPDATED_EVENT, MARKDOWN_DELETED_EVENT, PATH_CREATED_EVENT, \
PATH_UPDATED_EVENT, PATH_DELETED_EVENT
import abc
import importlib
import ipaddress
import json
import pkgutil
import socket
import requests
import db
from urllib.parse import urlsplit
import logging
# Module-level logger; this file uses it for warnings about blocked webhook
# URLs, unparseable additional headers and failed deliveries.
logger = logging.getLogger(__name__)
def is_safe_webhook_url(url):
    """Return True only if ``url`` is an http(s) URL that resolves to public IPs.

    Rejects webhook targets that could be used for SSRF: the scheme must be
    http or https, a hostname must be present, and *every* address the
    hostname resolves to must be globally routable (not loopback, private,
    link-local, shared/CGNAT, reserved, multicast or unspecified). This
    defends internal services and cloud metadata endpoints (e.g.
    169.254.169.254 or 100.100.100.200) even when the stored URL came from
    a backup.

    Any parsing or resolution failure is treated as "unsafe" and returns
    False; the function does not raise for malformed input.

    NOTE: the hostname is resolved here and again by the HTTP client when
    the request is sent, so this check alone does not pin the address that
    is eventually contacted.
    """
    try:
        parts = urlsplit(url)
    except Exception:
        return False
    if parts.scheme not in ("http", "https") or not parts.hostname:
        return False
    try:
        infos = socket.getaddrinfo(parts.hostname, None)
    except (OSError, ValueError):
        # OSError covers socket.gaierror; ValueError covers the UnicodeError
        # raised by the IDNA encoder for empty or over-long hostname labels.
        return False
    if not infos:
        # Nothing resolved means nothing was vetted; do not report it as safe.
        return False
    for info in infos:
        # Drop an IPv6 zone id ("fe80::1%eth0") before parsing the address.
        address = str(info[4][0]).split("%", 1)[0]
        try:
            ip = ipaddress.ip_address(address)
        except ValueError:
            return False
        if (ip.is_private or ip.is_loopback or ip.is_link_local
                or ip.is_reserved or ip.is_multicast or ip.is_unspecified
                # is_global is also False for 100.64.0.0/10, which
                # is_private does not cover.
                or not ip.is_global):
            return False
    return True
def parse_additional_headers(raw):
    """Decode the stored ``additional_header`` JSON text into a header dict.

    Returns an empty dict when ``raw`` is falsy, is not valid JSON (a warning
    is logged in that case), or decodes to anything other than a JSON object.
    Otherwise every key and value of the object is converted with ``str``.
    """
    if raw:
        try:
            decoded = json.loads(raw)
        except (ValueError, TypeError):
            logger.warning("webhook additional_header is not valid JSON; ignoring")
        else:
            if isinstance(decoded, dict):
                headers = {}
                for name, value in decoded.items():
                    key = str(name)
                    headers[key] = str(value)
                return headers
    return {}
# Maps each event constant imported from `events` to the string name that
# WebhookEventHandler sends in the `x-alchegos-event` request header.
event_type_map = {
    MARKDOWN_CREATED_EVENT: 'markdown_created_event',
    MARKDOWN_UPDATED_EVENT: 'markdown_updated_event',
    MARKDOWN_DELETED_EVENT: 'markdown_deleted_event',
    PATH_CREATED_EVENT: 'path_created_event',
    PATH_UPDATED_EVENT: 'path_updated_event',
    PATH_DELETED_EVENT: 'path_deleted_event',
}
class WebhookEventHandler(abc.ABC):
    """Base class for handlers that forward an event payload to a webhook.

    Subclasses implement :meth:`get_path_id` to pull the affected path id out
    of the event payload. Calling an instance resolves the webhook setting
    that applies to that path and, when one is enabled for this handler's
    event type, POSTs the payload as JSON to the configured webhook URL.
    """

    def __init__(self, event_type=0):
        # Event constant this handler reacts to. It is AND-ed with a
        # setting's ``on_events`` value and used as the key into
        # ``event_type_map``.
        self.event_type = event_type

    @abc.abstractmethod
    def get_path_id(self, payload):
        """Return the id of the path that ``payload`` refers to."""

    def __call__(self, *args, **kwargs):
        """Send the event payload to the webhook configured for its path.

        The payload is read from the ``payload`` keyword argument (a missing
        key raises ``KeyError``). Nothing is sent when no applicable setting
        exists, when the target URL fails ``is_safe_webhook_url``, or when
        the handler's event type has no entry in ``event_type_map``.
        Failures of the HTTP request itself are logged, not raised.
        """
        payload = kwargs['payload']
        path_id = self.get_path_id(payload)
        # Only the lookup needs the database; everything read afterwards
        # comes from the dict get_setting returns, so the session is closed
        # before any network I/O happens.
        with db.get_db() as session:
            setting = self.get_setting(session, path_id)
        if setting is None:
            return

        webhook_url = setting["webhook_url"]
        if not is_safe_webhook_url(webhook_url):
            logger.warning("blocked webhook to unsafe URL: %s", webhook_url)
            return

        event_name = event_type_map.get(self.event_type)
        if event_name is None:
            # An unmapped event type used to surface as a KeyError raised to
            # the event dispatcher; skip delivery instead.
            logger.warning(
                "no webhook event name for event type %r; skipping delivery",
                self.event_type,
            )
            return

        headers = {'Content-Type': 'application/json', 'x-alchegos-event': event_name}
        # Stored headers are applied last, so they can override the defaults.
        headers.update(parse_additional_headers(setting.get("additional_header")))
        body = json.dumps(payload, default=str)
        try:
            # Redirects are not followed so a vetted URL cannot bounce the
            # request to a target that was never checked.
            response = requests.post(
                webhook_url, data=body, headers=headers,
                timeout=5, allow_redirects=False,
            )
            response.raise_for_status()
        except Exception as e:
            # Delivery is best-effort: log the failure and carry on.
            logger.warning("webhook delivery failed: %s", e)

    def get_setting(self, session: Session, path_id):
        """Resolve the webhook setting that applies to ``path_id``.

        Starting at ``path_id``, follows ``parent_id`` links upwards until a
        path whose PathSetting references a WebhookSetting is found. A path
        whose ``parent_id`` is 1 does not inherit from its parent.

        Args:
            session: open SQLAlchemy session used for all lookups.
            path_id: id of the path the event refers to.

        Returns:
            The webhook setting as a dict with an added ``"webhook_url"``
            key, or ``None`` when no setting applies: unknown path, path
            without a PathSetting, no WebhookSetting up the chain, setting
            disabled, event type not selected in ``on_events``, missing
            Webhook row, or a cycle in the parent chain.
        """
        visited = set()
        current_id = path_id
        webhook_setting = None
        while webhook_setting is None:
            if current_id in visited:
                # A cyclic parent chain would otherwise never terminate.
                logger.warning(
                    "cycle in path parent chain at path %s; no webhook resolved",
                    current_id,
                )
                return None
            visited.add(current_id)

            path = session.query(Path).filter(Path.id == current_id).first()
            if path is None:
                return None
            parent_id = path.to_dict()["parent_id"]
            path_setting = session.query(PathSetting).get(path.setting_id)
            if path_setting is None:
                # NOTE(review): a path with no PathSetting row stops the
                # lookup here instead of inheriting from its parent --
                # confirm this is intended.
                return None
            webhook_setting = session.query(WebhookSetting).get(path_setting.webhook_setting_id)
            if webhook_setting is None:
                if parent_id == 1:
                    return None
                current_id = parent_id

        setting = webhook_setting.to_dict()
        if not setting["enabled"] or (setting["on_events"] & self.event_type) == 0:
            return None
        webhook = session.query(Webhook).get(webhook_setting.webhook_id)
        if webhook is None:
            return None
        setting["webhook_url"] = webhook.to_dict()["hook_url"]
        return setting
# Classes collected by the ``auto_instantiate`` decorator; each one is
# instantiated by ``register_all_webhook_event_handlers``.
_auto_instantiate_classes = set()


def auto_instantiate(cls):
    """Class decorator that records ``cls`` for later instantiation.

    The class is added to the module-level registry and returned unchanged,
    so decorating a class does not alter it.
    """
    registry = _auto_instantiate_classes
    registry.add(cls)
    return cls
def register_all_webhook_event_handlers():
    """Import every module under this package, then instantiate the handlers.

    Importing the submodules runs their ``@auto_instantiate`` decorators,
    which fill the registry; each registered class is then called with no
    arguments. The resulting instances are not kept here.
    """
    module_prefix = __name__ + "."
    for _finder, module_name, _is_pkg in pkgutil.walk_packages(__path__, module_prefix):
        importlib.import_module(module_name)

    for handler_cls in _auto_instantiate_classes:
        handler_cls()