From 58f23ddcb814866a73b28e12d1684680044ddf2c Mon Sep 17 00:00:00 2001 From: hzhang Date: Sat, 16 May 2026 16:12:43 +0100 Subject: [PATCH] Security hardening: fix RCE, auth and SSRF issues Critical: - backup: prevent Zip Slip path traversal and zip bombs in restore/convert via safe_extract(); serialize get_backup() with backup_lock and always restore CWD so concurrent requests can't corrupt the os.chdir state - app: only enable the Werkzeug debugger/reloader when ENVIRONMENT=dev; always init rate limits (also under WSGI), not just under __main__ - apikey: fix create_key never committing (session.commit -> commit()), validate roles against an allowlist, and fix revoke_key/update_last_used operating on detached instances so revocation actually persists - env_provider: redact DB_PASSWORD and SESSION_SECRET_KEY in summerize() High: - markdown: filter private/protected docs for non-admins in the listing, get_home, get_index and search endpoints (was an anonymous data leak); escape LIKE metacharacters and cap search results - webhooks: validate target URL to block SSRF (loopback/private/link-local/ metadata IPs), disable redirects, safely parse additional_header - auth: validate JWT issuer and require exp/iat; add timeout to JWKS fetch; harden Authorization header parsing against malformed values - log: require admin for GET /api/log and auth for POST; bound entry size Co-Authored-By: Claude Opus 4.7 (1M context) --- api/__init__.py | 24 +++++++-- api/apikey/__init__.py | 30 +++++++---- api/backup.py | 48 +++++++++++++++++- api/log.py | 15 ++++-- api/markdown.py | 66 ++++++++++++++++++++++--- app.py | 8 ++- env_provider.py | 10 +++- events/WebhookEventHandlers/__init__.py | 59 ++++++++++++++++++++-- 8 files changed, 225 insertions(+), 35 deletions(-) diff --git a/api/__init__.py b/api/__init__.py index d42af5d..ac52bd1 100644 --- a/api/__init__.py +++ b/api/__init__.py @@ -37,7 +37,8 @@ def x5c_to_public_key(x5c): def get_jwks(): url = f"{env_provider.KC_HOST}/realms/{env_provider.KC_REALM}/protocol/openid-connect/certs" - response = requests.get(url) + response = requests.get(url, timeout=5) + response.raise_for_status() jwks = response.json() return jwks @@ -72,7 +73,9 @@ def verify_token(token): token, public_key, algorithms=["RS256"], - audience=env_provider.KC_CLIENT_ID + audience=env_provider.KC_CLIENT_ID, + issuer=f"{env_provider.KC_HOST}/realms/{env_provider.KC_REALM}", + options={"require": ["exp", "iat"]}, ) return decoded except ExpiredSignatureError as e: @@ -86,7 +89,10 @@ def is_user_admin(): is_admin = False auth_header = request.headers.get('Authorization') if auth_header and auth_header.startswith('Bearer'): - token = auth_header.split(" ")[1] + parts = auth_header.split(" ", 1) + if len(parts) != 2 or not parts[1].strip(): + return is_admin + token = parts[1].strip() decoded = verify_token(token) if decoded: user_roles = decoded.get("resource_access", {}).get(env_provider.KC_CLIENT_ID, {}).get("roles", []) @@ -124,7 +130,10 @@ def require_auth(roles=[]): if not auth_header or not auth_header.startswith('Bearer'): return jsonify({"error": "Unauthorized"}), 401 - token = auth_header.split(" ")[1] + parts = auth_header.split(" ", 1) + if len(parts) != 2 or not parts[1].strip(): + return jsonify({"error": "Unauthorized"}), 401 + token = parts[1].strip() decoded = verify_token(token) if not decoded: @@ -207,6 +216,11 @@ def get_api_key(key): return session.query(APIKey).filter_by(key=key, is_active=True).first() def update_last_used(api_key): + # api_key comes from get_api_key()'s (now closed) session, so it is + # detached. Update by key in a fresh session instead of mutating the + # detached instance, which would never be persisted. with get_db() as session: - api_key.last_used_at = datetime.now(UTC) + session.query(APIKey).filter_by(key=api_key.key).update( + {APIKey.last_used_at: datetime.now(UTC)} + ) session.commit() \ No newline at end of file diff --git a/api/apikey/__init__.py b/api/apikey/__init__.py index 6f7bd2c..359be27 100644 --- a/api/apikey/__init__.py +++ b/api/apikey/__init__.py @@ -1,36 +1,46 @@ from flask import Blueprint, request, jsonify -from api import get_api_key, generate_api_key +from api import generate_api_key from db import get_db from api import require_auth from db.models.APIKey import APIKey api_key_bp = Blueprint('apikey', __name__, url_prefix='/api/apikey') +# An API key must never be able to request a role broader than what the +# product defines, regardless of what the request body asks for. +ALLOWED_API_KEY_ROLES = {'admin', 'creator', 'user'} + @api_key_bp.route('/', methods=['POST']) @require_auth(roles=['admin']) def create_key(): - data = request.get_json() - + data = request.get_json(silent=True) + if not data or 'name' not in data: return jsonify({"error": "Name is required"}), 400 + roles = data.get('roles', []) + if not isinstance(roles, list) or any(r not in ALLOWED_API_KEY_ROLES for r in roles): + return jsonify({"error": f"roles must be a subset of {sorted(ALLOWED_API_KEY_ROLES)}"}), 400 + try: with get_db() as session: - apikey = APIKey(key=generate_api_key(),name=data['name'], roles=roles) - session.add(apikey); - session.commit - return jsonify(apikey.to_dict()), 201 + apikey = APIKey(key=generate_api_key(), name=data['name'], roles=roles) + session.add(apikey) + session.commit() + result = apikey.to_dict() + return jsonify(result), 201 except Exception as e: return jsonify({"error": str(e)}), 500 @api_key_bp.route('/', methods=['DELETE']) @require_auth(roles=['admin']) def revoke_key(key): - - api_key = get_api_key(key) + # Query and mutate within the same session, otherwise the update is + # performed on a detached instance and silently never persists. with get_db() as session: + api_key = session.query(APIKey).filter_by(key=key, is_active=True).first() if not api_key: return jsonify({"error": "API key not found"}), 404 api_key.is_active = False session.commit() - return jsonify({"message": "API key revoked successfully"}), 200 + return jsonify({"message": "API key revoked successfully"}), 200 diff --git a/api/backup.py b/api/backup.py index c673f26..ccaac4a 100644 --- a/api/backup.py +++ b/api/backup.py @@ -215,6 +215,40 @@ logger = logging.getLogger(__name__) backup_bp = Blueprint('backup', __name__, url_prefix='/api/backup') +# Upper bounds to defend against zip bombs in uploaded backups. +_MAX_ARCHIVE_MEMBERS = 50000 +_MAX_UNCOMPRESSED_BYTES = 1 * 1024 * 1024 * 1024 # 1 GiB + + +def safe_extract(zip_ref, dest_dir): + """ + Safely extract a zip archive into dest_dir. + + Prevents "Zip Slip" path traversal (entries like ``../../etc/x`` or + absolute paths) and rejects zip bombs by capping the member count and + total uncompressed size. Backups are admin-uploaded but may originate + from an untrusted source (e.g. the /convert endpoint ingests foreign + backups), so the contents must be treated as hostile. + """ + dest_root = os.path.realpath(dest_dir) + members = zip_ref.infolist() + + if len(members) > _MAX_ARCHIVE_MEMBERS: + raise ValueError("backup archive has too many entries") + + total = 0 + for member in members: + total += member.file_size + if total > _MAX_UNCOMPRESSED_BYTES: + raise ValueError("backup archive uncompressed size exceeds limit") + + target = os.path.realpath(os.path.join(dest_root, member.filename)) + if target != dest_root and not target.startswith(dest_root + os.sep): + raise ValueError(f"unsafe path in backup archive: {member.filename}") + + zip_ref.extractall(dest_root) + + def check_and_convert_backup_version(backup_dir): """ Check the backup version and convert it if necessary. @@ -280,7 +314,7 @@ def convert_backup_endpoint(): uploaded_file.save(zip_path) with zipfile.ZipFile(zip_path, 'r') as zip_ref: - zip_ref.extractall(backup_dir) + safe_extract(zip_ref, backup_dir) success, error_response = check_and_convert_backup_version(backup_dir) if not success: @@ -331,7 +365,12 @@ def get_backup(): Response Codes: - 200: Backup created successfully - 500: Failed to create backup + - 429: Another backup operation is in progress """ + if not backup_lock.acquire(blocking=False): + return jsonify({"error": "Another backup operation is in progress. Please try again later."}), 429 + + original_cwd = os.getcwd() try: if os.path.exists('Root'): shutil.rmtree('Root') @@ -370,6 +409,11 @@ def get_backup(): except Exception as e: logger.error(f"Failed to get backup: {e}") return jsonify({"error": "failed to get backup"}), 500 + finally: + # os.chdir is process-global; always restore CWD so a failure + # mid-traverse can't corrupt later requests. + os.chdir(original_cwd) + backup_lock.release() def create_and_cd(path_name): @@ -552,7 +596,7 @@ def load_backup(): uploaded_file.save(zip_path) with zipfile.ZipFile(zip_path, 'r') as zip_ref: - zip_ref.extractall(temp_dir) + safe_extract(zip_ref, temp_dir) root_dir = os.path.join(temp_dir, "Root") if not os.path.exists(root_dir): diff --git a/api/log.py b/api/log.py index 166078f..7f825c4 100644 --- a/api/log.py +++ b/api/log.py @@ -1,12 +1,18 @@ #api/log.py from flask import Blueprint, jsonify, request +from api import require_auth from db import get_db from db.models.Log import Log from db.utils import insert_log logs_bp = Blueprint('log', __name__, url_prefix='/api/log') +# Bound per-entry size so an authenticated-but-low-trust caller can't bloat +# the log table with multi-megabyte payloads. +_MAX_LOG_MESSAGE_LEN = 16 * 1024 + @logs_bp.route('/', methods=['GET']) +@require_auth(roles=['admin']) def get_logs(): level = request.args.get('level') application = request.args.get('application') @@ -29,14 +35,17 @@ def get_logs(): }) @logs_bp.route('/', methods=['POST']) +@require_auth() def create_log(): - data = request.json + data = request.get_json(silent=True) + if not data: + return jsonify({"error": "invalid or missing JSON body"}), 400 required_fields = ['level', 'message'] for field in required_fields: if field not in data: return jsonify({"error": f"missing {field} in request"}), 400 - level = data.get('level') - message = data.get('message') + level = str(data.get('level'))[:64] + message = str(data.get('message'))[:_MAX_LOG_MESSAGE_LEN] application = "frontend" extra = data.get('extra', None) log_entry = Log(level=level, message=message, application=application, extra=extra) diff --git a/api/markdown.py b/api/markdown.py index 9c09184..01c0ea1 100644 --- a/api/markdown.py +++ b/api/markdown.py @@ -15,6 +15,40 @@ logger = logging.getLogger(__name__) markdown_bp = Blueprint('markdown', __name__, url_prefix='/api/markdown') + +def _permission_of(session, markdown): + """Return the effective permission string ('private'/'protected'/None).""" + if markdown.setting_id is None: + return None + setting = session.query(MarkdownSetting).get(markdown.setting_id) + if not setting or not setting.permission_setting_id: + return None + ps = session.query(MarkdownPermissionSetting).get(setting.permission_setting_id) + return ps.permission if ps else None + + +def _filter_visible(session, markdowns, is_admin): + """ + Apply markdown permission settings to a list for non-admin callers: + private documents are dropped entirely, protected documents have their + content masked. Admins see everything. Mirrors get_markdown()'s checks + so listing/search endpoints can no longer leak restricted content. + """ + if is_admin: + return [md.to_dict() for md in markdowns] + + visible = [] + for md in markdowns: + permission = _permission_of(session, md) + if permission == 'private': + continue + data = md.to_dict() + if permission == 'protected': + data['content'] = None + data['protected'] = True + visible.append(data) + return visible + @markdown_bp.route('/', methods=['GET']) @limiter.limit(api.get_rate_limit) @etag_response @@ -30,9 +64,10 @@ def get_markdowns(): Response Codes: - 200: Success """ + is_admin = is_user_admin() with get_db() as session: mds = session.query(Markdown).all() - return jsonify([md.to_dict() for md in mds]), 200 + return jsonify(_filter_visible(session, mds, is_admin)), 200 @markdown_bp.route('/get_home', methods=['GET']) @limiter.limit(api.get_rate_limit) @@ -51,11 +86,15 @@ def get_home(): - 200: Success - 204: No content (home markdown not found) """ + is_admin = is_user_admin() with get_db() as session: markdown = session.query(Markdown).filter(Markdown.path_id == 1, Markdown.title == "index").first() if markdown is None: return jsonify({}), 204 - return jsonify(markdown.to_dict()), 200 + visible = _filter_visible(session, [markdown], is_admin) + if not visible: + return jsonify({}), 204 + return jsonify(visible[0]), 200 @markdown_bp.route('/by_path/', methods=['GET']) @limiter.limit(api.get_rate_limit) @@ -75,9 +114,10 @@ def get_markdowns_by_path(path_id): Response Codes: - 200: Success """ + is_admin = is_user_admin() with get_db() as session: markdowns = session.query(Markdown).filter(Markdown.path_id == path_id).all() - return jsonify([md.to_dict() for md in markdowns]), 200 + return jsonify(_filter_visible(session, markdowns, is_admin)), 200 @markdown_bp.route('/get_index/', methods=['GET']) @limiter.limit(api.get_rate_limit) @@ -99,11 +139,15 @@ def get_index(path_id): - 200: Success - 204: No content (index markdown not found) """ + is_admin = is_user_admin() with get_db() as session: markdown = session.query(Markdown).filter(Markdown.path_id == path_id).filter(Markdown.title == "index").first() if markdown is None: return jsonify({}), 204 - return jsonify(markdown.to_dict()), 200 + visible = _filter_visible(session, [markdown], is_admin) + if not visible: + return jsonify({}), 204 + return jsonify(visible[0]), 200 @@ -429,11 +473,19 @@ def search_markdowns(keyword): Response Codes: - 200: Success """ + is_admin = is_user_admin() + # Escape LIKE metacharacters so user input can't inject wildcards + # (full-table-scan DoS) and so search actually matches substrings. + escaped = keyword.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_') + pattern = f"%{escaped}%" with get_db() as session: res = session.query(Markdown).filter( - or_(Markdown.title.like(keyword), Markdown.content.like(keyword)) - ).all() - return jsonify([md.to_dict() for md in res]), 200 + or_( + Markdown.title.like(pattern, escape='\\'), + Markdown.content.like(pattern, escape='\\'), + ) + ).limit(200).all() + return jsonify(_filter_visible(session, res, is_admin)), 200 @markdown_bp.route('/links', methods=['GET']) @limiter.limit(api.get_rate_limit) diff --git a/app.py b/app.py index bbcdc2c..823594e 100644 --- a/app.py +++ b/app.py @@ -53,8 +53,12 @@ def log_request(): logger.info(f"Request received: {request.method} {request.path} from {request.remote_addr}") +api.init_rate_limits(app) + if __name__ == '__main__': - api.init_rate_limits(app) print("env") pprint(env_provider.summerize()) - app.run(host='0.0.0.0', port=5000, debug=True, use_reloader=True) + # The Werkzeug debugger allows remote code execution. Only enable it in + # an explicit dev environment, never in the published production image. + is_dev = env_provider.ENVIRONMENT == "dev" + app.run(host='0.0.0.0', port=5000, debug=is_dev, use_reloader=is_dev) diff --git a/env_provider.py b/env_provider.py index 8881f48..805e119 100644 --- a/env_provider.py +++ b/env_provider.py @@ -23,6 +23,12 @@ KC_CLIENT_ID = os.getenv("KC_CLIENT_ID") FRONTEND_HOST = os.getenv("FRONTEND_HOST") BACKEND_HOST = os.getenv("BACKEND_HOST") +def _redact(value): + if not value: + return "" + return f"" + + def summerize(): return { "ENVIRONMENT": ENVIRONMENT, @@ -30,9 +36,9 @@ def summerize(): 'DB_PORT': DB_PORT, 'DB_NAME': DB_NAME, 'DB_USER': DB_USER, - 'DB_PASSWORD': DB_PASSWORD, + 'DB_PASSWORD': _redact(DB_PASSWORD), 'DB_SCHEMA_UPDATED': DB_SCHEMA_UPDATED, - 'SESSION_SECRET_KEY': SESSION_SECRET_KEY, + 'SESSION_SECRET_KEY': _redact(SESSION_SECRET_KEY), 'KC_HOST': KC_HOST, 'KC_REALM': KC_REALM, 'KC_CLIENT_ID': KC_CLIENT_ID, diff --git a/events/WebhookEventHandlers/__init__.py b/events/WebhookEventHandlers/__init__.py index feac0be..f652f81 100644 --- a/events/WebhookEventHandlers/__init__.py +++ b/events/WebhookEventHandlers/__init__.py @@ -7,10 +7,55 @@ from events import MARKDOWN_CREATED_EVENT, MARKDOWN_UPDATED_EVENT, MARKDOWN_DELE PATH_UPDATED_EVENT, PATH_DELETED_EVENT import abc import importlib +import ipaddress import json import pkgutil +import socket import requests import db +from urllib.parse import urlsplit + +import logging +logger = logging.getLogger(__name__) + + +def is_safe_webhook_url(url): + """ + Reject webhook targets that could be used for SSRF: only http/https, + and the resolved host must not be loopback / private / link-local / + reserved. Defends internal services and cloud metadata endpoints + (e.g. 169.254.169.254) even when the stored URL came from a backup. + """ + try: + parts = urlsplit(url) + except Exception: + return False + if parts.scheme not in ("http", "https") or not parts.hostname: + return False + try: + infos = socket.getaddrinfo(parts.hostname, None) + except socket.gaierror: + return False + for info in infos: + ip = ipaddress.ip_address(info[4][0]) + if (ip.is_private or ip.is_loopback or ip.is_link_local + or ip.is_reserved or ip.is_multicast or ip.is_unspecified): + return False + return True + + +def parse_additional_headers(raw): + """Best-effort parse of the stored additional_header JSON object.""" + if not raw: + return {} + try: + parsed = json.loads(raw) + except (ValueError, TypeError): + logger.warning("webhook additional_header is not valid JSON; ignoring") + return {} + if not isinstance(parsed, dict): + return {} + return {str(k): str(v) for k, v in parsed.items()} event_type_map = { @@ -36,15 +81,21 @@ class WebhookEventHandler(abc.ABC): setting = self.get_setting(session, path_id) if setting is None: return + webhook_url = setting["webhook_url"] + if not is_safe_webhook_url(webhook_url): + logger.warning("blocked webhook to unsafe URL: %s", webhook_url) + return headers = {'Content-Type': 'application/json', 'x-alchegos-event': event_type_map[self.event_type]} - if setting.get("additional_header", None) is not None: - headers.update(json.loads(setting["additional_header"])) + headers.update(parse_additional_headers(setting.get("additional_header"))) body = json.dumps(payload, default=str) try: - response = requests.post(setting["webhook_url"], data=body, headers=headers, timeout=5) + response = requests.post( + webhook_url, data=body, headers=headers, + timeout=5, allow_redirects=False, + ) response.raise_for_status() except Exception as e: - print(e) + logger.warning("webhook delivery failed: %s", e) def get_setting(self, session: Session, path_id):