Security hardening: fix RCE, auth and SSRF issues

Critical:
- backup: prevent Zip Slip path traversal and zip bombs in restore/convert
  via safe_extract(); serialize get_backup() with backup_lock and always
  restore CWD so concurrent requests can't corrupt the os.chdir state
- app: only enable the Werkzeug debugger/reloader when ENVIRONMENT=dev;
  always init rate limits (also under WSGI), not just under __main__
- apikey: fix create_key never committing (session.commit -> commit()),
  validate roles against an allowlist, and fix revoke_key/update_last_used
  operating on detached instances so revocation actually persists
- env_provider: redact DB_PASSWORD and SESSION_SECRET_KEY in summerize()

High:
- markdown: filter private/protected docs for non-admins in the listing,
  get_home, get_index and search endpoints (was an anonymous data leak);
  escape LIKE metacharacters and cap search results
- webhooks: validate target URL to block SSRF (loopback/private/link-local/
  metadata IPs), disable redirects, safely parse additional_header
- auth: validate JWT issuer and require exp/iat; add timeout to JWKS fetch;
  harden Authorization header parsing against malformed values
- log: require admin for GET /api/log and auth for POST; bound entry size

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
h z
2026-05-16 16:12:43 +01:00
parent 1f4ca52a10
commit 58f23ddcb8
8 changed files with 225 additions and 35 deletions

View File

@@ -37,7 +37,8 @@ def x5c_to_public_key(x5c):
def get_jwks(): def get_jwks():
url = f"{env_provider.KC_HOST}/realms/{env_provider.KC_REALM}/protocol/openid-connect/certs" url = f"{env_provider.KC_HOST}/realms/{env_provider.KC_REALM}/protocol/openid-connect/certs"
response = requests.get(url) response = requests.get(url, timeout=5)
response.raise_for_status()
jwks = response.json() jwks = response.json()
return jwks return jwks
@@ -72,7 +73,9 @@ def verify_token(token):
token, token,
public_key, public_key,
algorithms=["RS256"], algorithms=["RS256"],
audience=env_provider.KC_CLIENT_ID audience=env_provider.KC_CLIENT_ID,
issuer=f"{env_provider.KC_HOST}/realms/{env_provider.KC_REALM}",
options={"require": ["exp", "iat"]},
) )
return decoded return decoded
except ExpiredSignatureError as e: except ExpiredSignatureError as e:
@@ -86,7 +89,10 @@ def is_user_admin():
is_admin = False is_admin = False
auth_header = request.headers.get('Authorization') auth_header = request.headers.get('Authorization')
if auth_header and auth_header.startswith('Bearer'): if auth_header and auth_header.startswith('Bearer'):
token = auth_header.split(" ")[1] parts = auth_header.split(" ", 1)
if len(parts) != 2 or not parts[1].strip():
return is_admin
token = parts[1].strip()
decoded = verify_token(token) decoded = verify_token(token)
if decoded: if decoded:
user_roles = decoded.get("resource_access", {}).get(env_provider.KC_CLIENT_ID, {}).get("roles", []) user_roles = decoded.get("resource_access", {}).get(env_provider.KC_CLIENT_ID, {}).get("roles", [])
@@ -124,7 +130,10 @@ def require_auth(roles=[]):
if not auth_header or not auth_header.startswith('Bearer'): if not auth_header or not auth_header.startswith('Bearer'):
return jsonify({"error": "Unauthorized"}), 401 return jsonify({"error": "Unauthorized"}), 401
token = auth_header.split(" ")[1] parts = auth_header.split(" ", 1)
if len(parts) != 2 or not parts[1].strip():
return jsonify({"error": "Unauthorized"}), 401
token = parts[1].strip()
decoded = verify_token(token) decoded = verify_token(token)
if not decoded: if not decoded:
@@ -207,6 +216,11 @@ def get_api_key(key):
return session.query(APIKey).filter_by(key=key, is_active=True).first() return session.query(APIKey).filter_by(key=key, is_active=True).first()
def update_last_used(api_key): def update_last_used(api_key):
# api_key comes from get_api_key()'s (now closed) session, so it is
# detached. Update by key in a fresh session instead of mutating the
# detached instance, which would never be persisted.
with get_db() as session: with get_db() as session:
api_key.last_used_at = datetime.now(UTC) session.query(APIKey).filter_by(key=api_key.key).update(
{APIKey.last_used_at: datetime.now(UTC)}
)
session.commit() session.commit()

View File

@@ -1,34 +1,44 @@
from flask import Blueprint, request, jsonify from flask import Blueprint, request, jsonify
from api import get_api_key, generate_api_key from api import generate_api_key
from db import get_db from db import get_db
from api import require_auth from api import require_auth
from db.models.APIKey import APIKey from db.models.APIKey import APIKey
api_key_bp = Blueprint('apikey', __name__, url_prefix='/api/apikey') api_key_bp = Blueprint('apikey', __name__, url_prefix='/api/apikey')
# An API key must never be able to request a role broader than what the
# product defines, regardless of what the request body asks for.
ALLOWED_API_KEY_ROLES = {'admin', 'creator', 'user'}
@api_key_bp.route('/', methods=['POST']) @api_key_bp.route('/', methods=['POST'])
@require_auth(roles=['admin']) @require_auth(roles=['admin'])
def create_key(): def create_key():
data = request.get_json() data = request.get_json(silent=True)
if not data or 'name' not in data: if not data or 'name' not in data:
return jsonify({"error": "Name is required"}), 400 return jsonify({"error": "Name is required"}), 400
roles = data.get('roles', []) roles = data.get('roles', [])
if not isinstance(roles, list) or any(r not in ALLOWED_API_KEY_ROLES for r in roles):
return jsonify({"error": f"roles must be a subset of {sorted(ALLOWED_API_KEY_ROLES)}"}), 400
try: try:
with get_db() as session: with get_db() as session:
apikey = APIKey(key=generate_api_key(),name=data['name'], roles=roles) apikey = APIKey(key=generate_api_key(), name=data['name'], roles=roles)
session.add(apikey); session.add(apikey)
session.commit session.commit()
return jsonify(apikey.to_dict()), 201 result = apikey.to_dict()
return jsonify(result), 201
except Exception as e: except Exception as e:
return jsonify({"error": str(e)}), 500 return jsonify({"error": str(e)}), 500
@api_key_bp.route('/<key>', methods=['DELETE']) @api_key_bp.route('/<key>', methods=['DELETE'])
@require_auth(roles=['admin']) @require_auth(roles=['admin'])
def revoke_key(key): def revoke_key(key):
# Query and mutate within the same session, otherwise the update is
api_key = get_api_key(key) # performed on a detached instance and silently never persists.
with get_db() as session: with get_db() as session:
api_key = session.query(APIKey).filter_by(key=key, is_active=True).first()
if not api_key: if not api_key:
return jsonify({"error": "API key not found"}), 404 return jsonify({"error": "API key not found"}), 404
api_key.is_active = False api_key.is_active = False

View File

@@ -215,6 +215,40 @@ logger = logging.getLogger(__name__)
backup_bp = Blueprint('backup', __name__, url_prefix='/api/backup') backup_bp = Blueprint('backup', __name__, url_prefix='/api/backup')
# Upper bounds to defend against zip bombs in uploaded backups.
_MAX_ARCHIVE_MEMBERS = 50000
_MAX_UNCOMPRESSED_BYTES = 1 * 1024 * 1024 * 1024 # 1 GiB
def safe_extract(zip_ref, dest_dir):
"""
Safely extract a zip archive into dest_dir.
Prevents "Zip Slip" path traversal (entries like ``../../etc/x`` or
absolute paths) and rejects zip bombs by capping the member count and
total uncompressed size. Backups are admin-uploaded but may originate
from an untrusted source (e.g. the /convert endpoint ingests foreign
backups), so the contents must be treated as hostile.
"""
dest_root = os.path.realpath(dest_dir)
members = zip_ref.infolist()
if len(members) > _MAX_ARCHIVE_MEMBERS:
raise ValueError("backup archive has too many entries")
total = 0
for member in members:
total += member.file_size
if total > _MAX_UNCOMPRESSED_BYTES:
raise ValueError("backup archive uncompressed size exceeds limit")
target = os.path.realpath(os.path.join(dest_root, member.filename))
if target != dest_root and not target.startswith(dest_root + os.sep):
raise ValueError(f"unsafe path in backup archive: {member.filename}")
zip_ref.extractall(dest_root)
def check_and_convert_backup_version(backup_dir): def check_and_convert_backup_version(backup_dir):
""" """
Check the backup version and convert it if necessary. Check the backup version and convert it if necessary.
@@ -280,7 +314,7 @@ def convert_backup_endpoint():
uploaded_file.save(zip_path) uploaded_file.save(zip_path)
with zipfile.ZipFile(zip_path, 'r') as zip_ref: with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(backup_dir) safe_extract(zip_ref, backup_dir)
success, error_response = check_and_convert_backup_version(backup_dir) success, error_response = check_and_convert_backup_version(backup_dir)
if not success: if not success:
@@ -331,7 +365,12 @@ def get_backup():
Response Codes: Response Codes:
- 200: Backup created successfully - 200: Backup created successfully
- 500: Failed to create backup - 500: Failed to create backup
- 429: Another backup operation is in progress
""" """
if not backup_lock.acquire(blocking=False):
return jsonify({"error": "Another backup operation is in progress. Please try again later."}), 429
original_cwd = os.getcwd()
try: try:
if os.path.exists('Root'): if os.path.exists('Root'):
shutil.rmtree('Root') shutil.rmtree('Root')
@@ -370,6 +409,11 @@ def get_backup():
except Exception as e: except Exception as e:
logger.error(f"Failed to get backup: {e}") logger.error(f"Failed to get backup: {e}")
return jsonify({"error": "failed to get backup"}), 500 return jsonify({"error": "failed to get backup"}), 500
finally:
# os.chdir is process-global; always restore CWD so a failure
# mid-traverse can't corrupt later requests.
os.chdir(original_cwd)
backup_lock.release()
def create_and_cd(path_name): def create_and_cd(path_name):
@@ -552,7 +596,7 @@ def load_backup():
uploaded_file.save(zip_path) uploaded_file.save(zip_path)
with zipfile.ZipFile(zip_path, 'r') as zip_ref: with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(temp_dir) safe_extract(zip_ref, temp_dir)
root_dir = os.path.join(temp_dir, "Root") root_dir = os.path.join(temp_dir, "Root")
if not os.path.exists(root_dir): if not os.path.exists(root_dir):

View File

@@ -1,12 +1,18 @@
#api/log.py #api/log.py
from flask import Blueprint, jsonify, request from flask import Blueprint, jsonify, request
from api import require_auth
from db import get_db from db import get_db
from db.models.Log import Log from db.models.Log import Log
from db.utils import insert_log from db.utils import insert_log
logs_bp = Blueprint('log', __name__, url_prefix='/api/log') logs_bp = Blueprint('log', __name__, url_prefix='/api/log')
# Bound per-entry size so an authenticated-but-low-trust caller can't bloat
# the log table with multi-megabyte payloads.
_MAX_LOG_MESSAGE_LEN = 16 * 1024
@logs_bp.route('/', methods=['GET']) @logs_bp.route('/', methods=['GET'])
@require_auth(roles=['admin'])
def get_logs(): def get_logs():
level = request.args.get('level') level = request.args.get('level')
application = request.args.get('application') application = request.args.get('application')
@@ -29,14 +35,17 @@ def get_logs():
}) })
@logs_bp.route('/', methods=['POST']) @logs_bp.route('/', methods=['POST'])
@require_auth()
def create_log(): def create_log():
data = request.json data = request.get_json(silent=True)
if not data:
return jsonify({"error": "invalid or missing JSON body"}), 400
required_fields = ['level', 'message'] required_fields = ['level', 'message']
for field in required_fields: for field in required_fields:
if field not in data: if field not in data:
return jsonify({"error": f"missing {field} in request"}), 400 return jsonify({"error": f"missing {field} in request"}), 400
level = data.get('level') level = str(data.get('level'))[:64]
message = data.get('message') message = str(data.get('message'))[:_MAX_LOG_MESSAGE_LEN]
application = "frontend" application = "frontend"
extra = data.get('extra', None) extra = data.get('extra', None)
log_entry = Log(level=level, message=message, application=application, extra=extra) log_entry = Log(level=level, message=message, application=application, extra=extra)

View File

@@ -15,6 +15,40 @@ logger = logging.getLogger(__name__)
markdown_bp = Blueprint('markdown', __name__, url_prefix='/api/markdown') markdown_bp = Blueprint('markdown', __name__, url_prefix='/api/markdown')
def _permission_of(session, markdown):
"""Return the effective permission string ('private'/'protected'/None)."""
if markdown.setting_id is None:
return None
setting = session.query(MarkdownSetting).get(markdown.setting_id)
if not setting or not setting.permission_setting_id:
return None
ps = session.query(MarkdownPermissionSetting).get(setting.permission_setting_id)
return ps.permission if ps else None
def _filter_visible(session, markdowns, is_admin):
"""
Apply markdown permission settings to a list for non-admin callers:
private documents are dropped entirely, protected documents have their
content masked. Admins see everything. Mirrors get_markdown()'s checks
so listing/search endpoints can no longer leak restricted content.
"""
if is_admin:
return [md.to_dict() for md in markdowns]
visible = []
for md in markdowns:
permission = _permission_of(session, md)
if permission == 'private':
continue
data = md.to_dict()
if permission == 'protected':
data['content'] = None
data['protected'] = True
visible.append(data)
return visible
@markdown_bp.route('/', methods=['GET']) @markdown_bp.route('/', methods=['GET'])
@limiter.limit(api.get_rate_limit) @limiter.limit(api.get_rate_limit)
@etag_response @etag_response
@@ -30,9 +64,10 @@ def get_markdowns():
Response Codes: Response Codes:
- 200: Success - 200: Success
""" """
is_admin = is_user_admin()
with get_db() as session: with get_db() as session:
mds = session.query(Markdown).all() mds = session.query(Markdown).all()
return jsonify([md.to_dict() for md in mds]), 200 return jsonify(_filter_visible(session, mds, is_admin)), 200
@markdown_bp.route('/get_home', methods=['GET']) @markdown_bp.route('/get_home', methods=['GET'])
@limiter.limit(api.get_rate_limit) @limiter.limit(api.get_rate_limit)
@@ -51,11 +86,15 @@ def get_home():
- 200: Success - 200: Success
- 204: No content (home markdown not found) - 204: No content (home markdown not found)
""" """
is_admin = is_user_admin()
with get_db() as session: with get_db() as session:
markdown = session.query(Markdown).filter(Markdown.path_id == 1, Markdown.title == "index").first() markdown = session.query(Markdown).filter(Markdown.path_id == 1, Markdown.title == "index").first()
if markdown is None: if markdown is None:
return jsonify({}), 204 return jsonify({}), 204
return jsonify(markdown.to_dict()), 200 visible = _filter_visible(session, [markdown], is_admin)
if not visible:
return jsonify({}), 204
return jsonify(visible[0]), 200
@markdown_bp.route('/by_path/<int:path_id>', methods=['GET']) @markdown_bp.route('/by_path/<int:path_id>', methods=['GET'])
@limiter.limit(api.get_rate_limit) @limiter.limit(api.get_rate_limit)
@@ -75,9 +114,10 @@ def get_markdowns_by_path(path_id):
Response Codes: Response Codes:
- 200: Success - 200: Success
""" """
is_admin = is_user_admin()
with get_db() as session: with get_db() as session:
markdowns = session.query(Markdown).filter(Markdown.path_id == path_id).all() markdowns = session.query(Markdown).filter(Markdown.path_id == path_id).all()
return jsonify([md.to_dict() for md in markdowns]), 200 return jsonify(_filter_visible(session, markdowns, is_admin)), 200
@markdown_bp.route('/get_index/<int:path_id>', methods=['GET']) @markdown_bp.route('/get_index/<int:path_id>', methods=['GET'])
@limiter.limit(api.get_rate_limit) @limiter.limit(api.get_rate_limit)
@@ -99,11 +139,15 @@ def get_index(path_id):
- 200: Success - 200: Success
- 204: No content (index markdown not found) - 204: No content (index markdown not found)
""" """
is_admin = is_user_admin()
with get_db() as session: with get_db() as session:
markdown = session.query(Markdown).filter(Markdown.path_id == path_id).filter(Markdown.title == "index").first() markdown = session.query(Markdown).filter(Markdown.path_id == path_id).filter(Markdown.title == "index").first()
if markdown is None: if markdown is None:
return jsonify({}), 204 return jsonify({}), 204
return jsonify(markdown.to_dict()), 200 visible = _filter_visible(session, [markdown], is_admin)
if not visible:
return jsonify({}), 204
return jsonify(visible[0]), 200
@@ -429,11 +473,19 @@ def search_markdowns(keyword):
Response Codes: Response Codes:
- 200: Success - 200: Success
""" """
is_admin = is_user_admin()
# Escape LIKE metacharacters so user input can't inject wildcards
# (full-table-scan DoS) and so search actually matches substrings.
escaped = keyword.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
pattern = f"%{escaped}%"
with get_db() as session: with get_db() as session:
res = session.query(Markdown).filter( res = session.query(Markdown).filter(
or_(Markdown.title.like(keyword), Markdown.content.like(keyword)) or_(
).all() Markdown.title.like(pattern, escape='\\'),
return jsonify([md.to_dict() for md in res]), 200 Markdown.content.like(pattern, escape='\\'),
)
).limit(200).all()
return jsonify(_filter_visible(session, res, is_admin)), 200
@markdown_bp.route('/links', methods=['GET']) @markdown_bp.route('/links', methods=['GET'])
@limiter.limit(api.get_rate_limit) @limiter.limit(api.get_rate_limit)

8
app.py
View File

@@ -53,8 +53,12 @@ def log_request():
logger.info(f"Request received: {request.method} {request.path} from {request.remote_addr}") logger.info(f"Request received: {request.method} {request.path} from {request.remote_addr}")
api.init_rate_limits(app)
if __name__ == '__main__': if __name__ == '__main__':
api.init_rate_limits(app)
print("env") print("env")
pprint(env_provider.summerize()) pprint(env_provider.summerize())
app.run(host='0.0.0.0', port=5000, debug=True, use_reloader=True) # The Werkzeug debugger allows remote code execution. Only enable it in
# an explicit dev environment, never in the published production image.
is_dev = env_provider.ENVIRONMENT == "dev"
app.run(host='0.0.0.0', port=5000, debug=is_dev, use_reloader=is_dev)

View File

@@ -23,6 +23,12 @@ KC_CLIENT_ID = os.getenv("KC_CLIENT_ID")
FRONTEND_HOST = os.getenv("FRONTEND_HOST") FRONTEND_HOST = os.getenv("FRONTEND_HOST")
BACKEND_HOST = os.getenv("BACKEND_HOST") BACKEND_HOST = os.getenv("BACKEND_HOST")
def _redact(value):
if not value:
return "<unset>"
return f"<set:{len(str(value))} chars>"
def summerize(): def summerize():
return { return {
"ENVIRONMENT": ENVIRONMENT, "ENVIRONMENT": ENVIRONMENT,
@@ -30,9 +36,9 @@ def summerize():
'DB_PORT': DB_PORT, 'DB_PORT': DB_PORT,
'DB_NAME': DB_NAME, 'DB_NAME': DB_NAME,
'DB_USER': DB_USER, 'DB_USER': DB_USER,
'DB_PASSWORD': DB_PASSWORD, 'DB_PASSWORD': _redact(DB_PASSWORD),
'DB_SCHEMA_UPDATED': DB_SCHEMA_UPDATED, 'DB_SCHEMA_UPDATED': DB_SCHEMA_UPDATED,
'SESSION_SECRET_KEY': SESSION_SECRET_KEY, 'SESSION_SECRET_KEY': _redact(SESSION_SECRET_KEY),
'KC_HOST': KC_HOST, 'KC_HOST': KC_HOST,
'KC_REALM': KC_REALM, 'KC_REALM': KC_REALM,
'KC_CLIENT_ID': KC_CLIENT_ID, 'KC_CLIENT_ID': KC_CLIENT_ID,

View File

@@ -7,10 +7,55 @@ from events import MARKDOWN_CREATED_EVENT, MARKDOWN_UPDATED_EVENT, MARKDOWN_DELE
PATH_UPDATED_EVENT, PATH_DELETED_EVENT PATH_UPDATED_EVENT, PATH_DELETED_EVENT
import abc import abc
import importlib import importlib
import ipaddress
import json import json
import pkgutil import pkgutil
import socket
import requests import requests
import db import db
from urllib.parse import urlsplit
import logging
logger = logging.getLogger(__name__)
def is_safe_webhook_url(url):
"""
Reject webhook targets that could be used for SSRF: only http/https,
and the resolved host must not be loopback / private / link-local /
reserved. Defends internal services and cloud metadata endpoints
(e.g. 169.254.169.254) even when the stored URL came from a backup.
"""
try:
parts = urlsplit(url)
except Exception:
return False
if parts.scheme not in ("http", "https") or not parts.hostname:
return False
try:
infos = socket.getaddrinfo(parts.hostname, None)
except socket.gaierror:
return False
for info in infos:
ip = ipaddress.ip_address(info[4][0])
if (ip.is_private or ip.is_loopback or ip.is_link_local
or ip.is_reserved or ip.is_multicast or ip.is_unspecified):
return False
return True
def parse_additional_headers(raw):
"""Best-effort parse of the stored additional_header JSON object."""
if not raw:
return {}
try:
parsed = json.loads(raw)
except (ValueError, TypeError):
logger.warning("webhook additional_header is not valid JSON; ignoring")
return {}
if not isinstance(parsed, dict):
return {}
return {str(k): str(v) for k, v in parsed.items()}
event_type_map = { event_type_map = {
@@ -36,15 +81,21 @@ class WebhookEventHandler(abc.ABC):
setting = self.get_setting(session, path_id) setting = self.get_setting(session, path_id)
if setting is None: if setting is None:
return return
webhook_url = setting["webhook_url"]
if not is_safe_webhook_url(webhook_url):
logger.warning("blocked webhook to unsafe URL: %s", webhook_url)
return
headers = {'Content-Type': 'application/json', 'x-alchegos-event': event_type_map[self.event_type]} headers = {'Content-Type': 'application/json', 'x-alchegos-event': event_type_map[self.event_type]}
if setting.get("additional_header", None) is not None: headers.update(parse_additional_headers(setting.get("additional_header")))
headers.update(json.loads(setting["additional_header"]))
body = json.dumps(payload, default=str) body = json.dumps(payload, default=str)
try: try:
response = requests.post(setting["webhook_url"], data=body, headers=headers, timeout=5) response = requests.post(
webhook_url, data=body, headers=headers,
timeout=5, allow_redirects=False,
)
response.raise_for_status() response.raise_for_status()
except Exception as e: except Exception as e:
print(e) logger.warning("webhook delivery failed: %s", e)
def get_setting(self, session: Session, path_id): def get_setting(self, session: Session, path_id):