Security hardening: fix RCE, auth and SSRF issues

Critical:
- backup: prevent Zip Slip path traversal and zip bombs in restore/convert
  via safe_extract(); serialize get_backup() with backup_lock and always
  restore CWD so concurrent requests can't corrupt the os.chdir state
- app: only enable the Werkzeug debugger/reloader when ENVIRONMENT=dev;
  always init rate limits (also under WSGI), not just under __main__
- apikey: fix create_key never committing (session.commit -> commit()),
  validate roles against an allowlist, and fix revoke_key/update_last_used
  operating on detached instances so revocation actually persists
- env_provider: redact DB_PASSWORD and SESSION_SECRET_KEY in summerize()

High:
- markdown: filter private/protected docs for non-admins in the listing,
  get_home, get_index and search endpoints (was an anonymous data leak);
  escape LIKE metacharacters and cap search results
- webhooks: validate target URL to block SSRF (loopback/private/link-local/
  metadata IPs), disable redirects, safely parse additional_header
- auth: validate JWT issuer and require exp/iat; add timeout to JWKS fetch;
  harden Authorization header parsing against malformed values
- log: require admin for GET /api/log and auth for POST; bound entry size

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
h z
2026-05-16 16:12:43 +01:00
parent 1f4ca52a10
commit 58f23ddcb8
8 changed files with 225 additions and 35 deletions

View File

@@ -37,7 +37,8 @@ def x5c_to_public_key(x5c):
def get_jwks():
url = f"{env_provider.KC_HOST}/realms/{env_provider.KC_REALM}/protocol/openid-connect/certs"
response = requests.get(url)
response = requests.get(url, timeout=5)
response.raise_for_status()
jwks = response.json()
return jwks
@@ -72,7 +73,9 @@ def verify_token(token):
token,
public_key,
algorithms=["RS256"],
audience=env_provider.KC_CLIENT_ID
audience=env_provider.KC_CLIENT_ID,
issuer=f"{env_provider.KC_HOST}/realms/{env_provider.KC_REALM}",
options={"require": ["exp", "iat"]},
)
return decoded
except ExpiredSignatureError as e:
@@ -86,7 +89,10 @@ def is_user_admin():
is_admin = False
auth_header = request.headers.get('Authorization')
if auth_header and auth_header.startswith('Bearer'):
token = auth_header.split(" ")[1]
parts = auth_header.split(" ", 1)
if len(parts) != 2 or not parts[1].strip():
return is_admin
token = parts[1].strip()
decoded = verify_token(token)
if decoded:
user_roles = decoded.get("resource_access", {}).get(env_provider.KC_CLIENT_ID, {}).get("roles", [])
@@ -124,7 +130,10 @@ def require_auth(roles=[]):
if not auth_header or not auth_header.startswith('Bearer'):
return jsonify({"error": "Unauthorized"}), 401
token = auth_header.split(" ")[1]
parts = auth_header.split(" ", 1)
if len(parts) != 2 or not parts[1].strip():
return jsonify({"error": "Unauthorized"}), 401
token = parts[1].strip()
decoded = verify_token(token)
if not decoded:
@@ -207,6 +216,11 @@ def get_api_key(key):
return session.query(APIKey).filter_by(key=key, is_active=True).first()
def update_last_used(api_key):
# api_key comes from get_api_key()'s (now closed) session, so it is
# detached. Update by key in a fresh session instead of mutating the
# detached instance, which would never be persisted.
with get_db() as session:
api_key.last_used_at = datetime.now(UTC)
session.query(APIKey).filter_by(key=api_key.key).update(
{APIKey.last_used_at: datetime.now(UTC)}
)
session.commit()

View File

@@ -1,34 +1,44 @@
from flask import Blueprint, request, jsonify
from api import get_api_key, generate_api_key
from api import generate_api_key
from db import get_db
from api import require_auth
from db.models.APIKey import APIKey
api_key_bp = Blueprint('apikey', __name__, url_prefix='/api/apikey')
# An API key must never be able to request a role broader than what the
# product defines, regardless of what the request body asks for.
ALLOWED_API_KEY_ROLES = {'admin', 'creator', 'user'}
@api_key_bp.route('/', methods=['POST'])
@require_auth(roles=['admin'])
def create_key():
data = request.get_json()
data = request.get_json(silent=True)
if not data or 'name' not in data:
return jsonify({"error": "Name is required"}), 400
roles = data.get('roles', [])
if not isinstance(roles, list) or any(r not in ALLOWED_API_KEY_ROLES for r in roles):
return jsonify({"error": f"roles must be a subset of {sorted(ALLOWED_API_KEY_ROLES)}"}), 400
try:
with get_db() as session:
apikey = APIKey(key=generate_api_key(),name=data['name'], roles=roles)
session.add(apikey);
session.commit
return jsonify(apikey.to_dict()), 201
apikey = APIKey(key=generate_api_key(), name=data['name'], roles=roles)
session.add(apikey)
session.commit()
result = apikey.to_dict()
return jsonify(result), 201
except Exception as e:
return jsonify({"error": str(e)}), 500
@api_key_bp.route('/<key>', methods=['DELETE'])
@require_auth(roles=['admin'])
def revoke_key(key):
api_key = get_api_key(key)
# Query and mutate within the same session, otherwise the update is
# performed on a detached instance and silently never persists.
with get_db() as session:
api_key = session.query(APIKey).filter_by(key=key, is_active=True).first()
if not api_key:
return jsonify({"error": "API key not found"}), 404
api_key.is_active = False

View File

@@ -215,6 +215,40 @@ logger = logging.getLogger(__name__)
backup_bp = Blueprint('backup', __name__, url_prefix='/api/backup')
# Upper bounds to defend against zip bombs in uploaded backups.
_MAX_ARCHIVE_MEMBERS = 50000
_MAX_UNCOMPRESSED_BYTES = 1 * 1024 * 1024 * 1024 # 1 GiB
def safe_extract(zip_ref, dest_dir):
"""
Safely extract a zip archive into dest_dir.
Prevents "Zip Slip" path traversal (entries like ``../../etc/x`` or
absolute paths) and rejects zip bombs by capping the member count and
total uncompressed size. Backups are admin-uploaded but may originate
from an untrusted source (e.g. the /convert endpoint ingests foreign
backups), so the contents must be treated as hostile.
"""
dest_root = os.path.realpath(dest_dir)
members = zip_ref.infolist()
if len(members) > _MAX_ARCHIVE_MEMBERS:
raise ValueError("backup archive has too many entries")
total = 0
for member in members:
total += member.file_size
if total > _MAX_UNCOMPRESSED_BYTES:
raise ValueError("backup archive uncompressed size exceeds limit")
target = os.path.realpath(os.path.join(dest_root, member.filename))
if target != dest_root and not target.startswith(dest_root + os.sep):
raise ValueError(f"unsafe path in backup archive: {member.filename}")
zip_ref.extractall(dest_root)
def check_and_convert_backup_version(backup_dir):
"""
Check the backup version and convert it if necessary.
@@ -280,7 +314,7 @@ def convert_backup_endpoint():
uploaded_file.save(zip_path)
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(backup_dir)
safe_extract(zip_ref, backup_dir)
success, error_response = check_and_convert_backup_version(backup_dir)
if not success:
@@ -331,7 +365,12 @@ def get_backup():
Response Codes:
- 200: Backup created successfully
- 500: Failed to create backup
- 429: Another backup operation is in progress
"""
if not backup_lock.acquire(blocking=False):
return jsonify({"error": "Another backup operation is in progress. Please try again later."}), 429
original_cwd = os.getcwd()
try:
if os.path.exists('Root'):
shutil.rmtree('Root')
@@ -370,6 +409,11 @@ def get_backup():
except Exception as e:
logger.error(f"Failed to get backup: {e}")
return jsonify({"error": "failed to get backup"}), 500
finally:
# os.chdir is process-global; always restore CWD so a failure
# mid-traverse can't corrupt later requests.
os.chdir(original_cwd)
backup_lock.release()
def create_and_cd(path_name):
@@ -552,7 +596,7 @@ def load_backup():
uploaded_file.save(zip_path)
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(temp_dir)
safe_extract(zip_ref, temp_dir)
root_dir = os.path.join(temp_dir, "Root")
if not os.path.exists(root_dir):

View File

@@ -1,12 +1,18 @@
#api/log.py
from flask import Blueprint, jsonify, request
from api import require_auth
from db import get_db
from db.models.Log import Log
from db.utils import insert_log
logs_bp = Blueprint('log', __name__, url_prefix='/api/log')
# Bound per-entry size so an authenticated-but-low-trust caller can't bloat
# the log table with multi-megabyte payloads.
_MAX_LOG_MESSAGE_LEN = 16 * 1024
@logs_bp.route('/', methods=['GET'])
@require_auth(roles=['admin'])
def get_logs():
level = request.args.get('level')
application = request.args.get('application')
@@ -29,14 +35,17 @@ def get_logs():
})
@logs_bp.route('/', methods=['POST'])
@require_auth()
def create_log():
data = request.json
data = request.get_json(silent=True)
if not data:
return jsonify({"error": "invalid or missing JSON body"}), 400
required_fields = ['level', 'message']
for field in required_fields:
if field not in data:
return jsonify({"error": f"missing {field} in request"}), 400
level = data.get('level')
message = data.get('message')
level = str(data.get('level'))[:64]
message = str(data.get('message'))[:_MAX_LOG_MESSAGE_LEN]
application = "frontend"
extra = data.get('extra', None)
log_entry = Log(level=level, message=message, application=application, extra=extra)

View File

@@ -15,6 +15,40 @@ logger = logging.getLogger(__name__)
markdown_bp = Blueprint('markdown', __name__, url_prefix='/api/markdown')
def _permission_of(session, markdown):
"""Return the effective permission string ('private'/'protected'/None)."""
if markdown.setting_id is None:
return None
setting = session.query(MarkdownSetting).get(markdown.setting_id)
if not setting or not setting.permission_setting_id:
return None
ps = session.query(MarkdownPermissionSetting).get(setting.permission_setting_id)
return ps.permission if ps else None
def _filter_visible(session, markdowns, is_admin):
"""
Apply markdown permission settings to a list for non-admin callers:
private documents are dropped entirely, protected documents have their
content masked. Admins see everything. Mirrors get_markdown()'s checks
so listing/search endpoints can no longer leak restricted content.
"""
if is_admin:
return [md.to_dict() for md in markdowns]
visible = []
for md in markdowns:
permission = _permission_of(session, md)
if permission == 'private':
continue
data = md.to_dict()
if permission == 'protected':
data['content'] = None
data['protected'] = True
visible.append(data)
return visible
@markdown_bp.route('/', methods=['GET'])
@limiter.limit(api.get_rate_limit)
@etag_response
@@ -30,9 +64,10 @@ def get_markdowns():
Response Codes:
- 200: Success
"""
is_admin = is_user_admin()
with get_db() as session:
mds = session.query(Markdown).all()
return jsonify([md.to_dict() for md in mds]), 200
return jsonify(_filter_visible(session, mds, is_admin)), 200
@markdown_bp.route('/get_home', methods=['GET'])
@limiter.limit(api.get_rate_limit)
@@ -51,11 +86,15 @@ def get_home():
- 200: Success
- 204: No content (home markdown not found)
"""
is_admin = is_user_admin()
with get_db() as session:
markdown = session.query(Markdown).filter(Markdown.path_id == 1, Markdown.title == "index").first()
if markdown is None:
return jsonify({}), 204
return jsonify(markdown.to_dict()), 200
visible = _filter_visible(session, [markdown], is_admin)
if not visible:
return jsonify({}), 204
return jsonify(visible[0]), 200
@markdown_bp.route('/by_path/<int:path_id>', methods=['GET'])
@limiter.limit(api.get_rate_limit)
@@ -75,9 +114,10 @@ def get_markdowns_by_path(path_id):
Response Codes:
- 200: Success
"""
is_admin = is_user_admin()
with get_db() as session:
markdowns = session.query(Markdown).filter(Markdown.path_id == path_id).all()
return jsonify([md.to_dict() for md in markdowns]), 200
return jsonify(_filter_visible(session, markdowns, is_admin)), 200
@markdown_bp.route('/get_index/<int:path_id>', methods=['GET'])
@limiter.limit(api.get_rate_limit)
@@ -99,11 +139,15 @@ def get_index(path_id):
- 200: Success
- 204: No content (index markdown not found)
"""
is_admin = is_user_admin()
with get_db() as session:
markdown = session.query(Markdown).filter(Markdown.path_id == path_id).filter(Markdown.title == "index").first()
if markdown is None:
return jsonify({}), 204
return jsonify(markdown.to_dict()), 200
visible = _filter_visible(session, [markdown], is_admin)
if not visible:
return jsonify({}), 204
return jsonify(visible[0]), 200
@@ -429,11 +473,19 @@ def search_markdowns(keyword):
Response Codes:
- 200: Success
"""
is_admin = is_user_admin()
# Escape LIKE metacharacters so user input can't inject wildcards
# (full-table-scan DoS) and so search actually matches substrings.
escaped = keyword.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
pattern = f"%{escaped}%"
with get_db() as session:
res = session.query(Markdown).filter(
or_(Markdown.title.like(keyword), Markdown.content.like(keyword))
).all()
return jsonify([md.to_dict() for md in res]), 200
or_(
Markdown.title.like(pattern, escape='\\'),
Markdown.content.like(pattern, escape='\\'),
)
).limit(200).all()
return jsonify(_filter_visible(session, res, is_admin)), 200
@markdown_bp.route('/links', methods=['GET'])
@limiter.limit(api.get_rate_limit)

8
app.py
View File

@@ -53,8 +53,12 @@ def log_request():
logger.info(f"Request received: {request.method} {request.path} from {request.remote_addr}")
api.init_rate_limits(app)
if __name__ == '__main__':
api.init_rate_limits(app)
print("env")
pprint(env_provider.summerize())
app.run(host='0.0.0.0', port=5000, debug=True, use_reloader=True)
# The Werkzeug debugger allows remote code execution. Only enable it in
# an explicit dev environment, never in the published production image.
is_dev = env_provider.ENVIRONMENT == "dev"
app.run(host='0.0.0.0', port=5000, debug=is_dev, use_reloader=is_dev)

View File

@@ -23,6 +23,12 @@ KC_CLIENT_ID = os.getenv("KC_CLIENT_ID")
FRONTEND_HOST = os.getenv("FRONTEND_HOST")
BACKEND_HOST = os.getenv("BACKEND_HOST")
def _redact(value):
if not value:
return "<unset>"
return f"<set:{len(str(value))} chars>"
def summerize():
return {
"ENVIRONMENT": ENVIRONMENT,
@@ -30,9 +36,9 @@ def summerize():
'DB_PORT': DB_PORT,
'DB_NAME': DB_NAME,
'DB_USER': DB_USER,
'DB_PASSWORD': DB_PASSWORD,
'DB_PASSWORD': _redact(DB_PASSWORD),
'DB_SCHEMA_UPDATED': DB_SCHEMA_UPDATED,
'SESSION_SECRET_KEY': SESSION_SECRET_KEY,
'SESSION_SECRET_KEY': _redact(SESSION_SECRET_KEY),
'KC_HOST': KC_HOST,
'KC_REALM': KC_REALM,
'KC_CLIENT_ID': KC_CLIENT_ID,

View File

@@ -7,10 +7,55 @@ from events import MARKDOWN_CREATED_EVENT, MARKDOWN_UPDATED_EVENT, MARKDOWN_DELE
PATH_UPDATED_EVENT, PATH_DELETED_EVENT
import abc
import importlib
import ipaddress
import json
import pkgutil
import socket
import requests
import db
from urllib.parse import urlsplit
import logging
logger = logging.getLogger(__name__)
def is_safe_webhook_url(url):
"""
Reject webhook targets that could be used for SSRF: only http/https,
and the resolved host must not be loopback / private / link-local /
reserved. Defends internal services and cloud metadata endpoints
(e.g. 169.254.169.254) even when the stored URL came from a backup.
"""
try:
parts = urlsplit(url)
except Exception:
return False
if parts.scheme not in ("http", "https") or not parts.hostname:
return False
try:
infos = socket.getaddrinfo(parts.hostname, None)
except socket.gaierror:
return False
for info in infos:
ip = ipaddress.ip_address(info[4][0])
if (ip.is_private or ip.is_loopback or ip.is_link_local
or ip.is_reserved or ip.is_multicast or ip.is_unspecified):
return False
return True
def parse_additional_headers(raw):
"""Best-effort parse of the stored additional_header JSON object."""
if not raw:
return {}
try:
parsed = json.loads(raw)
except (ValueError, TypeError):
logger.warning("webhook additional_header is not valid JSON; ignoring")
return {}
if not isinstance(parsed, dict):
return {}
return {str(k): str(v) for k, v in parsed.items()}
event_type_map = {
@@ -36,15 +81,21 @@ class WebhookEventHandler(abc.ABC):
setting = self.get_setting(session, path_id)
if setting is None:
return
webhook_url = setting["webhook_url"]
if not is_safe_webhook_url(webhook_url):
logger.warning("blocked webhook to unsafe URL: %s", webhook_url)
return
headers = {'Content-Type': 'application/json', 'x-alchegos-event': event_type_map[self.event_type]}
if setting.get("additional_header", None) is not None:
headers.update(json.loads(setting["additional_header"]))
headers.update(parse_additional_headers(setting.get("additional_header")))
body = json.dumps(payload, default=str)
try:
response = requests.post(setting["webhook_url"], data=body, headers=headers, timeout=5)
response = requests.post(
webhook_url, data=body, headers=headers,
timeout=5, allow_redirects=False,
)
response.raise_for_status()
except Exception as e:
print(e)
logger.warning("webhook delivery failed: %s", e)
def get_setting(self, session: Session, path_id):