From 045c7c51d6063911d1cc86cb1a6e4aae5239be5f Mon Sep 17 00:00:00 2001
From: hzhang
Date: Sat, 16 May 2026 16:12:56 +0100
Subject: [PATCH] Security hardening: prevent stored XSS and render crashes

- MarkdownView: add rehype-sanitize between rehype-raw and rehype-katex
  to strip scripts/event-handlers/javascript: URLs from user-authored
  markdown (was stored XSS, also affected the public /pg/* route); keep
  className on code/span/div so KaTeX and syntax highlighting still work.
  Add rehype-sanitize ^6.0.0 to deps and lockfile.
- MarkdownContent / StandaloneMarkdownPage: parse markdown content via
  parseMarkdownContent() instead of an unguarded JSON.parse, so a single
  corrupt/legacy record no longer white-screens the whole page.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 package-lock.json                             | 30 +++++++++++++++++++
 package.json                                  |  1 +
 src/components/Markdowns/MarkdownContent.js   |  3 +-
 src/components/Markdowns/MarkdownView.js      | 18 ++++++++++-
 .../Markdowns/StandaloneMarkdownPage.js       |  3 +-
 src/utils/safe-json.js                        | 11 +++++++
 6 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 src/utils/safe-json.js

diff --git a/package-lock.json b/package-lock.json
index 4b9e296..c02dc3c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -29,6 +29,7 @@
         "redux": "^5.0.1",
         "rehype-katex": "^7.0.1",
         "rehype-raw": "^7.0.0",
+        "rehype-sanitize": "^6.0.0",
         "remark-gfm": "^4.0.0",
         "remark-math": "^6.0.0",
         "util": "^0.12.5"
@@ -6263,6 +6264,21 @@
         "url": "https://opencollective.com/unified"
       }
     },
+    "node_modules/hast-util-sanitize": {
+      "version": "5.0.2",
+      "resolved": "https://registry.npmjs.org/hast-util-sanitize/-/hast-util-sanitize-5.0.2.tgz",
+      "integrity": "sha512-3yTWghByc50aGS7JlGhk61SPenfE/p1oaFeNwkOOyrscaOkMGrcW9+Cy/QAIOBpZxP1yqDIzFMR0+Np0i0+usg==",
+      "license": "MIT",
+      "dependencies": {
+        "@types/hast": "^3.0.0",
+        "@ungap/structured-clone": "^1.0.0",
+        "unist-util-position": "^5.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url":
"https://opencollective.com/unified" + } + }, "node_modules/hast-util-to-jsx-runtime": { "version": "2.3.2", "resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.2.tgz", @@ -10868,6 +10884,20 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/rehype-sanitize": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/rehype-sanitize/-/rehype-sanitize-6.0.0.tgz", + "integrity": "sha512-CsnhKNsyI8Tub6L4sm5ZFsme4puGfc6pYylvXo1AeqaGbjOYyzNv3qZPwvs0oMJ39eryyeOdmxwUIo94IpEhqg==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "hast-util-sanitize": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/relateurl": { "version": "0.2.7", "resolved": "https://registry.npmjs.org/relateurl/-/relateurl-0.2.7.tgz", diff --git a/package.json b/package.json index 268ee7d..3656432 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "redux": "^5.0.1", "rehype-katex": "^7.0.1", "rehype-raw": "^7.0.0", + "rehype-sanitize": "^6.0.0", "remark-gfm": "^4.0.0", "remark-math": "^6.0.0", "util": "^0.12.5" diff --git a/src/components/Markdowns/MarkdownContent.js b/src/components/Markdowns/MarkdownContent.js index 5ce1b8f..7c88efd 100644 --- a/src/components/Markdowns/MarkdownContent.js +++ b/src/components/Markdowns/MarkdownContent.js @@ -10,6 +10,7 @@ import {useMarkdownSetting} from "../../utils/queries/markdown-setting-queries"; import {useMarkdownTemplate} from "../../utils/queries/markdown-template-queries"; import {useMarkdownTemplateSetting} from "../../utils/queries/markdown-template-setting-queries"; import MarkdownSettingModal from "../Modals/MarkdownSettingModal"; +import { parseMarkdownContent } from "../../utils/safe-json"; const MarkdownContent = () => { const { strId } = useParams(); @@ -68,7 +69,7 @@ const MarkdownContent = () => { - + { }; +// Markdown content is authored by users and rendered for everyone 
+// (including the unauthenticated /pg/* route), so raw HTML must be +// sanitized to prevent stored XSS. className is kept on code/span/div so +// syntax highlighting and KaTeX (which runs after sanitize) still work; +// scripts, event handlers and javascript: URLs are stripped. +const sanitizeSchema = { + ...defaultSchema, + attributes: { + ...defaultSchema.attributes, + code: [...(defaultSchema.attributes?.code || []), ["className"]], + span: [...(defaultSchema.attributes?.span || []), ["className"]], + div: [...(defaultSchema.attributes?.div || []), ["className"]], + }, +}; + const MarkdownView = ({ content, template, height="auto" }) => { const {data: links, isLoading} = useLinks(); @@ -74,7 +90,7 @@ const MarkdownView = ({ content, template, height="auto" }) => { variables: content }) + "\n" + linkDefinitions} remarkPlugins={[remarkMath, remarkGfm]} - rehypePlugins={[rehypeKatex, rehypeRaw]} + rehypePlugins={[rehypeRaw, [rehypeSanitize, sanitizeSchema], rehypeKatex]} components={{ code({ node, inline, className, children, ...props }) { const match = /language-(\w+)/.exec(className || ""); diff --git a/src/components/Markdowns/StandaloneMarkdownPage.js b/src/components/Markdowns/StandaloneMarkdownPage.js index fa21520..7193c97 100644 --- a/src/components/Markdowns/StandaloneMarkdownPage.js +++ b/src/components/Markdowns/StandaloneMarkdownPage.js @@ -9,6 +9,7 @@ import { useMarkdownTemplate } from "../../utils/queries/markdown-template-queri import { useMarkdownTemplateSetting } from "../../utils/queries/markdown-template-setting-queries"; import { useTree } from "../../utils/queries/tree-queries"; import { getMarkdownIdByPath } from "../../utils/pathUtils"; +import { parseMarkdownContent } from "../../utils/safe-json"; const StandaloneMarkdownPage = () => { const location = useLocation(); @@ -91,7 +92,7 @@ const StandaloneMarkdownPage = () => {

{markdown?.title === "index" ? indexTitle : markdown?.title}

{markdown && ( - + )} ); diff --git a/src/utils/safe-json.js b/src/utils/safe-json.js new file mode 100644 index 0000000..918b153 --- /dev/null +++ b/src/utils/safe-json.js @@ -0,0 +1,11 @@ +// Markdown `content` is a JSON string. Records created via the API (or +// legacy/corrupt data) may not be valid JSON; an unguarded JSON.parse in a +// render path throws and white-screens the whole page (including the public +// /pg/* route). Parse defensively and degrade to a readable fallback. +export function parseMarkdownContent(raw) { + try { + return JSON.parse(raw); + } catch (e) { + return { markdown: "> ⚠️ This document could not be displayed: its stored content is not valid." }; + } +}