/**
 * OpenClaw Perf Cache — fs.stat/lstat/realpath TTL memo for plugin-tree paths.
 *
 * Why this exists
 * ===============
 *
 * Upstream openclaw's `loadPluginMetadataSnapshot()` (in
 * `dist/plugin-metadata-snapshot-*.js`) maintains a memo cache of the plugin
 * registry, but the cache-validity check it runs on every lookup itself does
 * O(N) filesystem work:
 *
 *   resolvePersistedRegistryMemoStateForLookup(params, memo):
 *     ...
 *     if (registryState && contextHash matches && fastHash matches
 *         && hashWatchedFiles(registryState.watchedFiles) === registryState.watchedFilesHash)
 *       return registryState;  // ← `hashWatchedFiles` re-fingerprints every watched file
 *
 * `hashWatchedFiles` calls `fileFingerprint(path)` (statx) for every plugin
 * package.json + openclaw.plugin.json + source + setupSource, and the
 * watched-file collection is built by `persistedPluginFileFingerprint` which
 * in turn calls `resolvePluginFilePath` -> `tryRealpath` (which is
 * `fs.realpathSync`, walking the ancestor chain via `lstat` for each path
 * segment).
 *
 * On prod t2 (~100 installed plugins, mostly bundled extensions under
 * `/usr/lib/node_modules/openclaw/dist/extensions/<name>/`), one cache-check
 * call costs roughly:
 *
 *   100 plugins × 4 watched files/plugin × 2 realpath/file × ~8 lstat/realpath
 *   ≈ 6400 lstat + ~400 stat per call (~6-7s of CPU per call)
 *
 * The deprecated `loadConfig()` path was firing one of these every 30s from
 * HF plugin's `pushMetaToMonitor` (separate fix: zhi/HarborForge.OpenclawPlugin#11),
 * and every agent turn fires one too (per the tool middleware loader). The
 * push-driven baseline is gone; the per-turn cost is the remaining chronic
 * load.
 *
 * Two upstream tickets have closed without a fix for this same hot path:
 *   - https://github.com/openclaw/openclaw/issues/67040 (closed as not planned)
 *   - https://github.com/openclaw/openclaw/issues/75297 (closed, no fix;
 *     rollback to 2026.4.23 was the workaround)
 *
 * What this plugin does
 * =====================
 *
 * On `register()` (which runs before any agent turn), wrap the global
 * `fs.statSync`, `fs.lstatSync`, `fs.realpathSync` and their `fs.promises`
 * counterparts with a small TTL memo. The wrapper is a no-op (pass-through)
 * for any path that is NOT under a plugin tree, so general fs use elsewhere
 * is unaffected.
 *
 * Path whitelist (anything else falls through to the original):
 *   - `/openclaw/dist/extensions/`  (bundled openclaw channel SDKs)
 *   - `/.openclaw/plugins/`         (user-installed plugins)
 *   - `/node_modules/@openclaw/`    (managed npm plugin packages)
 *   - `/openclaw/plugin-sdk/`       (SDK module imports)
 *
 * TTL: 1000ms. Within that window, repeated stats of the same path return the
 * cached result. Two cache-check calls back-to-back (which is what the
 * snapshot lookup does on each invocation) now cost ~0 instead of ~7s.
 *
 * Safety
 * ======
 *
 * - The wrappers call the original functions with the caller's `this` and the
 *   full argument list (including `options` like `{ bigint: true }`).
 * - Own properties of the original function are copied onto the wrapper, so
 *   `fs.realpathSync.native` keeps working after installation.
 * - Cache key includes a JSON of the trailing args so different option shapes
 *   for the same path don't collide (e.g. `statSync(p)` vs `statSync(p,{bigint:true})`).
 * - Pass-through for non-plugin paths: business code (logs, session files,
 *   skills/, secrets/, anything outside the whitelist) sees the unmodified fs.
 * - 1s TTL: plugin manifest mtime resolution is ms-level, so a manifest change
 *   becomes visible at most ~1s later. dev-loop impact is negligible.
 * - Bounded memory: cache.clear() when >4000 entries (~few hundred KB max).
 * - Idempotent: a sentinel flag prevents double-wrapping across plugin reloads;
 *   re-registering only restarts the stats timer if it was stopped.
 * - Counts are tracked and logged every minute so we can see hit ratio in
 *   journalctl and validate the workaround is actually firing.
 *
 * If openclaw ever fixes the upstream cache-validity-check (issue text in
 * the comment above), this plugin can be uninstalled without consequence.
 */
import fs from 'node:fs';
import type { Stats, BigIntStats } from 'node:fs';

/** How long a memoized result (or error) stays valid, in milliseconds. */
const TTL_MS = 1000;
/** Entry count above which the whole cache is dropped on the next miss. */
const SOFT_CAP = 4000;
/** Period of the hit-ratio log line, in milliseconds. */
const STATS_INTERVAL_MS = 60_000;

// Path substring match. Anything that matches → memoized. Anything that
// doesn't → pass-through to the original. Keep the list short and only add
// patterns where the same path is statted many times per second by the
// plugin-discovery hot path.
const HOT_PATH_NEEDLES = [
  '/openclaw/dist/extensions/',
  '/.openclaw/plugins/',
  '/node_modules/@openclaw/',
  '/openclaw/plugin-sdk/',
];

/**
 * Returns true when `p` is a string containing one of the whitelisted
 * plugin-tree substrings. Non-string paths (Buffer, URL, ...) are never hot.
 */
function isHotPath(p: unknown): p is string {
  if (typeof p !== 'string') return false;
  return HOT_PATH_NEEDLES.some((needle) => p.includes(needle));
}

/** One memoized outcome: either a return value or the thrown/rejected error. */
interface CacheEntry {
  result: unknown;
  isError: boolean;
  /** Epoch milliseconds after which the entry is ignored. */
  expiresAt: number;
}

const cache = new Map<string, CacheEntry>();
const counters = { hits: 0, misses: 0, passthrough: 0, errors: 0 };

/** Drops the whole cache once it has grown past `SOFT_CAP` entries. */
function evictIfFull(): void {
  if (cache.size > SOFT_CAP) cache.clear();
}

/**
 * Builds the cache key from the wrapped function's name, the path and the
 * trailing arguments.
 *
 * The trailing args are typically an encoding or a `{ bigint: true }` option —
 * they are kept in the key so different shapes don't collide. If they cannot
 * be serialized, the argument count is used instead.
 */
function buildKey(name: string, path: string, args: unknown[]): string {
  let opts = '';
  if (args.length > 0) {
    try {
      opts = JSON.stringify(args);
    } catch {
      opts = String(args.length);
    }
  }
  return `${name}\x00${path}\x00${opts}`;
}

/**
 * Copies own properties from `source` onto `target`, skipping keys that
 * `target` already owns (`length`, `name`, `prototype`).
 *
 * This is what keeps `fs.realpathSync.native` reachable on the wrapper.
 */
function inheritOwnProps(target: object, source: object): void {
  for (const key of Reflect.ownKeys(source)) {
    if (Object.prototype.hasOwnProperty.call(target, key)) continue;
    const descriptor = Object.getOwnPropertyDescriptor(source, key);
    if (descriptor) Object.defineProperty(target, key, descriptor);
  }
}

/**
 * Wraps a synchronous fs function with the TTL memo.
 *
 * @param name - Label used in the cache key (one namespace per wrapped function).
 * @param orig - The original function; its first argument is the path.
 * @returns A function with the same call signature and own properties as `orig`.
 *   Non-hot paths are forwarded untouched; hot paths are served from the
 *   cache while the entry is fresh. Thrown errors are cached and rethrown.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- generic fs function shape
function wrapSync<F extends (...args: any[]) => any>(name: string, orig: F): F {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- mirrors `orig`
  const wrapped = function (this: unknown, path: unknown, ...rest: any[]): any {
    if (!isHotPath(path)) {
      counters.passthrough++;
      return orig.call(this, path, ...rest);
    }

    const now = Date.now();
    const key = buildKey(name, path, rest);
    const hit = cache.get(key);
    if (hit && hit.expiresAt > now) {
      counters.hits++;
      if (hit.isError) throw hit.result;
      return hit.result;
    }

    counters.misses++;
    evictIfFull();

    let result: unknown;
    let isError = false;
    try {
      result = orig.call(this, path, ...rest);
    } catch (err) {
      // ENOENT and friends — cache the error too so repeated "does this file
      // exist" probes don't hit the kernel again. Same TTL applies.
      result = err;
      isError = true;
      counters.errors++;
    }
    cache.set(key, { result, isError, expiresAt: now + TTL_MS });
    if (isError) throw result;
    return result;
  };
  inheritOwnProps(wrapped, orig);
  return wrapped as unknown as F;
}

/**
 * Wraps a promise-returning fs function with the TTL memo.
 *
 * Same contract as {@link wrapSync}, except that results and rejections are
 * cached once the underlying promise settles.
 *
 * @param name - Label used in the cache key.
 * @param orig - The original async function; its first argument is the path.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- generic fs function shape
function wrapAsync<F extends (...args: any[]) => Promise<any>>(name: string, orig: F): F {
  const wrapped = async function (
    this: unknown,
    path: unknown,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- mirrors `orig`
    ...rest: any[]
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- mirrors `orig`
  ): Promise<any> {
    if (!isHotPath(path)) {
      counters.passthrough++;
      return orig.call(this, path, ...rest);
    }

    const now = Date.now();
    const key = buildKey(name, path, rest);
    const hit = cache.get(key);
    if (hit && hit.expiresAt > now) {
      counters.hits++;
      if (hit.isError) throw hit.result;
      return hit.result;
    }

    counters.misses++;
    evictIfFull();

    try {
      const result = await orig.call(this, path, ...rest);
      cache.set(key, { result, isError: false, expiresAt: now + TTL_MS });
      return result;
    } catch (err) {
      counters.errors++;
      cache.set(key, { result: err, isError: true, expiresAt: now + TTL_MS });
      throw err;
    }
  };
  inheritOwnProps(wrapped, orig);
  return wrapped as unknown as F;
}

/** The subset of the host plugin API this module uses. */
interface PluginAPI {
  logger?: {
    info?: (...args: unknown[]) => void;
    warn?: (...args: unknown[]) => void;
    debug?: (...args: unknown[]) => void;
  };
  on?: (event: string, handler: () => void) => void;
}

const SENTINEL = '__openclawPerfCacheInstalled' as const;
const _G = globalThis as Record<string, unknown>;
let statsTimer: ReturnType<typeof setInterval> | null = null;

/**
 * Starts the periodic counter log if it is not already running.
 *
 * Counters are reset after each line so every line reads "since last log".
 * Nothing is logged for an interval with no traffic at all.
 */
function startStatsTimer(logger: PluginAPI['logger']): void {
  if (statsTimer) return;

  const timer = setInterval(() => {
    const total = counters.hits + counters.misses;
    if (total === 0 && counters.passthrough === 0) return;
    const hitRatio = total > 0 ? ((counters.hits / total) * 100).toFixed(1) : '0.0';
    logger?.info?.(
      `[perf-cache] last 60s: hits=${counters.hits} misses=${counters.misses} ` +
        `(hit-ratio ${hitRatio}%) passthrough=${counters.passthrough} ` +
        `errors=${counters.errors} cache_size=${cache.size}`,
    );
    counters.hits = 0;
    counters.misses = 0;
    counters.passthrough = 0;
    counters.errors = 0;
  }, STATS_INTERVAL_MS);
  // Don't keep the process alive just for this timer.
  timer.unref?.();
  statsTimer = timer;
}

/**
 * Installs the fs wrappers once per process and makes sure the stats timer
 * is running.
 *
 * A second call never re-wraps (guarded by a flag on `globalThis`), but it
 * does restart the stats timer when a previous `gateway_stop` cleared it, so
 * hit-ratio logging resumes after a stop/start cycle.
 */
function install(logger: PluginAPI['logger']): void {
  if (_G[SENTINEL]) {
    logger?.debug?.('[perf-cache] already installed; skipping');
    startStatsTimer(logger);
    return;
  }
  _G[SENTINEL] = true;

  // Sync versions
  fs.statSync = wrapSync('statSync', fs.statSync) as typeof fs.statSync;
  fs.lstatSync = wrapSync('lstatSync', fs.lstatSync) as typeof fs.lstatSync;
  fs.realpathSync = wrapSync('realpathSync', fs.realpathSync) as typeof fs.realpathSync;

  // Async (promises) versions — `lstat` was hot on prod profile (~38% even
  // after the push-driven baseline was killed)
  const fsp = fs.promises;
  fsp.stat = wrapAsync('stat', fsp.stat.bind(fsp)) as typeof fsp.stat;
  fsp.lstat = wrapAsync('lstat', fsp.lstat.bind(fsp)) as typeof fsp.lstat;
  fsp.realpath = wrapAsync('realpath', fsp.realpath.bind(fsp)) as typeof fsp.realpath;

  logger?.info?.(
    '[perf-cache] installed fs.{stat,lstat,realpath}Sync + fs.promises.{stat,lstat,realpath} ' +
      `with ${TTL_MS}ms TTL for plugin-tree paths only`,
  );

  startStatsTimer(logger);
}

const plugin = {
  id: 'openclaw-perf-cache',
  name: 'OpenClaw Perf Cache',
  /**
   * Plugin entry point: installs the wrappers and hooks `gateway_stop`.
   */
  register(api: PluginAPI): void {
    // Install *immediately* on register() — the snapshot lookups happen during
    // plugin loading and per agent turn, both of which need the wrapper in
    // place before they run. There's no `gateway_start` hook on the critical
    // path that fires before those.
    install(api.logger);

    api.on?.('gateway_stop', () => {
      if (statsTimer) {
        clearInterval(statsTimer);
        statsTimer = null;
      }
      // Note: we intentionally DON'T uninstall the fs wrappers on gateway_stop.
      // jiti caches module instances, and uninstalling at stop with reinstall
      // at next start would double-wrap on reload. Leaving the wrappers in
      // place is harmless — the sentinel check in install() guarantees we
      // never wrap twice.
    });
  },
};

export default plugin;