Files
SynthesisAgent.OpenclawPlugin/core/process-manager.ts
zhi 3229fbd024 fix: end-to-end working after laptop integration test
Discovered during smoke-testing on hzhang's laptop:

1. `--channels server:X --dangerously-load-development-channels server:X`
   makes Claude Code list the channel twice and the second copy never
   inherits dev-mode, leaving "server: entries need
   --dangerously-load-development-channels" stuck in the status panel.
   Fix: pass channel ONLY via --dangerously-load-development-channels.

2. Without a controlling TTY, Claude Code's dev-mode confirmation dialog
   blocks forever waiting for keystrokes that never arrive. Fix: spawn
   claude wrapped in `script -q -c CMD PTYLOG` so it gets a PTY, then
   write "\r" to stdin at several timeouts (cheap to over-send).

3. process-manager.markReady was matching on PID, but the PID in the
   bridge hello frame is the ClaudePlugin (bun) process's pid, not the
   script-wrapped claude process's pid we tracked. Fix: match on
   openclaw-session-key, which is consistent on both sides.

4. First spawn for a new session can't use --resume (no transcript exists
   yet) — claude errors out. Fix: probe
   ~/.claude/projects/<workspace-slug>/<uuid>.jsonl for existence and use
   --session-id on fresh sessions, --resume after a process restart.

5. Add --debug-file per session so future debugging has the gating logs.

6. Local definePluginEntry shim (no openclaw runtime dependency) so
   `bun index.ts` works standalone for laptop smoke tests.

End-to-end verified twice on laptop: curl POST -> SSE delta with the
exact reply text. Average cold-start ~10s, hot path 2-3s.
2026-05-14 14:00:32 +00:00

235 lines
8.7 KiB
TypeScript

import { spawn, type ChildProcess } from 'node:child_process'
import { existsSync } from 'node:fs'
import { join } from 'node:path'
import { homedir } from 'node:os'
import type { SynthesisConfig } from './config.js'
import type { SessionMapping } from './session-mapping.js'
/**
 * Bookkeeping record for one spawned `claude` child process, as created by
 * `ProcessManager.spawn` and stored in the manager's per-session map.
 */
export interface ProcessHandle {
/** `proc.pid` of the spawned child; `-1` when the child has no pid. */
pid: number
/** OpenClaw session key this process serves; also its key in the pool. */
openclawSessionKey: string
/** Claude session UUID passed to `--session-id` / `--resume`. */
claudeSessionUuid: string
/** Directory the child was started in (used as its `cwd`). */
workspace: string
/** Underlying Node child-process object. */
proc: ChildProcess
/** `Date.now()` timestamp taken when the handle was created. */
startedAt: number
/** `Date.now()` timestamp of the last use; compared against `idleKillMs`. */
lastActiveAt: number
/** Resolves once ClaudePlugin sends `hello` over the bridge WS. */
ready: Promise<void>
/** Resolves `ready`; invoked through `ProcessManager.markReady`. */
markReady: () => void
}
/** Collaborators injected into `ProcessManager` through its constructor. */
export interface ProcessManagerDeps {
/** Plugin configuration; the manager reads `maxProcesses`, `idleKillMs`, `channelWsPort`, `channelName` and `permissionMode` from it. */
config: SynthesisConfig
/** Session-key to Claude-session-UUID mapping; the manager calls its `ensure` and `touch`. */
mapping: SessionMapping
/** Optional logger; when omitted the manager writes `[synthesis-pm]`-prefixed lines to stderr. */
logger?: { info: (...a: unknown[]) => void; warn: (...a: unknown[]) => void }
}
/**
 * Owns the pool of long-lived `claude` subprocesses, one per OpenClaw
 * session key. Processes are spawned lazily on the first `ensure()` for a
 * key and are terminated once they have been idle for `config.idleKillMs`.
 *
 * Spawn shape (executed through `script -q -c <cmd> <ptyLog>` so that claude
 * runs on a PTY):
 *   claude --dangerously-load-development-channels server:<channelName>
 *          --session-id <uuid>   (no saved transcript yet)
 *        | --resume <uuid>       (a saved transcript exists)
 *          --permission-mode <permissionMode>
 *          --debug-file /tmp/synthesis-claude-<sanitised-key>.debug.log
 *   env: SYNTHESIS_WS_URL, SYNTHESIS_OPENCLAW_SESSION,
 *        SYNTHESIS_CLAUDE_SESSION, TERM
 *
 * Startup prompts (such as the dev-channels confirmation) are dismissed by
 * writing "\r" to the child's stdin at a few fixed delays after spawn. A
 * process counts as usable only after the bridge server has reported its
 * `hello` frame through `markReady()`.
 */
export class ProcessManager {
  /** How long `ensure()` waits for the bridge handshake before failing. */
  private static readonly READY_TIMEOUT_MS = 15_000

  private byKey = new Map<string, ProcessHandle>()
  private idleSweeper: ReturnType<typeof setInterval> | null = null
  private shuttingDown = false
  private log: NonNullable<ProcessManagerDeps['logger']>

  /**
   * @param deps Configuration, session mapping and optional logger. Without a
   *   logger, messages go to stderr with a `[synthesis-pm]` prefix.
   */
  constructor(private deps: ProcessManagerDeps) {
    this.log = deps.logger ?? {
      info: (...a) => process.stderr.write(`[synthesis-pm] ${a.join(' ')}\n`),
      warn: (...a) => process.stderr.write(`[synthesis-pm] WARN ${a.join(' ')}\n`),
    }
    this.idleSweeper = setInterval(() => this.sweepIdle(), 60_000)
  }

  /**
   * Spawn-if-needed and await the ClaudePlugin handshake.
   *
   * @param openclawSessionKey Pool key of the session.
   * @param workspace Working directory for a newly spawned process.
   * @returns The handle of a process whose handshake has completed.
   * @throws Error when the handshake does not arrive within 15 seconds, or
   *   when the child exits / fails to start before it arrives.
   */
  async ensure(openclawSessionKey: string, workspace: string): Promise<ProcessHandle> {
    const existing = this.byKey.get(openclawSessionKey)
    if (existing && !existing.proc.killed && existing.proc.exitCode === null) {
      // Another caller may have spawned this process and still be waiting for
      // its handshake; do not hand out a handle that is not ready yet.
      await this.awaitReady(existing)
      // Refreshed only after readiness so that a process which never becomes
      // ready is still eligible for the idle sweep.
      existing.lastActiveAt = Date.now()
      return existing
    }
    if (this.byKey.size >= this.deps.config.maxProcesses) this.evictOldestIdle()
    const rec = this.deps.mapping.ensure(openclawSessionKey)
    const handle = this.spawn(openclawSessionKey, rec.claudeSessionUuid, workspace)
    this.byKey.set(openclawSessionKey, handle)
    await this.awaitReady(handle)
    this.deps.mapping.touch(openclawSessionKey)
    return handle
  }

  /** Bridge server calls this when it sees a `hello` frame for this session. */
  markReady(openclawSessionKey: string): void {
    const h = this.byKey.get(openclawSessionKey)
    if (h) h.markReady()
  }

  /** Records activity for a session so the idle sweep leaves it alone. */
  touch(openclawSessionKey: string): void {
    const h = this.byKey.get(openclawSessionKey)
    if (h) h.lastActiveAt = Date.now()
  }

  /**
   * Waits for `handle.ready`, failing on timeout, child exit, or child spawn
   * error, whichever happens first. The timer and the temporary listeners are
   * always removed again, so repeated calls do not accumulate them.
   */
  private awaitReady(handle: ProcessHandle): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      const settle = (failure?: Error): void => {
        clearTimeout(timer)
        handle.proc.off('exit', onExit)
        handle.proc.off('error', onError)
        if (failure) reject(failure)
        else resolve()
      }
      const onExit = (): void =>
        settle(new Error(`claude exited before ready session=${handle.openclawSessionKey}`))
      const onError = (err: Error): void => settle(err)
      const timer = setTimeout(
        () => settle(new Error('claude spawn ready timeout (15s)')),
        ProcessManager.READY_TIMEOUT_MS,
      )
      handle.proc.once('exit', onExit)
      handle.proc.once('error', onError)
      void handle.ready.then(() => settle())
    })
  }

  /**
   * Quotes one argument for the POSIX shell that `script -c` hands the
   * command line to. Arguments made only of characters that are never special
   * to the shell are passed through unchanged; everything else is wrapped in
   * single quotes, with embedded single quotes written as `'\''`.
   */
  private static shellQuote(arg: string): string {
    if (/^[\w@%+=:,./-]+$/.test(arg)) return arg
    return `'${arg.replace(/'/g, `'\\''`)}'`
  }

  /**
   * Starts one `claude` process for the session and wires up its lifecycle
   * handlers. The returned handle is not ready yet; readiness is signalled
   * later through `markReady()`.
   */
  private spawn(openclawSessionKey: string, claudeSessionUuid: string, workspace: string): ProcessHandle {
    const { config } = this.deps
    const wsUrl = `ws://127.0.0.1:${config.channelWsPort}/bridge`
    const channelTag = `server:${config.channelName}`
    // First spawn for this session-id uses --session-id (fresh); subsequent
    // spawns (after idle reap / OOM / crash) use --resume to pick up the
    // saved transcript at ~/.claude/projects/<workspace-slug>/<uuid>.jsonl.
    const hasExistingSession = claudeSessionFileExists(workspace, claudeSessionUuid)
    const sessionFlag = hasExistingSession
      ? ['--resume', claudeSessionUuid]
      : ['--session-id', claudeSessionUuid]
    // Pass channel ONLY via --dangerously-load-development-channels.
    // Passing the same channel through both --channels and
    // --dangerously-load-development-channels makes Claude Code list it
    // twice ("Listening for channel messages from: X, X") and the second
    // entry never inherits the dev-mode flag — the status panel ends up
    // saying "server: entries need --dangerously-load-development-channels"
    // even after the dev-mode confirmation. The dev flag implies the
    // channel is loaded, so --channels is redundant here.
    const fileTag = openclawSessionKey.replace(/[^a-z0-9]/gi, '_')
    const debugFile = `/tmp/synthesis-claude-${fileTag}.debug.log`
    const args = [
      '--dangerously-load-development-channels', channelTag,
      ...sessionFlag,
      '--permission-mode', config.permissionMode,
      '--debug-file', debugFile,
    ]
    this.log.info(`spawning claude session=${openclawSessionKey} claude_uuid=${claudeSessionUuid} workspace=${workspace}`)
    // Wrap claude in `script(1)` to give it a PTY. Without a TTY, Claude
    // Code's interactive prompts (workspace-trust, dev-channels confirm)
    // block forever waiting for keystrokes that never arrive. `script -q -c`
    // forks a PTY, runs the command inside it and records the terminal
    // output in `ptyLog`. The command string is interpreted by a shell, so
    // every argument is shell-quoted.
    const claudeCmd = ['claude', ...args].map(a => ProcessManager.shellQuote(a)).join(' ')
    const ptyLog = `/tmp/synthesis-claude-${fileTag}.pty`
    const proc = spawn('script', ['-q', '-c', claudeCmd, ptyLog], {
      cwd: workspace,
      env: {
        ...process.env,
        SYNTHESIS_WS_URL: wsUrl,
        SYNTHESIS_OPENCLAW_SESSION: openclawSessionKey,
        SYNTHESIS_CLAUDE_SESSION: claudeSessionUuid,
        TERM: 'xterm-256color',
      },
      stdio: ['pipe', 'pipe', 'pipe'],
      detached: false,
    })
    // Nothing consumes the terminal output that `script` echoes to stdout,
    // but the pipe must still be drained: once its buffer fills, the child
    // blocks on write.
    proc.stdout?.resume()
    // Writes to a dead child fail asynchronously (EPIPE) as an 'error' event
    // on the stream; without a listener that event would crash the host.
    proc.stdin?.on('error', (err: Error) => {
      this.log.warn(`claude stdin error session=${openclawSessionKey} ${err.message}`)
    })
    // Pre-feed Enter keystrokes through the PTY to dismiss any startup
    // prompts (dev-channels confirm; trusted-workspace if anything slipped
    // through). Cheap to over-send — Claude eats unexpected keystrokes.
    const keystrokeTimers = [800, 1500, 2500, 4000, 6000].map(delay =>
      setTimeout(() => {
        try {
          if (proc.exitCode === null && proc.stdin?.writable) proc.stdin.write('\r')
        } catch { /* best-effort: the child may already be gone */ }
      }, delay),
    )
    let resolveReady!: () => void
    const ready = new Promise<void>(r => { resolveReady = r })
    const handle: ProcessHandle = {
      pid: proc.pid ?? -1,
      openclawSessionKey,
      claudeSessionUuid,
      workspace,
      proc,
      startedAt: Date.now(),
      lastActiveAt: Date.now(),
      ready,
      markReady: () => resolveReady(),
    }
    // Shared teardown: stop pending keystrokes and drop the pool entry, but
    // only if the entry still belongs to THIS process. A replacement may have
    // been registered under the same key while this one was shutting down.
    const release = (): void => {
      for (const t of keystrokeTimers) clearTimeout(t)
      if (this.byKey.get(openclawSessionKey) === handle) this.byKey.delete(openclawSessionKey)
    }
    proc.on('exit', code => {
      release()
      this.log.info(`claude exit session=${openclawSessionKey} pid=${handle.pid} code=${code}`)
    })
    // Emitted e.g. when the `script` binary cannot be started; an unhandled
    // 'error' event on a ChildProcess would otherwise throw.
    proc.on('error', (err: Error) => {
      release()
      this.log.warn(`claude spawn error session=${openclawSessionKey} pid=${handle.pid} ${err.message}`)
    })
    proc.stderr?.on('data', chunk => {
      process.stderr.write(`[claude:${handle.pid}] ${chunk}`)
    })
    return handle
  }

  /**
   * Periodic sweep: signals every process whose last activity is older than
   * `config.idleKillMs`. The pool entry is removed by the exit handler.
   */
  private sweepIdle(): void {
    if (this.shuttingDown) return
    const cutoff = Date.now() - this.deps.config.idleKillMs
    for (const h of this.byKey.values()) {
      if (h.lastActiveAt < cutoff) {
        this.log.info(`idle-killing session=${h.openclawSessionKey} pid=${h.pid}`)
        // Still present on a later sweep means the earlier SIGTERM did not
        // end the process, so escalate.
        h.proc.kill(h.proc.killed ? 'SIGKILL' : 'SIGTERM')
      }
    }
  }

  /**
   * Makes room for a new process by terminating the least recently active
   * one (whether or not it has reached the idle threshold).
   */
  private evictOldestIdle(): void {
    let oldest: ProcessHandle | null = null
    for (const h of this.byKey.values()) {
      if (!oldest || h.lastActiveAt < oldest.lastActiveAt) oldest = h
    }
    if (oldest) {
      this.log.info(`evicting session=${oldest.openclawSessionKey} (max processes reached)`)
      oldest.proc.kill('SIGTERM')
      // Free the slot right away instead of waiting for the exit event, so
      // the pool size reflects the eviction before the replacement is added.
      this.byKey.delete(oldest.openclawSessionKey)
    }
  }

  /** Snapshot of all handles currently in the pool. */
  list(): ProcessHandle[] {
    return [...this.byKey.values()]
  }

  /** Synchronous helper exposed for tests / admin tools. */
  static sessionFileExists(workspace: string, uuid: string): boolean {
    return claudeSessionFileExists(workspace, uuid)
  }

  /**
   * Stops the idle sweeper, sends SIGTERM to every pooled process and empties
   * the pool. Does not wait for the processes to exit.
   */
  async shutdown(): Promise<void> {
    this.shuttingDown = true
    if (this.idleSweeper) {
      clearInterval(this.idleSweeper)
      this.idleSweeper = null
    }
    for (const h of this.byKey.values()) {
      try { h.proc.kill('SIGTERM') } catch { /* ignore */ }
    }
    this.byKey.clear()
  }
}
/**
 * Reports whether a saved Claude Code transcript exists for a session.
 *
 * The file probed is
 *   ~/.claude/projects/<workspace-slug>/<session-uuid>.jsonl
 * where the slug is the workspace path with every `/` turned into `-`.
 *
 * @param workspace Workspace path the session runs in.
 * @param uuid Claude session UUID.
 * @returns `true` when the transcript file is present; `false` when it is
 *   absent or when probing throws.
 */
function claudeSessionFileExists(workspace: string, uuid: string): boolean {
  let found = false
  try {
    const projectDir = workspace.split('/').join('-')
    const transcript = join(homedir(), '.claude', 'projects', projectDir, `${uuid}.jsonl`)
    found = existsSync(transcript)
  } catch {
    found = false
  }
  return found
}