// Plexum-openai-provider/internal/runner/session_parse.go

// session_parse — after codex exits, scan its rollout JSONL session
// file for tool calls and tool results. Codex records both native
// (function_call/function_call_output) and MCP (function_call with
// `namespace = "mcp__<server>"`) calls with the same shape and a
// canonical `call_id`. We use those ids when emitting EventToolCall*
// + EventToolResult so the agentic loop's iteration record matches
// what's actually on disk in codex's rollout — which is what
// SessionMutator rewrites on consume.

package runner

import (
	"bufio"
	"bytes"
	"encoding/json"
	"errors"
	"os"

	"git.hangman-lab.top/hzhang/Plexum-sdk-go/canonical"
)

// CodexToolCall is one (call_id, name, arguments, output) tuple
// extracted from a rollout JSONL. A value is assembled from up to two
// records sharing the same call_id: a function_call record fills
// Name/Namespace/Arguments and a function_call_output record fills
// Output. Either half may be missing, in which case the corresponding
// fields keep their zero value.
type CodexToolCall struct {
	CallID    string // payload.call_id; the key that pairs a call with its output
	Name      string // payload.name of the function_call record
	Namespace string // "mcp__<server>" for MCP calls; "" for native
	Arguments string // JSON string per codex's format
	Output    string // function_call_output.output; "" if no result yet
}

// ParseRolloutToolCalls reads the rollout JSONL file at path in a
// single pass and returns one CodexToolCall per distinct call_id, in
// the order each call_id was first encountered.
//
// Only records whose top-level type is "response_item" and whose
// payload carries a non-empty call_id are considered. A
// "function_call" payload supplies Name, Namespace and Arguments; a
// "function_call_output" payload supplies Output. A call with no
// output record (e.g. mid-turn truncation) is still returned with
// Output == "", and an output with no preceding call is returned with
// only CallID and Output set.
//
// Blank lines and lines that fail to decode are skipped silently. A
// missing file is not an error: it yields (nil, nil). Any other open
// failure or a scanner error (including a line longer than 16 MiB)
// yields (nil, err).
func ParseRolloutToolCalls(path string) ([]CodexToolCall, error) {
	file, err := os.Open(path)
	if errors.Is(err, os.ErrNotExist) {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	defer file.Close()

	// rolloutLine is the subset of a rollout record this parser reads.
	type rolloutLine struct {
		Type    string `json:"type"`
		Payload struct {
			Type      string `json:"type"`
			Name      string `json:"name"`
			Namespace string `json:"namespace"`
			Arguments string `json:"arguments"`
			CallID    string `json:"call_id"`
			Output    any    `json:"output"`
		} `json:"payload"`
	}

	// byID lets an output record find its call; seen remembers the
	// first-appearance order so the result is stable.
	byID := make(map[string]*CodexToolCall)
	var seen []*CodexToolCall

	// entry returns the tuple for id, registering a fresh one the
	// first time the id shows up (whether via a call or an output).
	entry := func(id string) *CodexToolCall {
		if tc, found := byID[id]; found {
			return tc
		}
		tc := &CodexToolCall{CallID: id}
		byID[id] = tc
		seen = append(seen, tc)
		return tc
	}

	scanner := bufio.NewScanner(file)
	// Start with a 64 KiB buffer and allow single lines up to 16 MiB.
	scanner.Buffer(make([]byte, 64*1024), 16*1024*1024)
	for scanner.Scan() {
		raw := scanner.Bytes()
		if len(raw) == 0 {
			continue
		}
		var line rolloutLine
		if json.Unmarshal(raw, &line) != nil {
			// Undecodable line: ignore it and keep scanning.
			continue
		}
		item := line.Payload
		if line.Type != "response_item" || item.CallID == "" {
			continue
		}
		switch item.Type {
		case "function_call":
			tc := entry(item.CallID)
			tc.Name, tc.Namespace, tc.Arguments = item.Name, item.Namespace, item.Arguments
		case "function_call_output":
			tc := entry(item.CallID)
			tc.Output = outputAsString(item.Output)
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	result := make([]CodexToolCall, len(seen))
	for i, tc := range seen {
		result[i] = *tc
	}
	return result, nil
}

// outputAsString flattens whatever shape codex writes for
// function_call_output.output into a single string.
//
//   - nil (field absent or JSON null) yields "".
//   - A string is returned unchanged.
//   - Anything else (object, array, number, bool) is re-encoded as
//     compact JSON; if encoding fails the result is "".
//
// The JSON encoding deliberately disables HTML escaping: json.Marshal
// would rewrite '<', '>' and '&' as \u003c, \u003e and \u0026, which
// alters tool output that contains code or markup.
func outputAsString(raw any) string {
	switch v := raw.(type) {
	case nil:
		return ""
	case string:
		return v
	}

	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(raw); err != nil {
		return ""
	}
	// Encode terminates each value with a newline that Marshal would
	// not have produced; drop it so the result is the bare JSON text.
	return string(bytes.TrimSuffix(buf.Bytes(), []byte("\n")))
}

// EmitCodexToolCalls replays the parsed calls as TurnEvents, in slice
// order. Each call produces exactly three events, all keyed by its
// CallID: EventToolCallStart (carrying the tool name and the raw
// arguments JSON), EventToolCallEnd, and EventToolResult wrapping the
// output in a single text block. The result event is emitted for
// every call, including those whose Output is "".
//
// Per the design of the surrounding loop, the tool_use blocks land in
// the assistant message and EventToolResult lands on the iteration's
// ToolResults under the matching call_id.
func EmitCodexToolCalls(calls []CodexToolCall, emit func(canonical.TurnEvent)) {
	for _, call := range calls {
		// ToolName is codex's `function_call.name` as recorded. For MCP
		// calls codex stores the name without the mcp__ prefix and
		// keeps the server in `namespace`; the tool name the agent
		// originally saw (e.g. "mcp__plexum-host-alice__plexum_echo")
		// can be rebuilt from namespace + name if ever needed. The
		// plain name is enough here because downstream consume sync
		// targets the call_id, which is the source of truth.
		start := canonical.TurnEvent{
			Type:        canonical.EventToolCallStart,
			ToolCallID:  call.CallID,
			ToolName:    call.Name,
			PartialJSON: call.Arguments,
		}
		emit(start)

		end := canonical.TurnEvent{
			Type:       canonical.EventToolCallEnd,
			ToolCallID: call.CallID,
		}
		emit(end)

		result := &canonical.ToolResultBlock{
			Type:      canonical.BlockTypeToolResult,
			ToolUseID: call.CallID,
			Content: []canonical.Block{
				canonical.NewTextBlock(call.Output),
			},
		}
		emit(canonical.TurnEvent{
			Type:       canonical.EventToolResult,
			ToolResult: result,
		})
	}
}