Files
Plexum-fabric-channel-plugin/cmd/plexum-fabric-channel-plugin/main.go
hzhang 0efcdfd342 feat: Phase F-2 — socket.io inbound + wakeup gate + token refresh
End-to-end Fabric inbound→Plexum→Fabric outbound now works against a
live Fabric stack:

  alice posts in bt2-clean (Fabric REST)
    → guild emits message.created over socket.io
    → plugin's wakeup gate decides dispatch
    → notifications/plexum/channel/inbound to host
    → Plexum agent runs (echo provider)
    → outbound `send` tool posts via Fabric REST
    → fabrictester reply visible in channel

internal/socketio/ (~280 LOC + 2 tests):
- Minimal Engine.IO v4 + Socket.IO v5 client over websocket
- WebSocket-only transport (skip polling upgrade dance)
- AuthFunc callback re-evaluated on every (re)connect — fixes the
  stale-JWT-on-reconnect bug openclaw plugin documented for the JS
  client's single-shot auth, which the available Go socket.io
  library (zishang520) doesn't address either
- PING/PONG per server-supplied interval
- Caller-driven reconnect: Connect returns on close, supervisor
  re-dials with fresh token

internal/tokens/ (~95 LOC + 9 tests):
- Per-agent session cache with 8min TTL (matches openclaw's
  TOKEN_TTL_MS); guild tokens are ~15min so 8min keeps a margin
- Invalidate forces re-login (used by inbound when CONNECT auth fires)
- GuildToken helper picks the per-guild JWT from the cached session;
  if the guild is missing from the cache, invalidate + retry once

internal/inbound/ (~290 LOC):
- Supervisor: one socket.io conn per (agent, guild); reconnect with
  fresh token on drop; ChannelSyncInterval (60s) polling + push
  channel.joined/channel.left handlers
- Wakeup gate: dm channels deliver any non-self message; other
  x_types require wakeup=true (record-only for non-wake non-dm
  deferred — Plexum has no history-injection equivalent in v1)
- Self-author filter on selfUserId from cached session
- Per-(agent,msgId) dedup bounded to 5000 entries
- Per-channel serial queue with 5s idle drain so concurrent inbounds
  on the same channel run one-at-a-time (matches openclaw plugin)
- Emits notifications/plexum/channel/inbound with session_id =
  "s_fab_<fabric_channel_id>" for stable per-channel session continuity

cmd/plexum-fabric-channel-plugin:
- Wires inbound supervisor at Init; runs in a background goroutine
  for the plugin's lifetime
- Replaces F-1's sessions map with tokens.Cache (same warm-sessions
  behavior, now backed by TTL)
- hostLogHandler: bridges slog records from inbound supervisor to
  HostAPI.Log notifications

F-2 deferred to F-3+:
- record-only history injection (Plexum v1 has no equivalent)
- tools.ts port (15 MCP tools — channel/canvas/sub-discussion family)
- presence-sync, command-sync, attachments, coalesce parity

Tests: 22 (5 identity + 6 config + 9 tokens + 2 socketio).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-31 15:29:01 +01:00

375 lines
12 KiB
Go

// plexum-fabric-channel-plugin is the Plexum channel plugin that
// connects Plexum agents to a Fabric guild as members.
//
// F-1: identity load, channel config discovery, REST send,
// agentLogin handshake. Plugin advertises channels via manifest +
// reads channels/<name>.json for the Plexum-channel → Fabric-channel
// mapping. The `send` outbound tool posts via Fabric REST.
//
// F-2 (current): socket.io inbound supervisor started from Init,
// wakeup gating, and a TTL-backed token cache that re-logs in on
// expiry (internal/inbound, internal/tokens).
//
// F-3+ (deferred): presence sync, sub-discussion, MCP tool surface
// (~15 tools from the openclaw plugin's tools.ts), attachments,
// channel canvas.
package main
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"os"
"path/filepath"
"sync"
"time"
plugin "git.hangman-lab.top/hzhang/Plexum-sdk-go/plugin"
"git.hangman-lab.top/hzhang/Plexum-fabric-channel-plugin/internal/config"
"git.hangman-lab.top/hzhang/Plexum-fabric-channel-plugin/internal/fabric"
"git.hangman-lab.top/hzhang/Plexum-fabric-channel-plugin/internal/identity"
"git.hangman-lab.top/hzhang/Plexum-fabric-channel-plugin/internal/inbound"
"git.hangman-lab.top/hzhang/Plexum-fabric-channel-plugin/internal/tokens"
)
// HostConfig is the plugin's own config, read by Init from
// <profile>/plugins/<config.PluginName>/config.json. Example:
//
//	{
//	  "center_api_base": "http://localhost:7001/api"
//	}
//
// A missing file is not an error: Init keeps the zero value and then
// applies the default described on CenterAPIBase.
type HostConfig struct {
// CenterAPIBase is the base URL handed to fabric.New. Init falls back
// to "http://localhost:7001/api" when it is empty.
CenterAPIBase string `json:"center_api_base"`
}
// fabricPlugin is the plugin implementation handed to plugin.Serve.
// All fields are populated by Init; the zero value is only usable for
// Manifest, which re-reads the channel bindings from disk itself.
type fabricPlugin struct {
// host is the HostAPI received in Init; used for logging and for
// emitting inbound notifications.
host plugin.HostAPI
// cfgPath is the resolved location of the plugin-private config.json.
cfgPath string
// cfg is the parsed plugin-private config (defaults applied in Init).
cfg HostConfig
// identities is the registry opened from the profile's identity file.
identities *identity.Registry
// bindings are the channel bindings loaded from <profile>/channels.
bindings []config.FabricBinding
// byFabric is config.Index(bindings). It is built in Init but not
// read anywhere in this file.
byFabric config.ByFabricChannel
// client is the Fabric client built from cfg.CenterAPIBase.
client *fabric.Client
// tokens caches per-agent sessions and re-logs in through the
// callback supplied in Init.
tokens *tokens.Cache
// Goroutine handle for the inbound supervisor. inboundCancel stops
// the supervisor's context and inboundDone is closed when its
// goroutine returns. Nothing in this file calls inboundCancel: the
// SDK has no explicit shutdown signal today, so the supervisor
// effectively runs until the subprocess is killed.
inboundCancel context.CancelFunc
inboundDone chan struct{}
// Legacy fields from the F-1 session map, superseded by tokens.
// sessions is still initialised in Init but never read in this
// file, and sessMu is not used in this file at all; both are safe
// to remove once nothing else references them.
sessMu sync.Mutex
sessions map[string]*fabric.Session
}
// Manifest describes the plugin to the host: its name, version,
// executable, the single `send` tool, and one channel contract per
// binding found on disk.
//
// The channel list is dynamic. The operator adds a
// channels/<name>.json and restarts the gateway, and the matching
// ChannelContract then shows up here. It has to be surfaced in the
// manifest as well because Plexum's host registry reads channel names
// from it.
func (p *fabricPlugin) Manifest() plugin.Manifest {
	// The only outbound tool; every channel contract points at it.
	sendTool := plugin.ToolContract{
		Name:        "send",
		Description: "Post a plain-text message to the bound Fabric channel as the agent user.",
		InputSchema: json.RawMessage(`{
"type": "object",
"properties": {
"channel_name": {"type": "string"},
"session_id": {"type": "string"},
"message": {"type": "string"}
},
"required": ["channel_name", "message"]
}`),
	}

	contracts := plugin.Contracts{
		Channels: p.dynamicChannelContracts(),
		Tools:    []plugin.ToolContract{sendTool},
	}

	return plugin.Manifest{
		Name:       config.PluginName,
		Version:    "0.1.0",
		Activation: plugin.ActivationLazy,
		Executable: "plexum-fabric-channel-plugin",
		Contracts:  contracts,
	}
}
// dynamicChannelContracts loads the bindings under <profile>/channels
// and returns one ChannelContract per binding, each using the `send`
// tool for outbound delivery.
//
// The profile root is $PLEXUM_PROFILE_ROOT, or <home>/.plexum when the
// variable is unset. It returns nil when the bindings cannot be loaded;
// the manifest call has no logger, and Init reports the same failure.
func (p *fabricPlugin) dynamicChannelContracts() []plugin.ChannelContract {
	root := os.Getenv("PLEXUM_PROFILE_ROOT")
	if root == "" {
		// The lookup error is ignored: an empty home directory makes the
		// root the relative path ".plexum".
		homeDir, _ := os.UserHomeDir()
		root = filepath.Join(homeDir, ".plexum")
	}

	loaded, err := config.Load(filepath.Join(root, "channels"))
	if err != nil {
		return nil
	}

	contracts := make([]plugin.ChannelContract, len(loaded))
	for i := range loaded {
		contracts[i] = plugin.ChannelContract{
			Name:         loaded[i].PlexumChannelName,
			OutboundTool: "send",
		}
	}
	return contracts
}
// Init is the plugin's startup hook. In order, it:
//
//  1. resolves the profile root ($PLEXUM_PROFILE_ROOT or <home>/.plexum),
//  2. loads the plugin-private config and builds the Fabric client,
//  3. opens the identity registry and loads the channel bindings,
//  4. builds the token cache, whose login callback re-authenticates an
//     agent from its stored API key,
//  5. warms a session for every bound agent (failures are only logged),
//  6. starts the inbound supervisor when at least one binding exists.
//
// It returns an error only for config, identity or binding load
// failures.
func (p *fabricPlugin) Init(ctx context.Context, host plugin.HostAPI) error {
	p.host = host
	p.sessions = map[string]*fabric.Session{}

	root := os.Getenv("PLEXUM_PROFILE_ROOT")
	if root == "" {
		// The lookup error is ignored: an empty home directory makes the
		// root the relative path ".plexum".
		homeDir, _ := os.UserHomeDir()
		root = filepath.Join(homeDir, ".plexum")
	}

	// Plugin-private config, then the client that depends on it.
	if err := p.loadHostConfig(root); err != nil {
		return err
	}
	p.client = fabric.New(p.cfg.CenterAPIBase)

	// Identity registry and channel bindings.
	var err error
	idPath := filepath.Join(root, identity.FileName)
	if p.identities, err = identity.Open(idPath); err != nil {
		return fmt.Errorf("identity: %w", err)
	}
	if p.bindings, err = config.Load(filepath.Join(root, "channels")); err != nil {
		return fmt.Errorf("channel bindings: %w", err)
	}
	p.byFabric = config.Index(p.bindings)

	// Token cache: re-login per agent on TTL miss (8min default).
	p.tokens = tokens.New(0, func(loginCtx context.Context, agentID string) (*fabric.Session, error) {
		ident := p.identities.Lookup(agentID)
		if ident == nil || !ident.Enabled {
			return nil, fmt.Errorf("agent %s: no enabled identity", agentID)
		}
		return p.client.AgentLogin(loginCtx, ident.FabricAPIKey)
	})

	host.Log("info", "fabric channel plugin initialized", map[string]any{
		"center":            p.cfg.CenterAPIBase,
		"identity_path":     idPath,
		"channels_loaded":   len(p.bindings),
		"identities_loaded": len(p.identities.AgentIDs()),
	})

	// Warm sessions (early bad-key detection). Failures are not fatal.
	if warmErr := p.warmSessions(ctx); warmErr != nil {
		host.Log("warn", "fabric warm-sessions had errors",
			map[string]any{"err": warmErr.Error()})
	}

	// Phase F-2: inbound only makes sense when something is bound.
	if len(p.bindings) > 0 {
		p.startInbound(host)
	}
	return nil
}

// loadHostConfig reads and parses the plugin-private config.json under
// profileRoot into p.cfg, tolerating a missing file, and applies the
// default center API base.
func (p *fabricPlugin) loadHostConfig(profileRoot string) error {
	p.cfgPath = filepath.Join(profileRoot, "plugins", config.PluginName, "config.json")

	raw, err := os.ReadFile(p.cfgPath)
	switch {
	case err == nil, errors.Is(err, os.ErrNotExist):
		// Either we have bytes, or the file is absent and defaults apply.
	default:
		return fmt.Errorf("read %s: %w", p.cfgPath, err)
	}

	if len(raw) > 0 {
		if err := json.Unmarshal(raw, &p.cfg); err != nil {
			return fmt.Errorf("parse %s: %w", p.cfgPath, err)
		}
	}
	if p.cfg.CenterAPIBase == "" {
		p.cfg.CenterAPIBase = "http://localhost:7001/api"
	}
	return nil
}

// startInbound launches the inbound supervisor in a background
// goroutine and records its cancel func and done channel on p.
//
// The supervisor context derives from context.Background rather than
// Init's ctx. It lives until p.inboundCancel fires, which currently
// never happens: the SDK has no shutdown hook, so subprocess kill is
// the only stop signal.
func (p *fabricPlugin) startInbound(host plugin.HostAPI) {
	ctxBg, cancel := context.WithCancel(context.Background())
	p.inboundCancel = cancel
	p.inboundDone = make(chan struct{})

	// Each dispatched inbound message becomes one host notification.
	notifier := func(channelName, message, sessionID string) {
		p.host.EmitNotification("notifications/plexum/channel/inbound", map[string]any{
			"channel_name": channelName,
			"message":      message,
			"session_id":   sessionID,
		})
	}

	// The supervisor logs through slog; hostLogHandler forwards records
	// at INFO and above to the host's log stream.
	logger := slog.New(&hostLogHandler{host: host, level: slog.LevelInfo})
	sup := inbound.New(p.client, p.tokens, p.bindings, notifier, logger)

	go func() {
		defer close(p.inboundDone)
		if runErr := sup.Run(ctxBg); runErr != nil {
			host.Log("warn", "inbound supervisor exited", map[string]any{"err": runErr.Error()})
		}
	}()

	host.Log("info", "fabric inbound supervisor started",
		map[string]any{"agents": sup.AgentIDs, "bindings": len(p.bindings)})
}
// hostLogHandler is a tiny slog.Handler that forwards records to the
// plugin's HostAPI.Log. inbound + supervisor use slog for structured
// logging; this bridges to the host's log notification stream.
//
// Attributes attached through Logger.With and groups opened through
// Logger.WithGroup are retained and emitted with every record. Group
// names are flattened into a dotted key prefix ("group.key") because
// the host log takes a flat map.
type hostLogHandler struct {
	host  plugin.HostAPI
	level slog.Level
	// attrs holds attributes accumulated via WithAttrs; their keys are
	// already qualified with the group prefix active when they were added.
	attrs []slog.Attr
	// prefix is the dotted group path ("a.b.") applied to the keys of
	// attributes added after WithGroup, including per-record attributes.
	prefix string
}

// Compile-time check that the bridge satisfies slog.Handler.
var _ slog.Handler = (*hostLogHandler)(nil)

// Enabled reports whether records at level l are forwarded.
func (h *hostLogHandler) Enabled(_ context.Context, l slog.Level) bool { return l >= h.level }

// Handle flattens the handler's stored attributes and the record's own
// attributes into one map and forwards it to the host log. On a key
// collision the record attribute wins. It always returns nil.
func (h *hostLogHandler) Handle(_ context.Context, r slog.Record) error {
	attrs := make(map[string]any, len(h.attrs)+r.NumAttrs())
	for _, a := range h.attrs {
		attrs[a.Key] = a.Value.Any()
	}
	r.Attrs(func(a slog.Attr) bool {
		attrs[h.prefix+a.Key] = a.Value.Any()
		return true
	})
	h.host.Log(levelString(r.Level), r.Message, attrs)
	return nil
}

// WithAttrs returns a handler that also emits as on every record. The
// receiver is left untouched, so sibling loggers do not see each
// other's attributes.
func (h *hostLogHandler) WithAttrs(as []slog.Attr) slog.Handler {
	if len(as) == 0 {
		return h
	}
	next := *h
	// Fresh backing array: appending must never write into storage shared
	// with the parent handler.
	next.attrs = make([]slog.Attr, 0, len(h.attrs)+len(as))
	next.attrs = append(next.attrs, h.attrs...)
	for _, a := range as {
		a.Key = h.prefix + a.Key
		next.attrs = append(next.attrs, a)
	}
	return &next
}

// WithGroup returns a handler that qualifies subsequently added keys
// with name. An empty name returns the receiver unchanged.
func (h *hostLogHandler) WithGroup(name string) slog.Handler {
	if name == "" {
		return h
	}
	next := *h
	next.prefix = h.prefix + name + "."
	return &next
}
// levelString maps a slog level onto the host log's level names:
// "debug", "info", "warn" or "error". Levels that fall between the
// named slog constants take the name of the nearest constant below
// them, and anything under LevelInfo is "debug".
func levelString(l slog.Level) string {
	if l < slog.LevelInfo {
		return "debug"
	}
	if l < slog.LevelWarn {
		return "info"
	}
	if l < slog.LevelError {
		return "warn"
	}
	return "error"
}
// warmSessions logs in every agent referenced by a channel binding so
// that a bad or missing API key surfaces at startup rather than on the
// first send.
//
// Each login goes through sessionFor, the same time-bounded helper that
// CallTool uses, so a single unresponsive login cannot stall Init
// indefinitely. Every failure is logged to the host. The returned error
// is the first failure encountered, or nil; agents are visited in map
// order, so which failure counts as "first" is unspecified when several
// agents fail.
func (p *fabricPlugin) warmSessions(ctx context.Context) error {
	// Which agents appear as a binding's AgentID?
	agentsNeeded := make(map[string]struct{}, len(p.bindings))
	for i := range p.bindings {
		agentsNeeded[p.bindings[i].AgentID] = struct{}{}
	}

	enabled := p.identities.EnabledEntries()
	var firstErr error
	for agentID := range agentsNeeded {
		if _, ok := enabled[agentID]; !ok {
			err := fmt.Errorf("agent %s has channels but no identity (run plexum-fabric-register --agent-id %s --api-key ...)",
				agentID, agentID)
			p.host.Log("warn", err.Error(), nil)
			if firstErr == nil {
				firstErr = err
			}
			continue
		}

		// sessionFor wraps tokens.Get with the per-login timeout.
		sess, err := p.sessionFor(ctx, agentID)
		if err != nil {
			p.host.Log("warn", "fabric agent warm failed",
				map[string]any{"agent": agentID, "err": err.Error()})
			if firstErr == nil {
				firstErr = err
			}
			continue
		}

		p.host.Log("info", "fabric session warm", map[string]any{
			"agent": agentID, "fabric_user": sess.User.Email,
			"guilds": len(sess.Guilds),
		})
	}
	return firstErr
}
// CallTool handles the "send" outbound tool: it resolves the Plexum
// channel name to a binding, obtains the bound agent's session, picks
// that guild's endpoint and access token, and posts the message via
// Fabric REST.
//
// A Go error is returned only for an unknown tool name or undecodable
// arguments. Every other failure is reported as a ToolResult with
// IsError set, so it is visible in the tool output.
func (p *fabricPlugin) CallTool(ctx context.Context, name string, input json.RawMessage) (plugin.ToolResult, error) {
	if name != "send" {
		return plugin.ToolResult{}, fmt.Errorf("unknown tool: %s", name)
	}

	var req struct {
		ChannelName string `json:"channel_name"`
		SessionID   string `json:"session_id"`
		Message     string `json:"message"`
	}
	if err := json.Unmarshal(input, &req); err != nil {
		return plugin.ToolResult{}, fmt.Errorf("parse args: %w", err)
	}
	if req.ChannelName == "" {
		return errResult("channel_name required"), nil
	}

	p.host.Log("info", "fabric send", map[string]any{
		"channel_name": req.ChannelName, "len": len(req.Message),
	})

	// Find the binding for this plexum channel name.
	var target *config.FabricBinding
	for i := range p.bindings {
		if candidate := &p.bindings[i]; candidate.PlexumChannelName == req.ChannelName {
			target = candidate
			break
		}
	}
	if target == nil {
		return errResult("unknown plexum channel: " + req.ChannelName), nil
	}

	// Resolve the bound agent's session through the token cache, which
	// re-logs in when its cached entry has expired.
	sess, err := p.sessionFor(ctx, target.AgentID)
	if err != nil {
		return errResult("session for agent " + target.AgentID + ": " + err.Error()), nil
	}

	// Pick the guild endpoint + token for the target guild_node_id.
	guildID := target.FabricGuildNodeID
	var endpoint, token string
	for _, guild := range sess.Guilds {
		if guild.NodeID == guildID {
			endpoint = guild.Endpoint
			break
		}
	}
	for _, access := range sess.GuildAccessTokens {
		if access.GuildNodeID == guildID {
			token = access.Token
			break
		}
	}
	if !(endpoint != "" && token != "") {
		return errResult(fmt.Sprintf("agent %s has no access to guild %s",
			target.AgentID, target.FabricGuildNodeID)), nil
	}

	postErr := p.client.PostMessage(ctx, endpoint, token,
		target.FabricChannelID, req.Message, sess.User.ID)
	if postErr != nil {
		return errResult("post: " + postErr.Error()), nil
	}
	return plugin.NewTextResult("sent"), nil
}
// sessionFor returns the session for agentID from the token cache,
// bounding the lookup (and any login it triggers) to 15 seconds on top
// of whatever deadline ctx already carries.
func (p *fabricPlugin) sessionFor(ctx context.Context, agentID string) (*fabric.Session, error) {
	bounded, release := context.WithTimeout(ctx, 15*time.Second)
	defer release()

	return p.tokens.Get(bounded, agentID)
}
// errResult wraps msg in a single text content block and marks the
// tool result as an error.
func errResult(msg string) plugin.ToolResult {
	block := plugin.ContentBlock{Type: "text", Text: msg}

	var res plugin.ToolResult
	res.Content = []plugin.ContentBlock{block}
	res.IsError = true
	return res
}
// main serves the plugin until plugin.Serve returns. A non-nil error
// is printed to stderr and the process exits with status 1.
func main() {
	err := plugin.Serve(&fabricPlugin{})
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "plexum-fabric-channel-plugin: %v\n", err)
	os.Exit(1)
}