fix(socketio): remove broken client-initiated ping (engine.io v4)
The socket.io client was sending its own EIO ping frames every
pingInterval (default 25s). That's wrong for engine.io v4: in v4 the
SERVER initiates pings and the CLIENT must respond with pong inside
pingTimeout, else the server closes the connection. Client-initiated
pings get misinterpreted by Fabric's NestJS socket.io backend, which
quietly closes the connection — producing the warn-flap every ~25s:
inbound: socket ended; reconnecting
err="read: failed to get reader: received close frame:
status = StatusNoStatusRcvd and reason = \"\""
Fix:
- delete pingLoop() entirely
- delete the pingPeriod/pingTimeout struct fields + their assignments
in recvOpen (server enforces both anyway; client doesn't need them)
- keep the eioPing case in handlePacket (already correct — responds
with pong)
- drop the now-unused "time" import
End-to-end verified on live Fabric:
- Restarted Plexum at 20:17:35; watched for 90+ seconds
- ZERO "socket ended" events (vs. ~3-4 per 90s before the fix)
- Channel inbound still delivers: alice posted seq=20 → gem agent
(gemini CLI) replied seq=21 "pong"
The plugin no longer flaps. Reconnect backoff machinery (1s→60s)
stays in place as a safety net for genuine network drops.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -24,7 +24,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
"net/url"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
|
||||||
|
|
||||||
"nhooyr.io/websocket"
|
"nhooyr.io/websocket"
|
||||||
)
|
)
|
||||||
@@ -65,11 +64,9 @@ type Client struct {
|
|||||||
|
|
||||||
// Read-only after Connect; mutating during a live connection is
|
// Read-only after Connect; mutating during a live connection is
|
||||||
// undefined.
|
// undefined.
|
||||||
handlers map[string]Handler
|
handlers map[string]Handler
|
||||||
handlerMu sync.RWMutex
|
handlerMu sync.RWMutex
|
||||||
conn *websocket.Conn
|
conn *websocket.Conn
|
||||||
pingPeriod time.Duration // from server "open" packet
|
|
||||||
pingTimeout time.Duration
|
|
||||||
|
|
||||||
// closed-on-disconnect; Connect returns when this fires.
|
// closed-on-disconnect; Connect returns when this fires.
|
||||||
disconnected chan struct{}
|
disconnected chan struct{}
|
||||||
@@ -168,10 +165,12 @@ func (c *Client) Connect(ctx context.Context) error {
|
|||||||
return fmt.Errorf("CONNECT: %w", err)
|
return fmt.Errorf("CONNECT: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
pingCtx, cancelPing := context.WithCancel(ctx)
|
// Engine.IO v4 keepalive contract: the SERVER pings every
|
||||||
defer cancelPing()
|
// pingInterval; we MUST pong within pingTimeout or it disconnects.
|
||||||
go c.pingLoop(pingCtx)
|
// We do NOT initiate pings ourselves (an earlier version did, which
|
||||||
|
// caused Fabric guild backend to close the socket every ~25s — the
|
||||||
|
// client-side ping was misinterpreted as a misbehaving peer). Server
|
||||||
|
// pings are answered by handlePacket's eioPing case.
|
||||||
for {
|
for {
|
||||||
_, data, err := conn.Read(ctx)
|
_, data, err := conn.Read(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -197,7 +196,10 @@ func (c *Client) Disconnect() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// recvOpen reads the EIO "open" frame and stashes ping intervals.
|
// recvOpen reads the EIO "open" frame. The server announces its
|
||||||
|
// pingInterval + pingTimeout there; we just acknowledge — the
|
||||||
|
// keepalive cadence is server-driven (we pong on demand, see
|
||||||
|
// handlePacket's eioPing case).
|
||||||
func (c *Client) recvOpen(ctx context.Context) error {
|
func (c *Client) recvOpen(ctx context.Context) error {
|
||||||
_, data, err := c.conn.Read(ctx)
|
_, data, err := c.conn.Read(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -206,19 +208,14 @@ func (c *Client) recvOpen(ctx context.Context) error {
|
|||||||
if len(data) < 2 || data[0] != eioOpen {
|
if len(data) < 2 || data[0] != eioOpen {
|
||||||
return fmt.Errorf("expected EIO open, got %q", string(data))
|
return fmt.Errorf("expected EIO open, got %q", string(data))
|
||||||
}
|
}
|
||||||
|
// Parse just to fail fast on malformed open; the values are
|
||||||
|
// informational (server enforces them).
|
||||||
var info struct {
|
var info struct {
|
||||||
Sid string `json:"sid"`
|
Sid string `json:"sid"`
|
||||||
PingInterval int `json:"pingInterval"`
|
|
||||||
PingTimeout int `json:"pingTimeout"`
|
|
||||||
}
|
}
|
||||||
if err := json.Unmarshal(data[1:], &info); err != nil {
|
if err := json.Unmarshal(data[1:], &info); err != nil {
|
||||||
return fmt.Errorf("parse open: %w", err)
|
return fmt.Errorf("parse open: %w", err)
|
||||||
}
|
}
|
||||||
c.pingPeriod = time.Duration(info.PingInterval) * time.Millisecond
|
|
||||||
c.pingTimeout = time.Duration(info.PingTimeout) * time.Millisecond
|
|
||||||
if c.pingPeriod <= 0 {
|
|
||||||
c.pingPeriod = 25 * time.Second // EIO default
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -238,35 +235,16 @@ func (c *Client) sendConnect(ctx context.Context, auth map[string]any) error {
|
|||||||
return c.conn.Write(ctx, websocket.MessageText, body)
|
return c.conn.Write(ctx, websocket.MessageText, body)
|
||||||
}
|
}
|
||||||
|
|
||||||
// pingLoop sends EIO ping frames per server-supplied interval.
|
|
||||||
func (c *Client) pingLoop(ctx context.Context) {
|
|
||||||
ticker := time.NewTicker(c.pingPeriod)
|
|
||||||
defer ticker.Stop()
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return
|
|
||||||
case <-ticker.C:
|
|
||||||
if c.conn == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
writeCtx, cancel := context.WithTimeout(ctx, c.pingTimeout)
|
|
||||||
err := c.conn.Write(writeCtx, websocket.MessageText, []byte{eioPing})
|
|
||||||
cancel()
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// handlePacket inspects the first byte (EIO type) + dispatches.
|
// handlePacket inspects the first byte (EIO type) + dispatches.
|
||||||
func (c *Client) handlePacket(data []byte) error {
|
func (c *Client) handlePacket(data []byte) error {
|
||||||
switch data[0] {
|
switch data[0] {
|
||||||
case eioPong:
|
case eioPong:
|
||||||
return nil // server responding to our ping (or vice versa)
|
// Server doesn't normally send unsolicited pongs in v4; ignore
|
||||||
|
// if it does. (Future: could be used to confirm an upgrade ack.)
|
||||||
|
return nil
|
||||||
case eioPing:
|
case eioPing:
|
||||||
// Server-initiated ping; reply with pong.
|
// Server keepalive ping — MUST respond with pong inside
|
||||||
|
// server's pingTimeout, else server closes the connection.
|
||||||
if c.conn == nil {
|
if c.conn == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user