fix: moderator presence reconnect loop

Root causes:
1. Multiple plugin subsystems each called startModeratorPresence,
   creating competing WebSocket connections to the same bot token.
   Discord only allows one connection per bot → 4008 rate limit →
   infinite reconnect loop (1000+ connects → token reset by Discord)

2. Invalid session (op 9) handler called scheduleReconnect, but the
   new connection would also get kicked → cascading reconnects

Fixes:
- Singleton guard: startModeratorPresence is a no-op if already started
- cleanup() nullifies old ws handlers before creating new connection
- Stale ws check: all callbacks verify they belong to current ws
- Exponential backoff with cap (max 60s) instead of fixed 2-5s delay
- heartbeat ACK tracking: detect zombie connections
- Non-recoverable codes (4004) properly stop all reconnection
This commit is contained in:
zhi
2026-02-28 18:49:17 +00:00
parent 385990ab90
commit 86fdc63802

View File

@@ -1,15 +1,20 @@
/** /**
* Minimal Discord Gateway connection to keep the moderator bot "online". * Minimal Discord Gateway connection to keep the moderator bot "online".
* Uses Node.js built-in WebSocket (Node 22+). * Uses Node.js built-in WebSocket (Node 22+).
*
* IMPORTANT: Only ONE instance should exist per bot token.
* Uses a singleton guard to prevent multiple connections.
*/ */
let ws: WebSocket | null = null; let ws: WebSocket | null = null;
let heartbeatInterval: ReturnType<typeof setInterval> | null = null; let heartbeatInterval: ReturnType<typeof setInterval> | null = null;
let heartbeatAcked = true;
let lastSequence: number | null = null; let lastSequence: number | null = null;
let sessionId: string | null = null; let sessionId: string | null = null;
let resumeUrl: string | null = null; let resumeUrl: string | null = null;
let reconnectTimer: ReturnType<typeof setTimeout> | null = null; let reconnectTimer: ReturnType<typeof setTimeout> | null = null;
let destroyed = false; let destroyed = false;
let started = false; // singleton guard
type Logger = { type Logger = {
info: (msg: string) => void; info: (msg: string) => void;
@@ -17,6 +22,8 @@ type Logger = {
}; };
const GATEWAY_URL = "wss://gateway.discord.gg/?v=10&encoding=json"; const GATEWAY_URL = "wss://gateway.discord.gg/?v=10&encoding=json";
const MAX_RECONNECT_DELAY_MS = 60_000;
let reconnectAttempts = 0;
function sendPayload(data: Record<string, unknown>) { function sendPayload(data: Record<string, unknown>) {
if (ws?.readyState === WebSocket.OPEN) { if (ws?.readyState === WebSocket.OPEN) {
@@ -26,29 +33,79 @@ function sendPayload(data: Record<string, unknown>) {
function startHeartbeat(intervalMs: number) { function startHeartbeat(intervalMs: number) {
stopHeartbeat(); stopHeartbeat();
heartbeatAcked = true;
// First heartbeat after jitter
const jitter = Math.floor(Math.random() * intervalMs); const jitter = Math.floor(Math.random() * intervalMs);
setTimeout(() => { const firstTimer = setTimeout(() => {
if (destroyed) return;
if (!heartbeatAcked) {
// Missed ACK — zombie connection, close and reconnect
ws?.close(4000, "missed heartbeat ack");
return;
}
heartbeatAcked = false;
sendPayload({ op: 1, d: lastSequence }); sendPayload({ op: 1, d: lastSequence });
heartbeatInterval = setInterval(() => { heartbeatInterval = setInterval(() => {
if (destroyed) return;
if (!heartbeatAcked) {
ws?.close(4000, "missed heartbeat ack");
return;
}
heartbeatAcked = false;
sendPayload({ op: 1, d: lastSequence }); sendPayload({ op: 1, d: lastSequence });
}, intervalMs); }, intervalMs);
}, jitter); }, jitter);
// Store the first timer so we can clear it
heartbeatInterval = firstTimer as unknown as ReturnType<typeof setInterval>;
} }
function stopHeartbeat() { function stopHeartbeat() {
if (heartbeatInterval) { if (heartbeatInterval) {
clearInterval(heartbeatInterval); clearInterval(heartbeatInterval);
clearTimeout(heartbeatInterval as unknown as ReturnType<typeof setTimeout>);
heartbeatInterval = null; heartbeatInterval = null;
} }
} }
function cleanup() {
stopHeartbeat();
if (ws) {
// Remove all handlers to avoid ghost callbacks
ws.onopen = null;
ws.onmessage = null;
ws.onclose = null;
ws.onerror = null;
try { ws.close(1000); } catch { /* ignore */ }
ws = null;
}
}
function connect(token: string, logger: Logger, isResume = false) { function connect(token: string, logger: Logger, isResume = false) {
if (destroyed) return; if (destroyed) return;
// Clean up any existing connection first
cleanup();
const url = isResume && resumeUrl ? resumeUrl : GATEWAY_URL; const url = isResume && resumeUrl ? resumeUrl : GATEWAY_URL;
try {
ws = new WebSocket(url); ws = new WebSocket(url);
} catch (err) {
logger.warn(`whispergate: moderator ws constructor failed: ${String(err)}`);
scheduleReconnect(token, logger, false);
return;
}
const currentWs = ws; // capture for closure
ws.onopen = () => { ws.onopen = () => {
if (currentWs !== ws || destroyed) return; // stale
reconnectAttempts = 0; // reset on successful open
if (isResume && sessionId) { if (isResume && sessionId) {
sendPayload({ sendPayload({
op: 6, op: 6,
@@ -75,17 +132,20 @@ function connect(token: string, logger: Logger, isResume = false) {
}; };
ws.onmessage = (evt: MessageEvent) => { ws.onmessage = (evt: MessageEvent) => {
if (currentWs !== ws || destroyed) return;
try { try {
const msg = JSON.parse(typeof evt.data === "string" ? evt.data : String(evt.data)); const msg = JSON.parse(typeof evt.data === "string" ? evt.data : String(evt.data));
const { op, t, s, d } = msg; const { op, t, s, d } = msg;
if (s) lastSequence = s; if (s != null) lastSequence = s;
switch (op) { switch (op) {
case 10: // Hello case 10: // Hello
startHeartbeat(d.heartbeat_interval); startHeartbeat(d.heartbeat_interval);
break; break;
case 11: // Heartbeat ACK case 11: // Heartbeat ACK
heartbeatAcked = true;
break; break;
case 1: // Heartbeat request case 1: // Heartbeat request
sendPayload({ op: 1, d: lastSequence }); sendPayload({ op: 1, d: lastSequence });
@@ -100,36 +160,49 @@ function connect(token: string, logger: Logger, isResume = false) {
logger.info("whispergate: moderator bot resumed"); logger.info("whispergate: moderator bot resumed");
} }
break; break;
case 7: // Reconnect case 7: // Reconnect request
logger.info("whispergate: moderator bot reconnect requested"); logger.info("whispergate: moderator bot reconnect requested by Discord");
ws?.close(4000); cleanup();
scheduleReconnect(token, logger, true);
break; break;
case 9: // Invalid Session case 9: // Invalid Session
logger.warn(`whispergate: moderator bot invalid session, resumable=${d}`); logger.warn(`whispergate: moderator bot invalid session, resumable=${d}`);
if (d) { cleanup();
scheduleReconnect(token, logger, true); sessionId = d ? sessionId : null;
} else { // Wait longer before re-identifying
sessionId = null; setTimeout(() => {
scheduleReconnect(token, logger, false); if (!destroyed) connect(token, logger, !!d && !!sessionId);
} }, 3000 + Math.random() * 2000);
break; break;
} }
} catch { } catch {
// ignore // ignore parse errors
} }
}; };
ws.onclose = (evt: CloseEvent) => { ws.onclose = (evt: CloseEvent) => {
if (currentWs !== ws) return; // stale ws
stopHeartbeat(); stopHeartbeat();
if (destroyed) return; if (destroyed) return;
const code = evt.code; const code = evt.code;
// Non-recoverable codes — stop reconnecting
if (code === 4004) { if (code === 4004) {
logger.warn("whispergate: moderator bot token invalid, not reconnecting"); logger.warn("whispergate: moderator bot token invalid (4004), stopping");
started = false;
return; return;
} }
logger.info(`whispergate: moderator bot disconnected (code=${code}), reconnecting...`); if (code === 4010 || code === 4011 || code === 4013 || code === 4014) {
const canResume = code !== 4010 && code !== 4011 && code !== 4012 && code !== 4013 && code !== 4014; logger.warn(`whispergate: moderator bot fatal close (${code}), re-identifying`);
scheduleReconnect(token, logger, canResume && !!sessionId); sessionId = null;
scheduleReconnect(token, logger, false);
return;
}
logger.info(`whispergate: moderator bot disconnected (code=${code}), will reconnect`);
const canResume = !!sessionId && code !== 4012;
scheduleReconnect(token, logger, canResume);
}; };
ws.onerror = () => { ws.onerror = () => {
@@ -140,24 +213,45 @@ function connect(token: string, logger: Logger, isResume = false) {
function scheduleReconnect(token: string, logger: Logger, resume: boolean) { function scheduleReconnect(token: string, logger: Logger, resume: boolean) {
if (destroyed) return; if (destroyed) return;
if (reconnectTimer) clearTimeout(reconnectTimer); if (reconnectTimer) clearTimeout(reconnectTimer);
const delay = 2000 + Math.random() * 3000;
reconnectTimer = setTimeout(() => connect(token, logger, resume), delay); // Exponential backoff with cap
reconnectAttempts++;
const baseDelay = Math.min(1000 * Math.pow(2, reconnectAttempts), MAX_RECONNECT_DELAY_MS);
const jitter = Math.random() * 1000;
const delay = baseDelay + jitter;
logger.info(`whispergate: moderator reconnect in ${Math.round(delay)}ms (attempt ${reconnectAttempts})`);
reconnectTimer = setTimeout(() => {
reconnectTimer = null;
connect(token, logger, resume);
}, delay);
} }
/**
* Start the moderator bot's Discord Gateway connection.
* Singleton: calling multiple times with the same token is safe (no-op).
*/
export function startModeratorPresence(token: string, logger: Logger): void { export function startModeratorPresence(token: string, logger: Logger): void {
if (started) {
logger.info("whispergate: moderator presence already started, skipping");
return;
}
started = true;
destroyed = false; destroyed = false;
reconnectAttempts = 0;
connect(token, logger); connect(token, logger);
} }
/**
* Disconnect the moderator bot.
*/
export function stopModeratorPresence(): void { export function stopModeratorPresence(): void {
destroyed = true; destroyed = true;
stopHeartbeat(); started = false;
if (reconnectTimer) { if (reconnectTimer) {
clearTimeout(reconnectTimer); clearTimeout(reconnectTimer);
reconnectTimer = null; reconnectTimer = null;
} }
if (ws) { cleanup();
ws.close(1000);
ws = null;
}
} }