fix: moderator presence reconnect loop

Root causes:
1. Multiple plugin subsystems each called startModeratorPresence,
   creating competing WebSocket connections to the same bot token.
   Discord only allows one connection per bot → 4008 rate limit →
   infinite reconnect loop (1000+ connects → token reset by Discord)

2. Invalid session (op 9) handler called scheduleReconnect, but the
   new connection would also get kicked → cascading reconnects

Fixes:
- Singleton guard: startModeratorPresence is a no-op if already started
- cleanup() nullifies old ws handlers before creating new connection
- Stale ws check: all callbacks verify they belong to current ws
- Exponential backoff with cap (max 60s) instead of fixed 2-5s delay
- Heartbeat ACK tracking: detect zombie connections (missed ACK → close with code 4000)
- Non-recoverable codes (4004) properly stop all reconnection
This commit is contained in:
zhi
2026-02-28 18:49:17 +00:00
parent 385990ab90
commit 86fdc63802

View File

@@ -1,15 +1,20 @@
/**
* Minimal Discord Gateway connection to keep the moderator bot "online".
* Uses Node.js built-in WebSocket (Node 22+).
*
* IMPORTANT: Only ONE instance should exist per bot token.
* Uses a singleton guard to prevent multiple connections.
*/
// The single live gateway socket; assigned in connect() and reset to null by cleanup().
let ws: WebSocket | null = null;
// Handle of the pending heartbeat timer. startHeartbeat() first stores the jittered
// one-shot timeout here and later overwrites it with the repeating interval.
let heartbeatInterval: ReturnType<typeof setInterval> | null = null;
// Set to false right before each heartbeat is sent and back to true on op 11 (Heartbeat ACK).
// If it is still false when the next heartbeat is due, the socket is closed with code 4000.
let heartbeatAcked = true;
// Last non-null `s` value seen on an incoming message; sent as `d` in op 1 heartbeats.
let lastSequence: number | null = null;
// Session id used for resuming; cleared when the session cannot be resumed.
let sessionId: string | null = null;
// URL used instead of GATEWAY_URL when connect() is called with isResume = true.
let resumeUrl: string | null = null;
// Pending timer created by scheduleReconnect(); null when no reconnect is queued.
let reconnectTimer: ReturnType<typeof setTimeout> | null = null;
// Set by stopModeratorPresence(); callbacks and timers check it and bail out.
let destroyed = false;
// Singleton guard: true between startModeratorPresence() and stopModeratorPresence()
// (also reset when the socket closes with code 4004).
let started = false;
type Logger = {
info: (msg: string) => void;
@@ -17,6 +22,8 @@ type Logger = {
};
// Default gateway endpoint; connect() uses it whenever it is not resuming via resumeUrl.
const GATEWAY_URL = "wss://gateway.discord.gg/?v=10&encoding=json";
// Upper bound (ms) for the exponential part of the reconnect delay; jitter is added on top.
const MAX_RECONNECT_DELAY_MS = 60_000;
// Number of reconnects scheduled since the last successful open; drives the backoff exponent.
let reconnectAttempts = 0;
function sendPayload(data: Record<string, unknown>) {
if (ws?.readyState === WebSocket.OPEN) {
@@ -26,29 +33,79 @@ function sendPayload(data: Record<string, unknown>) {
/**
 * (Re)start the gateway heartbeat loop.
 *
 * Any previously running heartbeat timer is stopped first. The first heartbeat
 * is sent after a random delay in [0, intervalMs); after that one is sent every
 * `intervalMs` milliseconds.
 *
 * Before each heartbeat the previous one must have been acknowledged
 * (`heartbeatAcked`). If it was not, the connection is treated as a zombie and
 * the socket is closed with code 4000 instead of sending another heartbeat.
 *
 * @param intervalMs Heartbeat period in milliseconds (the `heartbeat_interval`
 *                   value passed in by the op 10 handler).
 */
function startHeartbeat(intervalMs: number) {
  stopHeartbeat();
  heartbeatAcked = true;

  // One heartbeat tick. Returns true when a heartbeat was actually sent.
  const beat = (): boolean => {
    if (destroyed) return false;
    if (!heartbeatAcked) {
      // Missed ACK — zombie connection, close and reconnect
      ws?.close(4000, "missed heartbeat ack");
      return false;
    }
    heartbeatAcked = false;
    sendPayload({ op: 1, d: lastSequence });
    return true;
  };

  // First heartbeat after jitter
  const jitter = Math.floor(Math.random() * intervalMs);
  const firstTimer = setTimeout(() => {
    // Only switch to the steady interval if the first heartbeat went out.
    if (beat()) {
      heartbeatInterval = setInterval(beat, intervalMs);
    }
  }, jitter);

  // Store the first timer in the shared slot so stopHeartbeat() can clear it
  // before it fires; the slot is typed as an interval handle, hence the cast.
  heartbeatInterval = firstTimer as unknown as ReturnType<typeof setInterval>;
}
/**
 * Cancel the pending heartbeat timer, if any.
 *
 * The shared handle may be either the initial one-shot timeout or the
 * repeating interval, so both clear functions are applied to it.
 */
function stopHeartbeat() {
  const handle = heartbeatInterval;
  if (!handle) return;

  clearInterval(handle);
  clearTimeout(handle as unknown as ReturnType<typeof setTimeout>);
  heartbeatInterval = null;
}
/**
 * Tear down the current connection: stop the heartbeat, detach every handler
 * from the socket so no late callback can run, close it with code 1000 and
 * forget it.
 */
function cleanup() {
  stopHeartbeat();

  const socket = ws;
  if (!socket) return;

  // Remove all handlers to avoid ghost callbacks
  socket.onopen = null;
  socket.onmessage = null;
  socket.onclose = null;
  socket.onerror = null;

  try {
    socket.close(1000);
  } catch {
    /* ignore */
  }
  ws = null;
}
function connect(token: string, logger: Logger, isResume = false) {
if (destroyed) return;
// Clean up any existing connection first
cleanup();
const url = isResume && resumeUrl ? resumeUrl : GATEWAY_URL;
try {
ws = new WebSocket(url);
} catch (err) {
logger.warn(`whispergate: moderator ws constructor failed: ${String(err)}`);
scheduleReconnect(token, logger, false);
return;
}
const currentWs = ws; // capture for closure
ws.onopen = () => {
if (currentWs !== ws || destroyed) return; // stale
reconnectAttempts = 0; // reset on successful open
if (isResume && sessionId) {
sendPayload({
op: 6,
@@ -75,17 +132,20 @@ function connect(token: string, logger: Logger, isResume = false) {
};
ws.onmessage = (evt: MessageEvent) => {
if (currentWs !== ws || destroyed) return;
try {
const msg = JSON.parse(typeof evt.data === "string" ? evt.data : String(evt.data));
const { op, t, s, d } = msg;
if (s) lastSequence = s;
if (s != null) lastSequence = s;
switch (op) {
case 10: // Hello
startHeartbeat(d.heartbeat_interval);
break;
case 11: // Heartbeat ACK
heartbeatAcked = true;
break;
case 1: // Heartbeat request
sendPayload({ op: 1, d: lastSequence });
@@ -100,36 +160,49 @@ function connect(token: string, logger: Logger, isResume = false) {
logger.info("whispergate: moderator bot resumed");
}
break;
case 7: // Reconnect
logger.info("whispergate: moderator bot reconnect requested");
ws?.close(4000);
case 7: // Reconnect request
logger.info("whispergate: moderator bot reconnect requested by Discord");
cleanup();
scheduleReconnect(token, logger, true);
break;
case 9: // Invalid Session
logger.warn(`whispergate: moderator bot invalid session, resumable=${d}`);
if (d) {
scheduleReconnect(token, logger, true);
} else {
sessionId = null;
scheduleReconnect(token, logger, false);
}
cleanup();
sessionId = d ? sessionId : null;
// Wait longer before re-identifying
setTimeout(() => {
if (!destroyed) connect(token, logger, !!d && !!sessionId);
}, 3000 + Math.random() * 2000);
break;
}
} catch {
// ignore
// ignore parse errors
}
};
ws.onclose = (evt: CloseEvent) => {
if (currentWs !== ws) return; // stale ws
stopHeartbeat();
if (destroyed) return;
const code = evt.code;
// Non-recoverable codes — stop reconnecting
if (code === 4004) {
logger.warn("whispergate: moderator bot token invalid, not reconnecting");
logger.warn("whispergate: moderator bot token invalid (4004), stopping");
started = false;
return;
}
logger.info(`whispergate: moderator bot disconnected (code=${code}), reconnecting...`);
const canResume = code !== 4010 && code !== 4011 && code !== 4012 && code !== 4013 && code !== 4014;
scheduleReconnect(token, logger, canResume && !!sessionId);
if (code === 4010 || code === 4011 || code === 4013 || code === 4014) {
logger.warn(`whispergate: moderator bot fatal close (${code}), re-identifying`);
sessionId = null;
scheduleReconnect(token, logger, false);
return;
}
logger.info(`whispergate: moderator bot disconnected (code=${code}), will reconnect`);
const canResume = !!sessionId && code !== 4012;
scheduleReconnect(token, logger, canResume);
};
ws.onerror = () => {
@@ -140,24 +213,45 @@ function connect(token: string, logger: Logger, isResume = false) {
/**
 * Queue a single reconnect attempt using exponential backoff.
 *
 * Does nothing once the module has been destroyed. Any reconnect that is
 * already queued is cancelled first, so at most one timer is pending.
 * The delay is `min(1000 * 2^attempt, MAX_RECONNECT_DELAY_MS)` plus up to one
 * second of random jitter, where `attempt` is the incremented
 * `reconnectAttempts` counter.
 *
 * @param token  Bot token forwarded to connect().
 * @param logger Logger forwarded to connect() and used to report the delay.
 * @param resume Whether connect() should try to resume the previous session.
 */
function scheduleReconnect(token: string, logger: Logger, resume: boolean) {
  if (destroyed) return;
  if (reconnectTimer) clearTimeout(reconnectTimer);

  // Exponential backoff with cap
  reconnectAttempts++;
  const baseDelay = Math.min(1000 * 2 ** reconnectAttempts, MAX_RECONNECT_DELAY_MS);
  const jitter = Math.random() * 1000;
  const delay = baseDelay + jitter;

  logger.info(`whispergate: moderator reconnect in ${Math.round(delay)}ms (attempt ${reconnectAttempts})`);

  reconnectTimer = setTimeout(() => {
    // Clear the handle first so a reconnect scheduled from inside connect()
    // does not try to cancel this already-fired timer.
    reconnectTimer = null;
    connect(token, logger, resume);
  }, delay);
}
/**
 * Open the moderator bot's Discord Gateway connection.
 *
 * Acts as a singleton: while a presence is already started, further calls only
 * log a message and return. The guard looks at the `started` flag alone; the
 * token argument is not compared with the one used for the running connection.
 *
 * @param token  Bot token passed to connect().
 * @param logger Logger used for status messages.
 */
export function startModeratorPresence(token: string, logger: Logger): void {
  if (!started) {
    // Fresh start: reset lifecycle flags and the backoff counter, then connect.
    reconnectAttempts = 0;
    destroyed = false;
    started = true;
    connect(token, logger);
    return;
  }

  logger.info("whispergate: moderator presence already started, skipping");
}
/**
 * Disconnect the moderator bot.
 *
 * Marks the module as destroyed so pending callbacks and timers bail out,
 * releases the singleton guard so startModeratorPresence() can be called
 * again, cancels any queued reconnect, and tears the socket down.
 */
export function stopModeratorPresence(): void {
  destroyed = true;
  started = false;

  if (reconnectTimer) {
    clearTimeout(reconnectTimer);
    reconnectTimer = null;
  }

  // cleanup() stops the heartbeat, detaches the socket's handlers, and only
  // then closes it, so no separate manual close is needed here.
  cleanup();
}