fix: moderator presence reconnect loop
Root causes:
1. Multiple plugin subsystems each called startModeratorPresence, creating competing WebSocket connections to the same bot token. Discord only allows one connection per bot → 4008 rate limit → infinite reconnect loop (1000+ connects → token reset by Discord).
2. The Invalid Session (op 9) handler called scheduleReconnect, but the new connection would also get kicked → cascading reconnects.
Fixes:
- Singleton guard: startModeratorPresence is a no-op if already started.
- cleanup() nullifies the old ws handlers before creating a new connection.
- Stale ws check: all callbacks verify they belong to the current ws.
- Exponential backoff with a cap (max 60s) instead of a fixed 2-5s delay.
- Heartbeat ACK tracking: detect zombie connections.
- Non-recoverable codes (4004) properly stop all reconnection.
This commit is contained in:
@@ -1,15 +1,20 @@
|
||||
/**
|
||||
* Minimal Discord Gateway connection to keep the moderator bot "online".
|
||||
* Uses Node.js built-in WebSocket (Node 22+).
|
||||
*
|
||||
* IMPORTANT: Only ONE instance should exist per bot token.
|
||||
* Uses a singleton guard to prevent multiple connections.
|
||||
*/
|
||||
|
||||
/** Socket of the current gateway connection, or null when there is none. */
let ws: WebSocket | null = null;

/**
 * Handle of the pending heartbeat timer. It first holds the one-shot jitter
 * timeout and is later overwritten with the repeating interval, which is why
 * stopHeartbeat() clears it both ways.
 */
let heartbeatInterval: ReturnType<typeof setInterval> | null = null;

/**
 * False between sending a heartbeat (op 1) and receiving its ACK (op 11).
 * If it is still false when the next heartbeat is due, the connection is
 * treated as a zombie and closed.
 */
let heartbeatAcked = true;

/** Last sequence number (`s`) received from the gateway; echoed in heartbeats. */
let lastSequence: number | null = null;

/** Session id required to attempt a resume; null forces a fresh identify. */
let sessionId: string | null = null;

/** URL used instead of GATEWAY_URL when resuming, if one is known. */
let resumeUrl: string | null = null;

/** Pending reconnect timeout created by scheduleReconnect(), if any. */
let reconnectTimer: ReturnType<typeof setTimeout> | null = null;

/** Set by stopModeratorPresence(); makes timers and socket callbacks bail out. */
let destroyed = false;

/** Singleton guard: true while a presence connection has been started. */
let started = false;
|
||||
|
||||
type Logger = {
|
||||
info: (msg: string) => void;
|
||||
@@ -17,6 +22,8 @@ type Logger = {
|
||||
};
|
||||
|
||||
/** Default gateway endpoint (API version 10, JSON encoding) used for fresh connections. */
const GATEWAY_URL = "wss://gateway.discord.gg/?v=10&encoding=json";

/** Upper bound for the exponential part of the reconnect delay (jitter is added on top). */
const MAX_RECONNECT_DELAY_MS = 60_000;

/**
 * Number of reconnects scheduled since the last successful open.
 * Drives the exponential backoff in scheduleReconnect().
 */
let reconnectAttempts = 0;
|
||||
|
||||
function sendPayload(data: Record<string, unknown>) {
|
||||
if (ws?.readyState === WebSocket.OPEN) {
|
||||
@@ -26,29 +33,79 @@ function sendPayload(data: Record<string, unknown>) {
|
||||
|
||||
/**
 * Start the heartbeat loop for the current connection.
 *
 * Any previous heartbeat timer is cancelled first. The first heartbeat is sent
 * after a random delay in [0, intervalMs); subsequent heartbeats are sent
 * every `intervalMs`. Before each send the previous heartbeat must have been
 * acknowledged; otherwise the socket is closed with code 4000 so that the
 * close handler can reconnect.
 *
 * @param intervalMs Heartbeat period in milliseconds, as announced by the gateway.
 */
function startHeartbeat(intervalMs: number) {
  stopHeartbeat();
  heartbeatAcked = true;

  // Sends one heartbeat. Returns false when nothing was sent, either because
  // the module was stopped or because the previous heartbeat was never ACKed.
  const beat = (): boolean => {
    if (destroyed) return false;
    if (!heartbeatAcked) {
      // Missed ACK — zombie connection, close and reconnect
      ws?.close(4000, "missed heartbeat ack");
      return false;
    }
    heartbeatAcked = false;
    sendPayload({ op: 1, d: lastSequence });
    return true;
  };

  // First heartbeat after jitter
  const jitter = Math.floor(Math.random() * intervalMs);
  const firstTimer = setTimeout(() => {
    // Only switch to the periodic timer once the first heartbeat went out.
    if (!beat()) return;
    heartbeatInterval = setInterval(beat, intervalMs);
  }, jitter);

  // Store the first timer in the same slot so stopHeartbeat() can cancel it
  // before it fires; the slot is overwritten with the interval handle later.
  heartbeatInterval = firstTimer as unknown as ReturnType<typeof setInterval>;
}
|
||||
|
||||
/**
 * Cancel the pending heartbeat timer, if any.
 *
 * The stored handle may be either the initial jitter timeout or the repeating
 * interval, so it is cleared with both clearInterval and clearTimeout.
 */
function stopHeartbeat() {
  if (heartbeatInterval) {
    clearInterval(heartbeatInterval);
    clearTimeout(heartbeatInterval as unknown as ReturnType<typeof setTimeout>);
    heartbeatInterval = null;
  }
}
|
||||
|
||||
/**
 * Tear down the current connection without triggering any reconnect logic.
 *
 * Stops the heartbeat, detaches every handler from the socket so that late
 * events from it are ignored, closes it with code 1000 and forgets it.
 */
function cleanup() {
  stopHeartbeat();
  if (ws) {
    // Remove all handlers to avoid ghost callbacks
    ws.onopen = null;
    ws.onmessage = null;
    ws.onclose = null;
    ws.onerror = null;
    // Best effort: a failure to close a socket we are discarding is not actionable.
    try {
      ws.close(1000);
    } catch {
      /* ignore */
    }
    ws = null;
  }
}
|
||||
|
||||
function connect(token: string, logger: Logger, isResume = false) {
|
||||
if (destroyed) return;
|
||||
|
||||
// Clean up any existing connection first
|
||||
cleanup();
|
||||
|
||||
const url = isResume && resumeUrl ? resumeUrl : GATEWAY_URL;
|
||||
ws = new WebSocket(url);
|
||||
|
||||
try {
|
||||
ws = new WebSocket(url);
|
||||
} catch (err) {
|
||||
logger.warn(`whispergate: moderator ws constructor failed: ${String(err)}`);
|
||||
scheduleReconnect(token, logger, false);
|
||||
return;
|
||||
}
|
||||
|
||||
const currentWs = ws; // capture for closure
|
||||
|
||||
ws.onopen = () => {
|
||||
if (currentWs !== ws || destroyed) return; // stale
|
||||
|
||||
reconnectAttempts = 0; // reset on successful open
|
||||
|
||||
if (isResume && sessionId) {
|
||||
sendPayload({
|
||||
op: 6,
|
||||
@@ -75,17 +132,20 @@ function connect(token: string, logger: Logger, isResume = false) {
|
||||
};
|
||||
|
||||
ws.onmessage = (evt: MessageEvent) => {
|
||||
if (currentWs !== ws || destroyed) return;
|
||||
|
||||
try {
|
||||
const msg = JSON.parse(typeof evt.data === "string" ? evt.data : String(evt.data));
|
||||
const { op, t, s, d } = msg;
|
||||
|
||||
if (s) lastSequence = s;
|
||||
if (s != null) lastSequence = s;
|
||||
|
||||
switch (op) {
|
||||
case 10: // Hello
|
||||
startHeartbeat(d.heartbeat_interval);
|
||||
break;
|
||||
case 11: // Heartbeat ACK
|
||||
heartbeatAcked = true;
|
||||
break;
|
||||
case 1: // Heartbeat request
|
||||
sendPayload({ op: 1, d: lastSequence });
|
||||
@@ -100,36 +160,49 @@ function connect(token: string, logger: Logger, isResume = false) {
|
||||
logger.info("whispergate: moderator bot resumed");
|
||||
}
|
||||
break;
|
||||
case 7: // Reconnect
|
||||
logger.info("whispergate: moderator bot reconnect requested");
|
||||
ws?.close(4000);
|
||||
case 7: // Reconnect request
|
||||
logger.info("whispergate: moderator bot reconnect requested by Discord");
|
||||
cleanup();
|
||||
scheduleReconnect(token, logger, true);
|
||||
break;
|
||||
case 9: // Invalid Session
|
||||
logger.warn(`whispergate: moderator bot invalid session, resumable=${d}`);
|
||||
if (d) {
|
||||
scheduleReconnect(token, logger, true);
|
||||
} else {
|
||||
sessionId = null;
|
||||
scheduleReconnect(token, logger, false);
|
||||
}
|
||||
cleanup();
|
||||
sessionId = d ? sessionId : null;
|
||||
// Wait longer before re-identifying
|
||||
setTimeout(() => {
|
||||
if (!destroyed) connect(token, logger, !!d && !!sessionId);
|
||||
}, 3000 + Math.random() * 2000);
|
||||
break;
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
// ignore parse errors
|
||||
}
|
||||
};
|
||||
|
||||
ws.onclose = (evt: CloseEvent) => {
|
||||
if (currentWs !== ws) return; // stale ws
|
||||
stopHeartbeat();
|
||||
if (destroyed) return;
|
||||
|
||||
const code = evt.code;
|
||||
|
||||
// Non-recoverable codes — stop reconnecting
|
||||
if (code === 4004) {
|
||||
logger.warn("whispergate: moderator bot token invalid, not reconnecting");
|
||||
logger.warn("whispergate: moderator bot token invalid (4004), stopping");
|
||||
started = false;
|
||||
return;
|
||||
}
|
||||
logger.info(`whispergate: moderator bot disconnected (code=${code}), reconnecting...`);
|
||||
const canResume = code !== 4010 && code !== 4011 && code !== 4012 && code !== 4013 && code !== 4014;
|
||||
scheduleReconnect(token, logger, canResume && !!sessionId);
|
||||
if (code === 4010 || code === 4011 || code === 4013 || code === 4014) {
|
||||
logger.warn(`whispergate: moderator bot fatal close (${code}), re-identifying`);
|
||||
sessionId = null;
|
||||
scheduleReconnect(token, logger, false);
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info(`whispergate: moderator bot disconnected (code=${code}), will reconnect`);
|
||||
const canResume = !!sessionId && code !== 4012;
|
||||
scheduleReconnect(token, logger, canResume);
|
||||
};
|
||||
|
||||
ws.onerror = () => {
|
||||
@@ -140,24 +213,45 @@ function connect(token: string, logger: Logger, isResume = false) {
|
||||
/**
 * Schedule a reconnect attempt with capped exponential backoff.
 *
 * The base delay doubles with every scheduled attempt (2s, 4s, 8s, ...) up to
 * MAX_RECONNECT_DELAY_MS, plus up to one second of random jitter. A reconnect
 * that is already pending is replaced rather than duplicated. Does nothing
 * once the module has been stopped.
 *
 * @param token  Bot token passed through to connect().
 * @param logger Logger used to report the planned delay.
 * @param resume Whether connect() should try to resume the previous session.
 */
function scheduleReconnect(token: string, logger: Logger, resume: boolean) {
  if (destroyed) return;
  // Keep at most one pending reconnect; the newest request wins.
  if (reconnectTimer) clearTimeout(reconnectTimer);

  // Exponential backoff with cap
  reconnectAttempts++;
  const baseDelay = Math.min(1000 * Math.pow(2, reconnectAttempts), MAX_RECONNECT_DELAY_MS);
  const jitter = Math.random() * 1000;
  const delay = baseDelay + jitter;

  logger.info(`whispergate: moderator reconnect in ${Math.round(delay)}ms (attempt ${reconnectAttempts})`);

  reconnectTimer = setTimeout(() => {
    // Clear the handle first so a reconnect scheduled from within connect()
    // is not mistaken for this already-fired timer.
    reconnectTimer = null;
    connect(token, logger, resume);
  }, delay);
}
|
||||
|
||||
/**
 * Start the moderator bot's Discord Gateway connection.
 *
 * Singleton: while a connection has already been started, further calls only
 * log a message and return. This holds for ANY token, not just the same one;
 * call stopModeratorPresence() first to switch tokens.
 *
 * @param token  Bot token used to open the connection.
 * @param logger Logger for status messages.
 */
export function startModeratorPresence(token: string, logger: Logger): void {
  if (started) {
    logger.info("whispergate: moderator presence already started, skipping");
    return;
  }
  started = true;
  // Re-arm after a previous stopModeratorPresence() and start backoff from scratch.
  destroyed = false;
  reconnectAttempts = 0;
  connect(token, logger);
}
|
||||
|
||||
/**
 * Disconnect the moderator bot and stop all reconnection attempts.
 *
 * Cancels any pending reconnect, tears down the current socket and heartbeat,
 * and releases the singleton guard so startModeratorPresence() can be called
 * again later.
 */
export function stopModeratorPresence(): void {
  // Set first so any timer or socket callback that still fires bails out.
  destroyed = true;
  started = false;
  if (reconnectTimer) {
    clearTimeout(reconnectTimer);
    reconnectTimer = null;
  }
  // Stops the heartbeat, detaches handlers and closes the socket.
  cleanup();
  // Forget the old session so a later start (possibly with another token)
  // never tries to resume it after an early disconnect.
  sessionId = null;
  resumeUrl = null;
  lastSequence = null;
}
|
||||
|
||||
Reference in New Issue
Block a user