diff --git a/apps/insights/src/worker.ts b/apps/insights/src/worker.ts
index 86b74043d..bec9dc057 100644
--- a/apps/insights/src/worker.ts
+++ b/apps/insights/src/worker.ts
@@ -9,6 +9,23 @@ import { Worker } from "bullmq";
 import { processInsightsJob } from "./jobs";
 import { emitInsightsEvent } from "./lib/evlog-insights";
 
+// These Redis errors are transient — they occur during failover or server
+// upgrade and the BullMQ worker reconnects automatically (maxRetriesPerRequest:
+// null). Logging them at WARN prevents false-positive ERROR incidents.
+const TRANSIENT_REDIS_ERROR_PATTERNS = [
+  /^READONLY /,
+  /^ERR caller gone/,
+  /ECONNRESET/,
+  /Connection is closed/,
+  /Socket closed unexpectedly/,
+];
+
+function isTransientRedisError(error: Error): boolean {
+  return TRANSIENT_REDIS_ERROR_PATTERNS.some((pattern) =>
+    pattern.test(error.message)
+  );
+}
+
 const DEFAULT_INSIGHTS_WORKER_CONCURRENCY = 5;
 
 export function getInsightsWorkerConcurrency(
@@ -77,7 +94,8 @@ export function startInsightsWorker() {
   });
 
   worker.on("error", (error) => {
-    emitInsightsEvent("error", "worker.error", {
+    const level = isTransientRedisError(error) ? "warn" : "error";
+    emitInsightsEvent(level, "worker.error", {
       error_message: error.message,
       error_stack: error.stack,
     });