fix: Increase timeouts for the SSL provisioning and telemetry DeleteOldData jobs to accommodate longer processing times

2026-04-06 00:32:12 +02:00 · 2025-08-27 10:16:35 +01:00
parent 4e0dfb3664
commit 47cf7ba763
2 changed files with 9 additions and 2 deletions
--- a/Worker/Jobs/StatusPageCerts/StatusPageCerts.ts
+++ b/Worker/Jobs/StatusPageCerts/StatusPageCerts.ts
@@ -25,6 +25,8 @@ RunCron(
  {
    schedule: IsDevelopment ? EVERY_FIFTEEN_MINUTE : EVERY_FIFTEEN_MINUTE,
    runOnStartup: false,
+    // Checking provisioning status may require multiple external API calls (DNS + CA) and can exceed default 5m.
+    timeoutInMS: OneUptimeDate.convertMinutesToMilliseconds(30),
  },
  async () => {
    await StatusPageDomainService.updateSslProvisioningStatusForAllDomains();
@@ -36,7 +38,8 @@ RunCron(
  {
    schedule: IsDevelopment ? EVERY_FIFTEEN_MINUTE : EVERY_FIFTEEN_MINUTE,
    runOnStartup: false,
-    timeoutInMS: OneUptimeDate.convertMinutesToMilliseconds(5),
+    // Ordering SSL can involve domain validation challenges and upstream rate limits; allow more time.
+    timeoutInMS: OneUptimeDate.convertMinutesToMilliseconds(30),
  },
  async () => {
    return await Telemetry.startActiveSpan<Promise<void>>({
@@ -45,7 +48,7 @@ RunCron(
        attributes: {
          schedule: IsDevelopment ? EVERY_FIFTEEN_MINUTE : EVERY_FIFTEEN_MINUTE,
          runOnStartup: false,
-          timeoutInMS: OneUptimeDate.convertMinutesToMilliseconds(5),
+          timeoutInMS: OneUptimeDate.convertMinutesToMilliseconds(15),
        },
      },
      fn: async (span: Span): Promise<void> => {
--- a/Worker/Jobs/TelemetryService/DeleteOldData.ts
+++ b/Worker/Jobs/TelemetryService/DeleteOldData.ts
@@ -14,6 +14,10 @@ RunCron(
  {
    schedule: IsDevelopment ? EVERY_MINUTE : EVERY_HOUR,
    runOnStartup: false,
+    // This job iterates over all telemetry services and issues ClickHouse DELETE mutations
+    // which can take longer than the default 5 minute job timeout when there is a lot of data.
+    // Increase timeout to 30 minutes (well within the hourly schedule) to prevent premature timeouts.
+    timeoutInMS: OneUptimeDate.convertMinutesToMilliseconds(30),
  },
  async () => {
    // get a list of all the telemetry services.