fix: Increase timeouts for the SSL provisioning and old-data deletion jobs to accommodate longer processing times

This commit is contained in:
Simon Larsen
2025-08-27 10:16:35 +01:00
parent 4e0dfb3664
commit 47cf7ba763
2 changed files with 9 additions and 2 deletions

View File

@@ -25,6 +25,8 @@ RunCron(
{
schedule: IsDevelopment ? EVERY_FIFTEEN_MINUTE : EVERY_FIFTEEN_MINUTE,
runOnStartup: false,
// Checking provisioning status may require multiple external API calls (DNS + CA) and can exceed the default 5-minute job timeout.
timeoutInMS: OneUptimeDate.convertMinutesToMilliseconds(30),
},
async () => {
await StatusPageDomainService.updateSslProvisioningStatusForAllDomains();
@@ -36,7 +38,8 @@ RunCron(
{
schedule: IsDevelopment ? EVERY_FIFTEEN_MINUTE : EVERY_FIFTEEN_MINUTE,
runOnStartup: false,
timeoutInMS: OneUptimeDate.convertMinutesToMilliseconds(5),
// Ordering SSL can involve domain validation challenges and upstream rate limits; allow more time.
timeoutInMS: OneUptimeDate.convertMinutesToMilliseconds(30),
},
async () => {
return await Telemetry.startActiveSpan<Promise<void>>({
@@ -45,7 +48,7 @@ RunCron(
attributes: {
schedule: IsDevelopment ? EVERY_FIFTEEN_MINUTE : EVERY_FIFTEEN_MINUTE,
runOnStartup: false,
timeoutInMS: OneUptimeDate.convertMinutesToMilliseconds(5),
timeoutInMS: OneUptimeDate.convertMinutesToMilliseconds(15),
},
},
fn: async (span: Span): Promise<void> => {

View File

@@ -14,6 +14,10 @@ RunCron(
{
schedule: IsDevelopment ? EVERY_MINUTE : EVERY_HOUR,
runOnStartup: false,
// This job iterates over all telemetry services and issues ClickHouse DELETE mutations
// which can take longer than the default 5 minute job timeout when there is a lot of data.
// Increase timeout to 30 minutes (well within the hourly schedule) to prevent premature timeouts.
timeoutInMS: OneUptimeDate.convertMinutesToMilliseconds(30),
},
async () => {
// get a list of all the telemetry services.