diff --git a/monitoring/src/main/java/org/thingsboard/monitoring/notification/incident/IncidentManager.java b/monitoring/src/main/java/org/thingsboard/monitoring/notification/incident/IncidentManager.java
index 1e52c37634..c25abfc081 100644
--- a/monitoring/src/main/java/org/thingsboard/monitoring/notification/incident/IncidentManager.java
+++ b/monitoring/src/main/java/org/thingsboard/monitoring/notification/incident/IncidentManager.java
@@ -94,7 +94,14 @@ public class IncidentManager {
         } finally {
             if (activeIncidentThreadId != null) {
                 lastAlertTime = Instant.now();
-                resetResolutionTimer();
+                // High latency is a warning only — it has no explicit recovery signal
+                // (HighLatencyNotification fires only when something is above threshold),
+                // so resolution hinges on failing services alone.
+                if (failingServices.isEmpty()) {
+                    resetResolutionTimer();
+                } else {
+                    cancelResolutionTimer();
+                }
             }
         }
     }
@@ -172,10 +179,15 @@ public class IncidentManager {
     }
 
     private void resetResolutionTimer() {
+        cancelResolutionTimer();
+        resolutionTask = scheduler.schedule(this::resolveIncident, resolutionTimeoutSeconds, TimeUnit.SECONDS);
+    }
+
+    private void cancelResolutionTimer() {
         if (resolutionTask != null) {
             resolutionTask.cancel(false);
+            resolutionTask = null;
         }
-        resolutionTask = scheduler.schedule(this::resolveIncident, resolutionTimeoutSeconds, TimeUnit.SECONDS);
     }
 
     private void startDurationUpdater() {
diff --git a/monitoring/src/test/java/org/thingsboard/monitoring/notification/incident/IncidentManagerTest.java b/monitoring/src/test/java/org/thingsboard/monitoring/notification/incident/IncidentManagerTest.java
index afcd20adf3..b7051db534 100644
--- a/monitoring/src/test/java/org/thingsboard/monitoring/notification/incident/IncidentManagerTest.java
+++ b/monitoring/src/test/java/org/thingsboard/monitoring/notification/incident/IncidentManagerTest.java
@@ -135,6 +135,27 @@ class IncidentManagerTest {
         assertThat(transport.updates).isEmpty();
     }
 
+    @Test
+    void doesNotAutoResolveWhileServicesAreStillFailing() throws Exception {
+        manager.shutdown();
+        transport = new RecordingTransport();
+        manager = new IncidentManager(transport, 1L, "tbqa", false);
+
+        manager.sendAlert("CoAP failure", List.of(AffectedService.failing("CoAP", 1)));
+        Thread.sleep(1500);
+
+        assertThat(transport.updates)
+                .extracting(RecordingTransport.Message::text)
+                .noneMatch(t -> t.contains(":white_check_mark:"));
+
+        manager.sendAlert("CoAP is OK", List.of(AffectedService.recovered("CoAP")));
+        Thread.sleep(1500);
+
+        assertThat(transport.updates)
+                .extracting(RecordingTransport.Message::text)
+                .anyMatch(t -> t.contains(":white_check_mark:"));
+    }
+
     private static class RecordingTransport implements IncidentTransport {
         private final AtomicInteger threadCounter = new AtomicInteger();
         final java.util.List incidents = new java.util.ArrayList<>();