diff --git a/backend/src/Squidex.Infrastructure/EventSourcing/EventConsumersHealthCheck.cs b/backend/src/Squidex.Infrastructure/EventSourcing/EventConsumersHealthCheck.cs
new file mode 100644
index 000000000..d7d99730a
--- /dev/null
+++ b/backend/src/Squidex.Infrastructure/EventSourcing/EventConsumersHealthCheck.cs
@@ -0,0 +1,92 @@
+// ==========================================================================
+// Squidex Headless CMS
+// ==========================================================================
+// Copyright (c) Squidex UG (haftungsbeschraenkt)
+// All rights reserved. Licensed under the MIT license.
+// ==========================================================================
+
+using System.Collections.Generic;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.Diagnostics.HealthChecks;
+using Orleans;
+using Squidex.Infrastructure.EventSourcing.Grains;
+using Squidex.Infrastructure.Orleans;
+
+namespace Squidex.Infrastructure.EventSourcing
+{
+    /// <summary>
+    /// Health check that reports the state of the event consumers returned by the
+    /// event consumer manager grain.
+    /// </summary>
+    public sealed class EventConsumersHealthCheck : IHealthCheck
+    {
+        private readonly IGrainFactory grainFactory;
+
+        public EventConsumersHealthCheck(IGrainFactory grainFactory)
+        {
+            Guard.NotNull(grainFactory);
+
+            this.grainFactory = grainFactory;
+        }
+
+        /// <summary>
+        /// Queries the event consumers and derives an overall health status from them.
+        /// </summary>
+        /// <param name="context">Not used by this check.</param>
+        /// <param name="cancellationToken">Not forwarded to the grain call.</param>
+        /// <returns>
+        /// Unhealthy if there is at least one consumer and all of them have an error,
+        /// Degraded if only some have an error, Healthy otherwise. The result data maps
+        /// each consumer name to "Running", "Failed" or "Stopped".
+        /// </returns>
+        public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default)
+        {
+            var eventConsumers = await GetGrain().GetConsumersAsync();
+
+            var data = new Dictionary<string, object>();
+
+            var numTotal = 0;
+            var numFailed = 0;
+
+            foreach (var eventConsumer in eventConsumers.Value)
+            {
+                var status = "Running";
+
+                if (eventConsumer.Error != null)
+                {
+                    status = "Failed";
+
+                    numFailed++;
+                }
+                else if (eventConsumer.IsStopped)
+                {
+                    // Stopped consumers are reported, but they do not count as failed.
+                    status = "Stopped";
+                }
+
+                data[eventConsumer.Name] = status;
+
+                numTotal++;
+            }
+
+            if (numTotal > 0 && numFailed == numTotal)
+            {
+                return HealthCheckResult.Unhealthy("All event consumers failed", null, data);
+            }
+            else if (numFailed > 0)
+            {
+                return HealthCheckResult.Degraded("One or more event consumers failed", null, data);
+            }
+            else
+            {
+                return HealthCheckResult.Healthy(data: data);
+            }
+        }
+
+        private IEventConsumerManagerGrain GetGrain()
+        {
+            return grainFactory.GetGrain<IEventConsumerManagerGrain>(SingleGrain.Id);
+        }
+    }
+}
diff --git a/backend/src/Squidex/Config/Domain/HealthCheckServices.cs b/backend/src/Squidex/Config/Domain/HealthCheckServices.cs
index 5e92bc0d5..a337b8568 100644
--- a/backend/src/Squidex/Config/Domain/HealthCheckServices.cs
+++ b/backend/src/Squidex/Config/Domain/HealthCheckServices.cs
@@ -9,6 +9,7 @@ using Microsoft.Extensions.Configuration;
 using Microsoft.Extensions.DependencyInjection;
 using Squidex.Domain.Apps.Entities.Apps.Diagnostics;
 using Squidex.Infrastructure.Diagnostics;
+using Squidex.Infrastructure.EventSourcing;
 
 namespace Squidex.Config.Domain
 {
@@ -22,7 +23,8 @@ namespace Squidex.Config.Domain
             services.AddHealthChecks()
                 .AddCheck<GCHealthCheck>("GC", tags: new[] { "node" })
                 .AddCheck<OrleansHealthCheck>("Orleans", tags: new[] { "cluster" })
-                .AddCheck<OrleansAppsHealthCheck>("Orleans App", tags: new[] { "cluster" });
+                .AddCheck<OrleansAppsHealthCheck>("OrleansApp", tags: new[] { "cluster" })
+                .AddCheck<EventConsumersHealthCheck>("EventConsumers", tags: new[] { "background" });
         }
     }
 }
diff --git a/backend/src/Squidex/Config/Web/WebExtensions.cs b/backend/src/Squidex/Config/Web/WebExtensions.cs
index 00ae1e0a5..8e860d1f2 100644
--- a/backend/src/Squidex/Config/Web/WebExtensions.cs
+++ b/backend/src/Squidex/Config/Web/WebExtensions.cs
@@ -75,7 +75,7 @@ namespace Squidex.Config.Web
 
             app.UseHealthChecks("/readiness", new HealthCheckOptions
             {
-                Predicate = check => true,
+                Predicate = check => !check.Tags.Contains("background"),
                 ResponseWriter = writer
             });
 
@@ -91,6 +91,12 @@ namespace Squidex.Config.Web
                 ResponseWriter = writer
             });
 
+            app.UseHealthChecks("/background-healthz", new HealthCheckOptions
+            {
+                Predicate = check => check.Tags.Contains("background"),
+                ResponseWriter = writer
+            });
+
             return app;
         }
 
diff --git 
a/backend/tests/Squidex.Infrastructure.Tests/EventSourcing/EventConsumersHealthCheckTests.cs b/backend/tests/Squidex.Infrastructure.Tests/EventSourcing/EventConsumersHealthCheckTests.cs
new file mode 100644
index 000000000..62b36b80f
--- /dev/null
+++ b/backend/tests/Squidex.Infrastructure.Tests/EventSourcing/EventConsumersHealthCheckTests.cs
@@ -0,0 +1,124 @@
+// ==========================================================================
+// Squidex Headless CMS
+// ==========================================================================
+// Copyright (c) Squidex UG (haftungsbeschraenkt)
+// All rights reserved. Licensed under the MIT license.
+// ==========================================================================
+
+using System.Collections.Generic;
+using System.Threading.Tasks;
+using FakeItEasy;
+using Microsoft.Extensions.Diagnostics.HealthChecks;
+using Orleans;
+using Orleans.Concurrency;
+using Squidex.Infrastructure.EventSourcing.Grains;
+using Squidex.Infrastructure.Orleans;
+using Xunit;
+
+namespace Squidex.Infrastructure.EventSourcing
+{
+    /// <summary>
+    /// Verifies the health status mapping of <see cref="EventConsumersHealthCheck"/>.
+    /// </summary>
+    public class EventConsumersHealthCheckTests
+    {
+        private readonly IGrainFactory grainFactory = A.Fake<IGrainFactory>();
+        private readonly IEventConsumerManagerGrain eventConsumerManager = A.Fake<IEventConsumerManagerGrain>();
+        private readonly List<EventConsumerInfo> consumers = new List<EventConsumerInfo>();
+        private readonly EventConsumersHealthCheck sut;
+
+        public EventConsumersHealthCheckTests()
+        {
+            A.CallTo(() => grainFactory.GetGrain<IEventConsumerManagerGrain>(SingleGrain.Id, null))
+                .Returns(eventConsumerManager);
+
+            A.CallTo(() => eventConsumerManager.GetConsumersAsync())
+                .Returns(consumers.AsImmutable());
+
+            sut = new EventConsumersHealthCheck(grainFactory);
+        }
+
+        [Fact]
+        public async Task Should_return_healthy_if_no_consumer_found()
+        {
+            var status = await sut.CheckHealthAsync(null!);
+
+            Assert.Equal(HealthStatus.Healthy, status.Status);
+        }
+
+        [Fact]
+        public async Task Should_return_healthy_if_no_consumer_failed()
+        {
+            consumers.Add(new EventConsumerInfo
+            {
+                Name = "Consumer1"
+            });
+
+            consumers.Add(new EventConsumerInfo
+            {
+                Name = "Consumer2"
+            });
+
+            consumers.Add(new EventConsumerInfo
+            {
+                Name = "Consumer3"
+            });
+
+            var status = await sut.CheckHealthAsync(null!);
+
+            Assert.Equal(HealthStatus.Healthy, status.Status);
+        }
+
+        [Fact]
+        public async Task Should_return_unhealthy_if_all_consumers_failed()
+        {
+            consumers.Add(new EventConsumerInfo
+            {
+                Name = "Consumer1",
+                Error = "Failed1"
+            });
+
+            consumers.Add(new EventConsumerInfo
+            {
+                Name = "Consumer2",
+                Error = "Failed2"
+            });
+
+            consumers.Add(new EventConsumerInfo
+            {
+                Name = "Consumer3",
+                Error = "Failed3"
+            });
+
+            var status = await sut.CheckHealthAsync(null!);
+
+            Assert.Equal(HealthStatus.Unhealthy, status.Status);
+        }
+
+        [Fact]
+        public async Task Should_return_degraded_if_at_least_one_consumer_failed()
+        {
+            consumers.Add(new EventConsumerInfo
+            {
+                Name = "Consumer1",
+                Error = "Failed1"
+            });
+
+            consumers.Add(new EventConsumerInfo
+            {
+                Name = "Consumer2",
+                IsStopped = true
+            });
+
+            consumers.Add(new EventConsumerInfo
+            {
+                Name = "Consumer3",
+                IsStopped = false
+            });
+
+            var status = await sut.CheckHealthAsync(null!);
+
+            Assert.Equal(HealthStatus.Degraded, status.Status);
+        }
+    }
+}