diff --git a/docs/recipes/probes.md b/docs/recipes/probes.md index fdaccb15..9f6a9f01 100644 --- a/docs/recipes/probes.md +++ b/docs/recipes/probes.md @@ -66,20 +66,20 @@ tye run The sample has a single service with a `liveness` and a `readiness` probe, as shown in this snippet -``` +```yaml services: - name: simple-webapi project: webapi/webapi.csproj replicas: 2 liveness: http: - path: /healthy + path: /lively readiness: http: path: /ready ``` -The service is configured to respond successfully to both the `liveness` and `readiness` probes, so after executing `tye run`, you should see these log lines +After executing `tye run`, you should see these log lines ``` [18:14:05 INF] Replica simple-webapi_a2d67bd9-4 is moving to an healthy state @@ -90,50 +90,28 @@ The service is configured to respond successfully to both the `liveness` and `re As you can see, both replicas pass the `liveness` probe and get prompted to an `Healthy` state, and shortly after, both replica pass the `readiness` probe and get promoted to a `Ready` state. -The sample application exposes an endpoint that allows you modify the responses that the `/healthy` and `/ready` endpoints, in order to see the probes in action. - -For example, if you send an *HTTP GET* to `http://localhost:8080/set?ready=false&timeout=10` or enter that address in the browser, -It will make `/ready` return *HTTP 500* for 10 seconds. - -Shortly after issuing that requests, you should see this log line in the terminal +If the `readiness` probe fails, a replica will move from a `Ready` state to an `Healthy` state. ``` [18:14:18 INF] Replica simple-webapi_a2d67bd9-4 is moving to an healthy state ``` -meaning that the replica got demoted from `Ready` to `Healthy`, due to failing the `readiness` probe. - -After *about* 10 seconds, you should see this log line in the terminal +If after some time, the `readiness` probe becomes successful again, a replica will move from an `Healthy` state to a `Ready` state. 
``` [18:14:26 INF] Replica simple-webapi_a2d67bd9-4 is moving to a ready state ``` -meaning that the replica got demoted from `Healthy` to `Ready` again, due to passing the `readiness` probe. - -(*The reason it's a bit less than 10 seconds, is because Tye doesn't fail the probe immediately. It waits for a certain number of consecutive failures, as described in the schema document.*) - -You can use the same method to make the `liveness` probe fail, and watch as Tye restarts the replica. - -Send an *HTTP GET* request to this endpoint `http://localhost:8080/set?healthy=false` - - -And watch for this log line +If the `liveness` probe fails, a replica will be killed, and the orchestrator will spawn a new replica in its stead ``` [18:25:08 INF] Killing replica simple-webapi_a9c2e2f4-d because it has failed the liveness probe -``` - -Shortly after, you should see this log lines - -``` +... [18:25:08 INF] Launching service simple-webapi_0e7fe12d-7 [18:25:09 INF] Replica simple-webapi_0e7fe12d-7 is moving to an healthy state [18:25:11 INF] Replica simple-webapi_0e7fe12d-7 is moving to a ready state ``` -Showing that Tye launches a new replica instead of the replica that it has killed. 
- ## Deploying with Liveness and Readiness Probes When you deploy an application with `liveness` and/or `readiness` probes to Kubernetes, these probes get translated to their [equivalent representation in Kubernetes](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/) @@ -160,6 +138,6 @@ kubectl describe deploy simple-webapi And you will notice that the deployment has `Liveness` and `Readiness` in its description ``` -Liveness: http-get http://:80/healthy delay=0s timeout=1s period=1s #success=1 #failure=3 +Liveness: http-get http://:80/lively delay=0s timeout=1s period=1s #success=1 #failure=3 Readiness: http-get http://:80/ready delay=0s timeout=1s period=1s #success=1 #failure=3 -``` \ No newline at end of file +``` diff --git a/samples/liveness-and-readiness/tye.yaml b/samples/liveness-and-readiness/tye.yaml index aa471576..e3b539a6 100644 --- a/samples/liveness-and-readiness/tye.yaml +++ b/samples/liveness-and-readiness/tye.yaml @@ -12,7 +12,7 @@ services: replicas: 2 liveness: http: - path: /healthy + path: /lively initialDelay: 1 readiness: http: diff --git a/samples/liveness-and-readiness/webapi/HealthChecks.cs b/samples/liveness-and-readiness/webapi/HealthChecks.cs new file mode 100644 index 00000000..afd48807 --- /dev/null +++ b/samples/liveness-and-readiness/webapi/HealthChecks.cs @@ -0,0 +1,33 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Collections.Concurrent; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Diagnostics.HealthChecks; + +namespace webapi +{ + class MyGenericCheck : IHealthCheck + { + private ConcurrentDictionary<string, bool> _statuses; + private string _statusToCheck; + + public MyGenericCheck(ConcurrentDictionary<string, bool> statuses, string statusToCheck) + { + _statuses = statuses; + _statusToCheck = statusToCheck; + } + + public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = default) + { + if (!_statuses.TryGetValue(_statusToCheck, out var status)) + { + return Task.FromResult(new HealthCheckResult(context.Registration.FailureStatus)); + } + + return Task.FromResult(new HealthCheckResult(status ? HealthStatus.Healthy : context.Registration.FailureStatus)); + } + } +} \ No newline at end of file diff --git a/samples/liveness-and-readiness/webapi/Startup.cs b/samples/liveness-and-readiness/webapi/Startup.cs index 442d0652..93ddb14f 100644 --- a/samples/liveness-and-readiness/webapi/Startup.cs +++ b/samples/liveness-and-readiness/webapi/Startup.cs @@ -4,42 +4,46 @@ using System; using System.Collections.Concurrent; -using System.Collections.Generic; using System.Linq; -using System.Text.Json; using System.Threading.Tasks; using Microsoft.AspNetCore.Builder; +using Microsoft.AspNetCore.Diagnostics.HealthChecks; using Microsoft.AspNetCore.Hosting; using Microsoft.AspNetCore.Http; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Diagnostics.HealthChecks; using Microsoft.Extensions.Hosting; namespace webapi { - class SetDTO - { - public bool? Healthy { get; set; } - public bool? Ready { get; set; } - public int? 
Timeout{ get; set; } - } - public class Startup { private string _id; - private bool _healthy; - private bool _ready; + + private ConcurrentDictionary<string, bool> _statusDictionary; public Startup() { _id = Guid.NewGuid().ToString(); - _healthy = true; - _ready = true; + _statusDictionary = new ConcurrentDictionary<string, bool>() + { + ["someLivenessCheck"] = true, + ["someReadinessCheck"] = true + }; } // This method gets called by the runtime. Use this method to add services to the container. // For more information on how to configure your application, visit https://go.microsoft.com/fwlink/?LinkID=398940 public void ConfigureServices(IServiceCollection services) { + services + .AddHealthChecks() + // this registers a "liveness" check. A service that fails a liveness check is considered to be unrecoverable and has to be restarted by the orchestrator (Tye/Kubernetes). + // for example: you may consider failing this check if your service has encountered a fatal exception, or if you've detected a memory leak or a substantially long average response time + .AddCheck("someLivenessCheck", new MyGenericCheck(_statusDictionary, "someLivenessCheck"), failureStatus: HealthStatus.Unhealthy, tags: new[] { "liveness" }) + // this registers a "readiness" check. A service that fails a readiness check is considered to be unable to serve traffic temporarily. The orchestrator doesn't restart a service that fails this check, but stops sending traffic to it until it responds to this check positively again. + // for example: you may consider failing this check if your service is currently unable to connect to some external service such as your database, cache service, etc... + .AddCheck("someReadinessCheck", new MyGenericCheck(_statusDictionary, "someReadinessCheck"), failureStatus: HealthStatus.Unhealthy, tags: new[] { "readiness" }); } // This method gets called by the runtime. Use this method to configure the HTTP request pipeline. 
@@ -56,37 +60,39 @@ namespace webapi { endpoints.MapGet("/", async context => { - await context.Response.WriteAsync($"Hello World! Process Id: {_id}"); + context.Response.ContentType = "text/html"; + await context.Response.WriteAsync($@" + Hello World! Process Id: {_id}
+ This sample service exposes an HTTP GET endpoint /set that allows you to change the results of the liveness/readiness probes.

+ Examples:

+ GET /set?someReadinessCheck=false&timeout=10 will cause the readiness probe to fail for 10 seconds.
+ GET /set?someLivenessCheck=false will cause the liveness probe to fail, resulting in a restart of that replica. + "); }); - endpoints.MapGet("/healthy", async context => + // this endpoint returns HTTP 200 if all "liveness" checks have passed, otherwise, it returns HTTP 500 + endpoints.MapHealthChecks("/lively", new HealthCheckOptions() { - context.Response.StatusCode = _healthy ? 200 : 500; - await context.Response.WriteAsync($"Status Code: {context.Response.StatusCode}"); + Predicate = reg => reg.Tags.Contains("liveness") }); - endpoints.MapGet("/ready", async context => + // this endpoint returns HTTP 200 if all "readiness" checks have passed, otherwise, it returns HTTP 500 + endpoints.MapHealthChecks("/ready", new HealthCheckOptions() { - context.Response.StatusCode = _ready ? 200 : 500; - await context.Response.WriteAsync($"Status Code: {context.Response.StatusCode}"); + Predicate = reg => reg.Tags.Contains("readiness") }); // Should be technically POST/PUT, but it's just for tests... 
endpoints.MapGet("/set", async context => { var query = context.Request.Query.ToDictionary(kv => kv.Key).ToDictionary(kv => kv.Key, kv => kv.Value.Value.First()); + var statusesFromQuery = query.Where(kv => kv.Key != "timeout").ToDictionary(kv => kv.Key, kv => kv.Value.Trim().ToLower() == "true"); - var originalHealthy = _healthy; - var originalReady = _ready; - - if (query.ContainsKey("healthy") && bool.TryParse(query["healthy"], out var healthy)) - { - _healthy = healthy; - } + var statusesSnapshot = _statusDictionary.Where(kv => statusesFromQuery.ContainsKey(kv.Key)).ToDictionary(kv => kv.Key, kv => kv.Value); - if (query.ContainsKey("ready") && bool.TryParse(query["ready"], out var ready)) + foreach (var status in statusesFromQuery) { - _ready = ready; + _statusDictionary[status.Key] = status.Value; } if (query.ContainsKey("timeout") && int.TryParse(query["timeout"], out var timeout)) @@ -94,8 +100,10 @@ namespace webapi var _ = Task.Delay(TimeSpan.FromSeconds(timeout)) .ContinueWith(_ => { - _healthy = originalHealthy; - _ready = originalReady; + foreach (var previousStatus in statusesSnapshot) + { + _statusDictionary[previousStatus.Key] = previousStatus.Value; + } }); }