From 0332745eef041f098953bed4caba16da54db469f Mon Sep 17 00:00:00 2001 From: Andy Zhang <87735571+Andyz26@users.noreply.github.com> Date: Tue, 9 Apr 2024 14:56:56 -0700 Subject: [PATCH] fix rcActor supervisor (#652) --- .../resourcecluster/ResourceClusterActor.java | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/mantis-control-plane/mantis-control-plane-server/src/main/java/io/mantisrx/master/resourcecluster/ResourceClusterActor.java b/mantis-control-plane/mantis-control-plane-server/src/main/java/io/mantisrx/master/resourcecluster/ResourceClusterActor.java index 266aa2a0c..7a9a699f9 100644 --- a/mantis-control-plane/mantis-control-plane-server/src/main/java/io/mantisrx/master/resourcecluster/ResourceClusterActor.java +++ b/mantis-control-plane/mantis-control-plane-server/src/main/java/io/mantisrx/master/resourcecluster/ResourceClusterActor.java @@ -20,8 +20,11 @@ import akka.actor.AbstractActorWithTimers; import akka.actor.ActorRef; +import akka.actor.OneForOneStrategy; import akka.actor.Props; import akka.actor.Status; +import akka.actor.SupervisorStrategy; +import akka.japi.pf.DeciderBuilder; import akka.japi.pf.ReceiveBuilder; import com.netflix.spectator.api.Tag; import com.netflix.spectator.api.TagList; @@ -95,6 +98,24 @@ @ToString(of = {"clusterID"}) @Slf4j class ResourceClusterActor extends AbstractActorWithTimers { + /** + * For ResourceClusterActor instances, we need to ensure they are always running after encountering an error so that + * TaskExecutors can still remain connected. If there is a fatal error that needs to be escalated to terminate the + * whole system/leader, you can define a fatal exception type and override its behavior to + * SupervisorStrategy.escalate() instead. 
+     */
+    private static final SupervisorStrategy RESOURCE_CLUSTER_ACTOR_STRATEGY =
+        new OneForOneStrategy(
+            3, // maxNrOfRetries: restarts permitted inside the window given on the next line
+            Duration.ofSeconds(60), // withinTimeRange: window over which those restarts are counted
+            DeciderBuilder.match(Exception.class, e -> SupervisorStrategy.restart()).build());
+
+    // NOTE(review): Akka consults supervisorStrategy() for failures of this actor's children -- confirm scope.
+    /** Returns the strategy constant shared by every instance of this actor class. */
+    @Override
+    public SupervisorStrategy supervisorStrategy() {
+        return RESOURCE_CLUSTER_ACTOR_STRATEGY;
+    }
 
     private final Duration heartbeatTimeout;
     private final Duration assignmentTimeout;