diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityPollTask.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityPollTask.java index 0327bc6f3..5816b9caa 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityPollTask.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/ActivityPollTask.java @@ -35,6 +35,8 @@ import io.temporal.worker.MetricsType; import io.temporal.worker.tuning.*; import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; import java.util.function.Supplier; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -96,18 +98,19 @@ public ActivityTask poll() { PollActivityTaskQueueResponse response; SlotPermit permit; boolean isSuccessful = false; - + CompletableFuture future = + slotSupplier.reserveSlot( + new SlotReservationData( + pollRequest.getTaskQueue().getName(), + pollRequest.getIdentity(), + pollRequest.getWorkerVersionCapabilities().getBuildId())); try { - permit = - slotSupplier.reserveSlot( - new SlotReservationData( - pollRequest.getTaskQueue().getName(), - pollRequest.getIdentity(), - pollRequest.getWorkerVersionCapabilities().getBuildId())); + permit = future.get(); } catch (InterruptedException e) { + future.cancel(true); Thread.currentThread().interrupt(); return null; - } catch (Exception e) { + } catch (ExecutionException e) { log.warn("Error while trying to reserve a slot for an activity", e.getCause()); return null; } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivitySlotSupplierQueue.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivitySlotSupplierQueue.java index b573677f6..a2c398179 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivitySlotSupplierQueue.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/LocalActivitySlotSupplierQueue.java @@ -83,18 +83,22 @@ private void 
processQueue() { QueuedLARequest request = null; try { request = requestQueue.take(); + + CompletableFuture future = slotSupplier.reserveSlot(request.data); try { - slotPermit = slotSupplier.reserveSlot(request.data); + slotPermit = future.get(); } catch (InterruptedException e) { + future.cancel(true); Thread.currentThread().interrupt(); return; - } catch (Exception e) { + } catch (ExecutionException e) { log.error( "Error reserving local activity slot, dropped activity id {}", request.task.getActivityId(), e); continue; } + request.task.getExecutionContext().setPermit(slotPermit); afterReservedCallback.apply(request.task); } catch (InterruptedException e) { diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/NexusPollTask.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/NexusPollTask.java index 2fc47c0cd..14472a529 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/NexusPollTask.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/NexusPollTask.java @@ -32,6 +32,8 @@ import io.temporal.worker.MetricsType; import io.temporal.worker.tuning.*; import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; import java.util.function.Supplier; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -86,18 +88,19 @@ public NexusTask poll() { PollNexusTaskQueueResponse response; SlotPermit permit; boolean isSuccessful = false; - + CompletableFuture future = + slotSupplier.reserveSlot( + new SlotReservationData( + pollRequest.getTaskQueue().getName(), + pollRequest.getIdentity(), + pollRequest.getWorkerVersionCapabilities().getBuildId())); try { - permit = - slotSupplier.reserveSlot( - new SlotReservationData( - pollRequest.getTaskQueue().getName(), - pollRequest.getIdentity(), - pollRequest.getWorkerVersionCapabilities().getBuildId())); + permit = future.get(); } catch (InterruptedException e) { + future.cancel(true); 
Thread.currentThread().interrupt(); return null; - } catch (Exception e) { + } catch (ExecutionException e) { log.warn("Error while trying to reserve a slot for a nexus task", e.getCause()); return null; } diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/TrackingSlotSupplier.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/TrackingSlotSupplier.java index 6ee36d639..bfdde5ebd 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/TrackingSlotSupplier.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/TrackingSlotSupplier.java @@ -26,6 +26,7 @@ import java.util.Collections; import java.util.Map; import java.util.Optional; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; @@ -48,10 +49,15 @@ public TrackingSlotSupplier(SlotSupplier inner, Scope metricsScope) { publishSlotsMetric(); } - public SlotPermit reserveSlot(SlotReservationData dat) throws InterruptedException { - SlotPermit p = inner.reserveSlot(createCtx(dat)); - issuedSlots.incrementAndGet(); - return p; + public CompletableFuture reserveSlot(SlotReservationData dat) { + CompletableFuture future = null; + try { + future = inner.reserveSlot(createCtx(dat)); + } catch (Exception e) { + throw new RuntimeException(e); + } + future.thenAccept(permit -> issuedSlots.incrementAndGet()); + return future; } public Optional tryReserveSlot(SlotReservationData dat) { diff --git a/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowPollTask.java b/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowPollTask.java index e41cb6d0b..577d73ba5 100644 --- a/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowPollTask.java +++ b/temporal-sdk/src/main/java/io/temporal/internal/worker/WorkflowPollTask.java @@ -35,8 +35,12 @@ import io.temporal.serviceclient.MetricsTag; import io.temporal.serviceclient.WorkflowServiceStubs; import 
io.temporal.worker.MetricsType; -import io.temporal.worker.tuning.*; +import io.temporal.worker.tuning.SlotPermit; +import io.temporal.worker.tuning.SlotReleaseReason; +import io.temporal.worker.tuning.WorkflowSlotInfo; import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; import java.util.function.Supplier; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -123,17 +127,19 @@ public WorkflowPollTask( public WorkflowTask poll() { boolean isSuccessful = false; SlotPermit permit; + CompletableFuture future = + slotSupplier.reserveSlot( + new SlotReservationData( + pollRequest.getTaskQueue().getName(), + pollRequest.getIdentity(), + pollRequest.getWorkerVersionCapabilities().getBuildId())); try { - permit = - slotSupplier.reserveSlot( - new SlotReservationData( - pollRequest.getTaskQueue().getName(), - pollRequest.getIdentity(), - pollRequest.getWorkerVersionCapabilities().getBuildId())); + permit = future.get(); } catch (InterruptedException e) { + future.cancel(true); Thread.currentThread().interrupt(); return null; - } catch (Exception e) { + } catch (ExecutionException e) { log.warn("Error while trying to reserve a slot for workflow task", e.getCause()); return null; } diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/FixedSizeSlotSupplier.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/FixedSizeSlotSupplier.java index e9b08d344..4ee29884d 100644 --- a/temporal-sdk/src/main/java/io/temporal/worker/tuning/FixedSizeSlotSupplier.java +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/FixedSizeSlotSupplier.java @@ -21,8 +21,11 @@ package io.temporal.worker.tuning; import com.google.common.base.Preconditions; +import java.util.ArrayDeque; import java.util.Optional; -import java.util.concurrent.*; +import java.util.Queue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.locks.ReentrantLock; /** * This implementation of 
{@link SlotSupplier} provides a fixed number of slots backed by a @@ -32,18 +35,83 @@ */ public class FixedSizeSlotSupplier implements SlotSupplier { private final int numSlots; - private final Semaphore executorSlotsSemaphore; + private final AsyncSemaphore executorSlotsSemaphore; + + /** + * A simple version of an async semaphore. Unfortunately there's not any readily available + * properly licensed library I could find for this which is a bit shocking, but this + * implementation should be suitable for our needs + */ + static class AsyncSemaphore { + private final ReentrantLock lock = new ReentrantLock(); + private final Queue> waiters = new ArrayDeque<>(); + private int permits; + + AsyncSemaphore(int initialPermits) { + this.permits = initialPermits; + } + + /** + * Acquire a permit asynchronously. If a permit is available, returns a completed future, + * otherwise returns a future that will be completed when a permit is released. + */ + public CompletableFuture acquire() { + lock.lock(); + try { + if (permits > 0) { + permits--; + return CompletableFuture.completedFuture(null); + } else { + CompletableFuture waiter = new CompletableFuture<>(); + waiters.add(waiter); + return waiter; + } + } finally { + lock.unlock(); + } + } + + public boolean tryAcquire() { + lock.lock(); + try { + if (permits > 0) { + permits--; + return true; + } + return false; + } finally { + lock.unlock(); + } + } + + /** + * Release a permit. If there are waiting futures, completes the next one instead of + * incrementing the permit count. 
+ */ + public void release() { + lock.lock(); + try { + CompletableFuture waiter = waiters.poll(); + if (waiter != null) { + waiter.complete(null); + } else { + permits++; + } + } finally { + lock.unlock(); + } + } + } public FixedSizeSlotSupplier(int numSlots) { Preconditions.checkArgument(numSlots > 0, "FixedSizeSlotSupplier must have at least one slot"); this.numSlots = numSlots; - executorSlotsSemaphore = new Semaphore(numSlots); + executorSlotsSemaphore = new AsyncSemaphore(numSlots); } @Override - public SlotPermit reserveSlot(SlotReserveContext ctx) throws InterruptedException { - executorSlotsSemaphore.acquire(); - return new SlotPermit(); + public CompletableFuture reserveSlot(SlotReserveContext ctx) throws Exception { + return executorSlotsSemaphore.acquire().thenApply(ignored -> new SlotPermit()); } @Override diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedSlotSupplier.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedSlotSupplier.java index db671b586..283486c01 100644 --- a/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedSlotSupplier.java +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedSlotSupplier.java @@ -24,6 +24,7 @@ import java.time.Duration; import java.time.Instant; import java.util.Optional; +import java.util.concurrent.*; /** Implements a {@link SlotSupplier} based on resource usage for a particular slot type. */ @Experimental @@ -32,6 +33,9 @@ public class ResourceBasedSlotSupplier implements SlotSuppl private final ResourceBasedController resourceController; private final ResourceBasedSlotOptions options; private Instant lastSlotIssuedAt = Instant.EPOCH; + // For slot reservations that are waiting to re-check resource usage + private final ScheduledExecutorService scheduler; + private static ScheduledExecutorService defaultScheduler; /** * Construct a slot supplier for workflow tasks with the given resource controller and options. 
@@ -42,7 +46,20 @@ public class ResourceBasedSlotSupplier implements SlotSuppl public static ResourceBasedSlotSupplier createForWorkflow( ResourceBasedController resourceBasedController, ResourceBasedSlotOptions options) { return new ResourceBasedSlotSupplier<>( - WorkflowSlotInfo.class, resourceBasedController, options); + WorkflowSlotInfo.class, resourceBasedController, options, null); + } + + /** + * As {@link #createForWorkflow(ResourceBasedController, ResourceBasedSlotOptions)}, but allows + * overriding the internal thread pool. It is recommended to share the same executor across all + * resource based slot suppliers in a worker. + */ + public static ResourceBasedSlotSupplier createForWorkflow( + ResourceBasedController resourceBasedController, + ResourceBasedSlotOptions options, + ScheduledExecutorService scheduler) { + return new ResourceBasedSlotSupplier<>( + WorkflowSlotInfo.class, resourceBasedController, options, scheduler); } /** @@ -54,7 +71,20 @@ public static ResourceBasedSlotSupplier createForWorkflow( public static ResourceBasedSlotSupplier createForActivity( ResourceBasedController resourceBasedController, ResourceBasedSlotOptions options) { return new ResourceBasedSlotSupplier<>( - ActivitySlotInfo.class, resourceBasedController, options); + ActivitySlotInfo.class, resourceBasedController, options, null); + } + + /** + * As {@link #createForActivity(ResourceBasedController, ResourceBasedSlotOptions)}, but allows + * overriding the internal thread pool. It is recommended to share the same executor across all + * resource based slot suppliers in a worker. 
+ */ + public static ResourceBasedSlotSupplier createForActivity( + ResourceBasedController resourceBasedController, + ResourceBasedSlotOptions options, + ScheduledExecutorService scheduler) { + return new ResourceBasedSlotSupplier<>( + ActivitySlotInfo.class, resourceBasedController, options, scheduler); } /** @@ -66,7 +96,20 @@ public static ResourceBasedSlotSupplier createForActivity( public static ResourceBasedSlotSupplier createForLocalActivity( ResourceBasedController resourceBasedController, ResourceBasedSlotOptions options) { return new ResourceBasedSlotSupplier<>( - LocalActivitySlotInfo.class, resourceBasedController, options); + LocalActivitySlotInfo.class, resourceBasedController, options, null); + } + + /** + * As {@link #createForLocalActivity(ResourceBasedController, ResourceBasedSlotOptions)}, but + * allows overriding the internal thread pool. It is recommended to share the same executor across + * all resource based slot suppliers in a worker. + */ + public static ResourceBasedSlotSupplier createForLocalActivity( + ResourceBasedController resourceBasedController, + ResourceBasedSlotOptions options, + ScheduledExecutorService scheduler) { + return new ResourceBasedSlotSupplier<>( + LocalActivitySlotInfo.class, resourceBasedController, options, scheduler); } /** @@ -77,14 +120,34 @@ public static ResourceBasedSlotSupplier createForLocalAct */ public static ResourceBasedSlotSupplier createForNexus( ResourceBasedController resourceBasedController, ResourceBasedSlotOptions options) { - return new ResourceBasedSlotSupplier<>(NexusSlotInfo.class, resourceBasedController, options); + return new ResourceBasedSlotSupplier<>( + NexusSlotInfo.class, resourceBasedController, options, null); + } + + /** + * As {@link #createForNexus(ResourceBasedController, ResourceBasedSlotOptions)}, but allows + * overriding the internal thread pool. It is recommended to share the same executor across all + * resource based slot suppliers in a worker. 
+ */ + public static ResourceBasedSlotSupplier createForNexus( + ResourceBasedController resourceBasedController, + ResourceBasedSlotOptions options, + ScheduledExecutorService scheduler) { + return new ResourceBasedSlotSupplier<>( + NexusSlotInfo.class, resourceBasedController, options, scheduler); } private ResourceBasedSlotSupplier( Class clazz, ResourceBasedController resourceBasedController, - ResourceBasedSlotOptions options) { + ResourceBasedSlotOptions options, + ScheduledExecutorService scheduler) { this.resourceController = resourceBasedController; + if (scheduler == null) { + this.scheduler = getDefaultScheduler(); + } else { + this.scheduler = scheduler; + } // Merge default options for any unset fields if (WorkflowSlotInfo.class.isAssignableFrom(clazz)) { this.options = @@ -139,29 +202,43 @@ private ResourceBasedSlotSupplier( } @Override - public SlotPermit reserveSlot(SlotReserveContext ctx) throws InterruptedException { - while (true) { - if (ctx.getNumIssuedSlots() < options.getMinimumSlots()) { - return new SlotPermit(); - } else { - Duration mustWaitFor; - try { - mustWaitFor = options.getRampThrottle().minus(timeSinceLastSlotIssued()); - } catch (ArithmeticException e) { - mustWaitFor = Duration.ZERO; - } - if (mustWaitFor.compareTo(Duration.ZERO) > 0) { - Thread.sleep(mustWaitFor.toMillis()); - } - - Optional permit = tryReserveSlot(ctx); - if (permit.isPresent()) { - return permit.get(); - } else { - Thread.sleep(10); - } - } + public CompletableFuture reserveSlot(SlotReserveContext ctx) throws Exception { + if (ctx.getNumIssuedSlots() < options.getMinimumSlots()) { + return CompletableFuture.completedFuture(new SlotPermit()); + } + return tryReserveSlot(ctx) + .map(CompletableFuture::completedFuture) + .orElseGet(() -> scheduleSlotAcquisition(ctx)); + } + + private CompletableFuture scheduleSlotAcquisition(SlotReserveContext ctx) { + Duration mustWaitFor; + try { + mustWaitFor = options.getRampThrottle().minus(timeSinceLastSlotIssued()); + } 
catch (ArithmeticException e) { + mustWaitFor = Duration.ZERO; + } + + CompletableFuture permitFuture; + if (mustWaitFor.compareTo(Duration.ZERO) > 0) { + permitFuture = + CompletableFuture.supplyAsync(() -> null, delayedExecutor(mustWaitFor.toMillis())); + } else { + permitFuture = CompletableFuture.completedFuture(null); } + + // After the delay, try to reserve the slot + return permitFuture.thenCompose( + ignored -> { + Optional permit = tryReserveSlot(ctx); + // If we couldn't get a slot this time, delay for a short period and try again + return permit + .map(CompletableFuture::completedFuture) + .orElseGet( + () -> + CompletableFuture.supplyAsync(() -> null, delayedExecutor(10)) + .thenCompose(ig -> scheduleSlotAcquisition(ctx))); + }); } @Override @@ -190,4 +267,28 @@ public ResourceBasedController getResourceController() { private Duration timeSinceLastSlotIssued() { return Duration.between(lastSlotIssuedAt, Instant.now()); } + + // Polyfill for Java 9 delayedExecutor + private Executor delayedExecutor(long delay) { + return r -> scheduler.schedule(() -> scheduler.execute(r), delay, TimeUnit.MILLISECONDS); + } + + private static ScheduledExecutorService getDefaultScheduler() { + synchronized (ResourceBasedSlotSupplier.class) { + if (defaultScheduler == null) { + defaultScheduler = + Executors.newScheduledThreadPool( + // Two threads seem needed here, so that reading PID decisions doesn't interfere + // overly with firing off scheduled tasks or one another. 
+ 2, + r -> { + Thread t = new Thread(r); + t.setName("ResourceBasedSlotSupplier.scheduler"); + t.setDaemon(true); + return t; + }); + } + return defaultScheduler; + } + } } diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedTuner.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedTuner.java index 77573f93f..c3e7f7df3 100644 --- a/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedTuner.java +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/ResourceBasedTuner.java @@ -22,6 +22,7 @@ import io.temporal.common.Experimental; import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import javax.annotation.Nonnull; /** A {@link WorkerTuner} that attempts to allocate slots based on available system resources. */ @@ -51,6 +52,7 @@ public class ResourceBasedTuner implements WorkerTuner { private final ResourceBasedSlotOptions activitySlotOptions; private final ResourceBasedSlotOptions localActivitySlotOptions; private final ResourceBasedSlotOptions nexusSlotOptions; + private final ScheduledExecutorService executor; public static Builder newBuilder() { return new Builder(); @@ -63,6 +65,7 @@ public static final class Builder { private @Nonnull ResourceBasedSlotOptions localActivitySlotOptions = DEFAULT_ACTIVITY_SLOT_OPTIONS; private @Nonnull ResourceBasedSlotOptions nexusSlotOptions = DEFAULT_NEXUS_SLOT_OPTIONS; + private @Nonnull ScheduledExecutorService executor; private Builder() {} @@ -115,13 +118,23 @@ public Builder setNexusSlotOptions(@Nonnull ResourceBasedSlotOptions nexusSlotOp return this; } + /** + * Set the executor used for checking resource usage periodically. Defaults to a two-thread + * pool. 
+ */ + public Builder setExecutor(@Nonnull ScheduledExecutorService executor) { + this.executor = executor; + return this; + } + public ResourceBasedTuner build() { return new ResourceBasedTuner( controllerOptions, workflowSlotOptions, activitySlotOptions, localActivitySlotOptions, - nexusSlotOptions); + nexusSlotOptions, + executor); } } @@ -133,35 +146,38 @@ public ResourceBasedTuner( ResourceBasedSlotOptions workflowSlotOptions, ResourceBasedSlotOptions activitySlotOptions, ResourceBasedSlotOptions localActivitySlotOptions, - ResourceBasedSlotOptions nexusSlotOptions) { + ResourceBasedSlotOptions nexusSlotOptions, + ScheduledExecutorService executor) { this.controller = ResourceBasedController.newSystemInfoController(controllerOptions); this.workflowSlotOptions = workflowSlotOptions; this.activitySlotOptions = activitySlotOptions; this.localActivitySlotOptions = localActivitySlotOptions; this.nexusSlotOptions = nexusSlotOptions; + this.executor = executor; } @Nonnull @Override public SlotSupplier getWorkflowTaskSlotSupplier() { - return ResourceBasedSlotSupplier.createForWorkflow(controller, workflowSlotOptions); + return ResourceBasedSlotSupplier.createForWorkflow(controller, workflowSlotOptions, executor); } @Nonnull @Override public SlotSupplier getActivityTaskSlotSupplier() { - return ResourceBasedSlotSupplier.createForActivity(controller, activitySlotOptions); + return ResourceBasedSlotSupplier.createForActivity(controller, activitySlotOptions, executor); } @Nonnull @Override public SlotSupplier getLocalActivitySlotSupplier() { - return ResourceBasedSlotSupplier.createForLocalActivity(controller, localActivitySlotOptions); + return ResourceBasedSlotSupplier.createForLocalActivity( + controller, localActivitySlotOptions, executor); } @Nonnull @Override public SlotSupplier getNexusSlotSupplier() { - return ResourceBasedSlotSupplier.createForNexus(controller, nexusSlotOptions); + return ResourceBasedSlotSupplier.createForNexus(controller, nexusSlotOptions, 
executor); } } diff --git a/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotSupplier.java b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotSupplier.java index fa584d443..f6f800e26 100644 --- a/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotSupplier.java +++ b/temporal-sdk/src/main/java/io/temporal/worker/tuning/SlotSupplier.java @@ -22,6 +22,7 @@ import io.temporal.common.Experimental; import java.util.Optional; +import java.util.concurrent.CompletableFuture; /** * A SlotSupplier is responsible for managing the number of slots available for a given type of @@ -36,16 +37,19 @@ @Experimental public interface SlotSupplier { /** - * This function is called before polling for new tasks. Your implementation should block until a - * slot is available then return a permit to use that slot. + * This function is called before polling for new tasks. Your implementation should return a + * Promise that is completed with a {@link SlotPermit} when one becomes available. + * + *

<p>These futures may be cancelled if the worker is shutting down or otherwise abandons the + * reservation. This can cause an {@link InterruptedException} to be thrown in the thread running + * your implementation. You may want to catch it to perform any necessary cleanup, and then you + * should rethrow the exception. Other thrown exceptions will be logged. * * @param ctx The context for slot reservation. - * @return A permit to use the slot which may be populated with your own data. - * @throws InterruptedException The worker may choose to interrupt the thread in order to cancel - * the reservation, or during shutdown. You may perform cleanup, and then should rethrow the - * exception. + * @return A future that will be completed with a permit to use the slot when one becomes + * available. Never return null, or complete the future with null. */ - SlotPermit reserveSlot(SlotReserveContext ctx) throws InterruptedException; + CompletableFuture reserveSlot(SlotReserveContext ctx) throws Exception; /** * This function is called when trying to reserve slots for "eager" workflow and activity tasks. 
diff --git a/temporal-sdk/src/test/java/io/temporal/internal/worker/SlotSupplierTest.java b/temporal-sdk/src/test/java/io/temporal/internal/worker/SlotSupplierTest.java index 8a393bd7e..a7fa570a3 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/worker/SlotSupplierTest.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/worker/SlotSupplierTest.java @@ -37,7 +37,7 @@ import io.temporal.serviceclient.WorkflowServiceStubs; import io.temporal.worker.tuning.*; import java.util.Objects; -import java.util.concurrent.TimeoutException; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicInteger; import org.junit.Test; import org.junit.runner.RunWith; @@ -60,7 +60,7 @@ public static Object[] data() { } @Test - public void supplierIsCalledAppropriately() throws InterruptedException, TimeoutException { + public void supplierIsCalledAppropriately() { WorkflowServiceStubs client = mock(WorkflowServiceStubs.class); when(client.getServerCapabilities()) .thenReturn(() -> GetSystemInfoResponse.Capabilities.newBuilder().build()); @@ -71,13 +71,17 @@ public void supplierIsCalledAppropriately() throws InterruptedException, Timeout SlotSupplier mockSupplier = mock(SlotSupplier.class); AtomicInteger usedSlotsWhenCalled = new AtomicInteger(-1); - when(mockSupplier.reserveSlot( - argThat( - src -> { - usedSlotsWhenCalled.set(src.getUsedSlots().size()); - return true; - }))) - .thenReturn(new SlotPermit()); + try { + when(mockSupplier.reserveSlot( + argThat( + src -> { + usedSlotsWhenCalled.set(src.getUsedSlots().size()); + return true; + }))) + .thenReturn(CompletableFuture.completedFuture(new SlotPermit())); + } catch (Exception e) { + throw new RuntimeException(e); + } StickyQueueBalancer stickyQueueBalancer = new StickyQueueBalancer(5, true); Scope metricsScope = @@ -119,7 +123,11 @@ public void supplierIsCalledAppropriately() throws InterruptedException, Timeout if (throwOnPoll) { assertThrows(RuntimeException.class, poller::poll); 
- verify(mockSupplier, times(1)).reserveSlot(any()); + try { + verify(mockSupplier, times(1)).reserveSlot(any()); + } catch (Exception e) { + throw new RuntimeException(e); + } verify(mockSupplier, times(1)).releaseSlot(any()); assertEquals(0, trackingSS.getUsedSlots().size()); } else { @@ -128,8 +136,12 @@ public void supplierIsCalledAppropriately() throws InterruptedException, Timeout // We can't test this in the verifier, since it will get an up-to-date reference to the map // where the slot *is* used. assertEquals(0, usedSlotsWhenCalled.get()); - verify(mockSupplier, times(1)) - .reserveSlot(argThat(arg -> Objects.equals(arg.getTaskQueue(), TASK_QUEUE))); + try { + verify(mockSupplier, times(1)) + .reserveSlot(argThat(arg -> Objects.equals(arg.getTaskQueue(), TASK_QUEUE))); + } catch (Exception e) { + throw new RuntimeException(e); + } verify(mockSupplier, times(0)).releaseSlot(any()); assertEquals(1, trackingSS.getUsedSlots().size()); } diff --git a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java index 7020e37b3..b4bec7654 100644 --- a/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java +++ b/temporal-sdk/src/test/java/io/temporal/internal/worker/WorkflowSlotsSmallSizeTests.java @@ -22,7 +22,6 @@ import static org.junit.Assert.assertEquals; -import com.uber.m3.util.ImmutableMap; import io.temporal.activity.ActivityInterface; import io.temporal.activity.ActivityMethod; import io.temporal.activity.ActivityOptions; @@ -38,7 +37,6 @@ import java.time.Duration; import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import org.junit.After; @@ -181,20 +179,9 @@ public String activity(String input) { } } - private Map getWorkerTags(String workerType) { - return ImmutableMap.of( - "worker_type", - workerType, - 
"task_queue", - testWorkflowRule.getTaskQueue(), - "namespace", - "UnitTest"); - } - private void assertIntraWFTSlotCount(int allowedToRun) { int runningLAs = activitiesAreLocal ? allowedToRun : 0; int runningAs = activitiesAreLocal ? 0 : allowedToRun; - int runningWFTs = activitiesAreLocal ? 1 : 0; assertCurrentUsedCount(runningAs, runningLAs); } diff --git a/temporal-sdk/src/test/java/io/temporal/testUtils/CountingSlotSupplier.java b/temporal-sdk/src/test/java/io/temporal/testUtils/CountingSlotSupplier.java index a9d0045e0..ebd6c4643 100644 --- a/temporal-sdk/src/test/java/io/temporal/testUtils/CountingSlotSupplier.java +++ b/temporal-sdk/src/test/java/io/temporal/testUtils/CountingSlotSupplier.java @@ -22,6 +22,7 @@ import io.temporal.worker.tuning.*; import java.util.Optional; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; @@ -37,10 +38,13 @@ public CountingSlotSupplier(int numSlots) { } @Override - public SlotPermit reserveSlot(SlotReserveContext ctx) throws InterruptedException { - SlotPermit p = super.reserveSlot(ctx); - reservedCount.incrementAndGet(); - return p; + public CompletableFuture reserveSlot(SlotReserveContext ctx) throws Exception { + CompletableFuture p = super.reserveSlot(ctx); + return p.thenApply( + permit -> { + reservedCount.incrementAndGet(); + return permit; + }); } @Override diff --git a/temporal-sdk/src/test/java/io/temporal/worker/ResourceBasedTunerTests.java b/temporal-sdk/src/test/java/io/temporal/worker/ResourceBasedTunerTests.java index ca61f7371..0e65065e3 100644 --- a/temporal-sdk/src/test/java/io/temporal/worker/ResourceBasedTunerTests.java +++ b/temporal-sdk/src/test/java/io/temporal/worker/ResourceBasedTunerTests.java @@ -27,6 +27,8 @@ import io.temporal.activity.ActivityInterface; import io.temporal.activity.ActivityOptions; import io.temporal.activity.LocalActivityOptions; +import io.temporal.client.WorkflowClient; +import 
io.temporal.client.WorkflowOptions; import io.temporal.common.reporter.TestStatsReporter; import io.temporal.serviceclient.MetricsTag; import io.temporal.testing.internal.SDKTestWorkflowRule; @@ -36,6 +38,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -99,13 +102,37 @@ public void canRunHeavyMemoryWithResourceBasedTuner() { workflow.execute(50, 50, 30000000); } + @Test(timeout = 30 * 1000) + public void canShutdownInTheMiddle() throws InterruptedException { + WorkflowClient client = testWorkflowRule.getWorkflowClient(); + ResourceTunerWorkflow workflow = + client.newWorkflowStub( + ResourceTunerWorkflow.class, + WorkflowOptions.newBuilder() + .setTaskQueue(testWorkflowRule.getTaskQueue()) + .validateBuildWithDefaults()); + WorkflowClient.start(workflow::execute, 10, 10, 1000); + workflow.activitiesStarted(); + testWorkflowRule.getTestEnvironment().getWorkerFactory().shutdownNow(); + testWorkflowRule.getTestEnvironment().getWorkerFactory().awaitTermination(3, TimeUnit.SECONDS); + reporter.assertGauge(MetricsType.WORKER_TASK_SLOTS_USED, getWorkerTags("WorkflowWorker"), 0); + reporter.assertGauge(MetricsType.WORKER_TASK_SLOTS_USED, getWorkerTags("ActivityWorker"), 0); + reporter.assertGauge( + MetricsType.WORKER_TASK_SLOTS_USED, getWorkerTags("LocalActivityWorker"), 0); + } + @WorkflowInterface public interface ResourceTunerWorkflow { @WorkflowMethod String execute(int numActivities, int localActivities, int memCeiling); + + @UpdateMethod + void activitiesStarted(); } public static class ResourceTunerWorkflowImpl implements ResourceTunerWorkflow { + private boolean activitiesStarted = false; + @Override public String execute(int numActivities, int localActivities, int memCeiling) { SleepActivity activity = @@ -133,12 +160,19 @@ public String execute(int numActivities, int localActivities, int 
memCeiling) { promises.add(promise); } + activitiesStarted = true; + for (Promise promise : promises) { promise.get(); } return "I'm done"; } + + @Override + public void activitiesStarted() { + Workflow.await(() -> activitiesStarted); + } } @ActivityInterface