diff --git a/priam/src/main/java/com/netflix/priam/utils/CassandraMonitor.java b/priam/src/main/java/com/netflix/priam/utils/CassandraMonitor.java index e67609131..5994efe9f 100644 --- a/priam/src/main/java/com/netflix/priam/utils/CassandraMonitor.java +++ b/priam/src/main/java/com/netflix/priam/utils/CassandraMonitor.java @@ -1,21 +1,22 @@ /* * Copyright 2013 Netflix, Inc. * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. 
* */ package com.netflix.priam.utils; +import com.google.common.util.concurrent.SimpleTimeLimiter; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import com.google.common.util.concurrent.UncheckedTimeoutException; import com.google.inject.Inject; import com.google.inject.Singleton; import com.netflix.priam.config.IConfiguration; @@ -29,6 +30,8 @@ import java.io.File; import java.io.IOException; import java.io.InputStreamReader; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.cassandra.tools.NodeProbe; import org.apache.commons.io.IOUtils; @@ -44,9 +47,25 @@ public class CassandraMonitor extends Task { public static final String JOBNAME = "CASS_MONITOR_THREAD"; private static final Logger logger = LoggerFactory.getLogger(CassandraMonitor.class); private static final AtomicBoolean isCassandraStarted = new AtomicBoolean(false); + private static final int JMX_OPERATION_TIMEOUT_IN_MILLIS = 1000; + private final InstanceState instanceState; private final ICassandraProcess cassProcess; private final CassMonitorMetrics cassMonitorMetrics; + private final SimpleTimeLimiter jmxTimeLimiter = + SimpleTimeLimiter.create( + Executors.newFixedThreadPool( + 1, + new ThreadFactoryBuilder() + .setDaemon(true) + .setNameFormat("JMX-Executor") + .setUncaughtExceptionHandler( + (t, e) -> + logger.debug( + "unexpected exception {} caught by thread {}", + e, + t)) + .build())); @Inject protected CassandraMonitor( @@ -90,9 +109,27 @@ public void execute() throws Exception { instanceState.setCassandraProcessAlive(true); isCassandraStarted.set(true); NodeProbe bean = JMXNodeTool.instance(this.config); - instanceState.setIsGossipActive(bean.isGossipRunning()); - instanceState.setIsNativeTransportActive(bean.isNativeTransportRunning()); - instanceState.setIsThriftActive(bean.isThriftServerRunning()); + + try { + instanceState.setIsGossipActive( + 
jmxTimeLimiter.callUninterruptiblyWithTimeout( + () -> bean.isGossipRunning(), + JMX_OPERATION_TIMEOUT_IN_MILLIS, + TimeUnit.MILLISECONDS)); + instanceState.setIsNativeTransportActive( + jmxTimeLimiter.callUninterruptiblyWithTimeout( + () -> bean.isNativeTransportRunning(), + JMX_OPERATION_TIMEOUT_IN_MILLIS, + TimeUnit.MILLISECONDS)); + instanceState.setIsThriftActive( + jmxTimeLimiter.callUninterruptiblyWithTimeout( + () -> bean.isThriftServerRunning(), + JMX_OPERATION_TIMEOUT_IN_MILLIS, + TimeUnit.MILLISECONDS)); + } catch (java.util.concurrent.TimeoutException | UncheckedTimeoutException e) { + // timeout surfaces as checked TimeoutException; don't mark C* unhealthy. + logger.warn("jmx operations timed out."); + } } else { // Setting cassandra flag to false instanceState.setCassandraProcessAlive(false);