Skip to content

Commit

Permalink
Introduce CustomMetricReporter
Browse files Browse the repository at this point in the history
  • Loading branch information
yashaswaj authored and Shubham Tagra committed Jul 17, 2020
1 parent 95bdf11 commit 27f4a5c
Show file tree
Hide file tree
Showing 14 changed files with 249 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ Fixes and Features
------------------
* Fixed a regression from 0.3.11 which slows down split generation.
* Refactored JMX stats for better accounting of stats.
* Added support for plugging in a custom metrics reporter that can send metrics to custom sinks. It can be enabled by setting `rubix.metrics.reporters=CUSTOM` and providing the implementation class using `rubix.metric-collector.impl`.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import com.codahale.metrics.JmxReporter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.ganglia.GangliaReporter;
import com.google.common.base.Splitter;
import com.qubole.rubix.common.utils.ClusterUtil;
import com.qubole.rubix.spi.CacheConfig;
import com.readytalk.metrics.StatsDReporter;
Expand All @@ -28,9 +27,12 @@
import java.io.Closeable;
import java.io.IOException;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;

import static com.qubole.rubix.common.utils.ClusterUtil.getMetricsReporters;

public class BookKeeperMetrics implements AutoCloseable
{
private static Log log = LogFactory.getLog(BookKeeperMetrics.class);
Expand Down Expand Up @@ -63,12 +65,7 @@ public MetricRegistry getMetricsRegistry()
*/
protected void initializeReporters()
{
final Iterable<String> metricsReporterNames = Splitter.on(",").trimResults().omitEmptyStrings().split(CacheConfig.getMetricsReporters(conf));

final Set<MetricsReporterType> metricsReporterTypes = new HashSet<>();
for (String reporterName : metricsReporterNames) {
metricsReporterTypes.add(MetricsReporterType.valueOf(reporterName.toUpperCase()));
}
final Set<MetricsReporterType> metricsReporterTypes = getMetricsReporters(conf);

for (MetricsReporterType reporter : metricsReporterTypes) {
switch (reporter) {
Expand Down Expand Up @@ -111,6 +108,10 @@ protected void initializeReporters()
gangliaReporter.start(CacheConfig.getMetricsReportingInterval(conf), TimeUnit.MILLISECONDS);
reporters.add(gangliaReporter);
break;
case CUSTOM:
CustomMetricsReporterProvider.initialize(conf, Optional.of(metrics));
reporters.add(CustomMetricsReporterProvider.getCustomMetricsReporter());
break;
}
}
}
Expand Down Expand Up @@ -216,7 +217,7 @@ public enum CacheMetric
ASYNC_QUEUE_SIZE_GAUGE("rubix.bookkeeper.gauge.async_queue_size"),
ASYNC_DOWNLOADED_MB_COUNT("rubix.bookkeeper.count.async_downloaded_mb"),
ASYNC_DOWNLOAD_TIME_COUNT("rubix.bookkeeper.count.async_download_time"),
LDTS_CACHING_EXCEPTION("caching_exception_while_transferring_data");
LDTS_CACHING_EXCEPTION("rubix.ldts.exception.trasnsferdata");

private final String metricName;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/**
* Copyright (c) 2019. Qubole Inc
* Licensed under the Apache License, Version 2.0 (the License);
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. See accompanying LICENSE file.
*/
package com.qubole.rubix.common.metrics;

import java.util.HashMap;
import java.util.Map;

/**
 * Metrics that can be handed to a {@code CustomMetricsReporter}.
 *
 * <p>Every constant carries the metric name it is published under, and the
 * enum keeps a reverse index so a constant can be found from that name.
 */
public enum CachingFileSystemMetrics {
  LOCAL_FALLBACK_TO_DIRECT_READ("rubix_local_cache_fallback_direct_read"),
  NON_LOCAL_FALLBACK_TO_DIRECT_READ("rubix_non_local_cache_fallback_direct_read"),
  POSITIONAL_READ_FAILURE("rubix_positional_read_failure");

  // Reverse index: metric name -> constant. Built once, after all constants exist.
  private static final Map<String, CachingFileSystemMetrics> lookup = indexByMetricName();

  private final String metricName;

  CachingFileSystemMetrics(String metricName)
  {
    this.metricName = metricName;
  }

  /**
   * @return the metric name associated with this constant
   */
  public String getMetricName()
  {
    return metricName;
  }

  /**
   * Finds the constant registered under the given metric name.
   *
   * @param enumString the metric name, as returned by {@link #getMetricName()}
   * @return the matching constant, or {@code null} when no constant uses that name
   */
  public static CachingFileSystemMetrics get(String enumString)
  {
    return lookup.get(enumString);
  }

  /** Builds the metric-name index over every declared constant. */
  private static Map<String, CachingFileSystemMetrics> indexByMetricName()
  {
    final Map<String, CachingFileSystemMetrics> byName = new HashMap<>();
    for (CachingFileSystemMetrics metric : values()) {
      byName.put(metric.metricName, metric);
    }
    return byName;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/**
* Copyright (c) 2019. Qubole Inc
* Licensed under the Apache License, Version 2.0 (the License);
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. See accompanying LICENSE file.
*/
package com.qubole.rubix.common.metrics;

import java.io.Closeable;

/**
 * Contract for a pluggable metrics reporter.
 *
 * <p>Being {@link Closeable}, a reporter can be closed by whoever owns it.
 */
public interface CustomMetricsReporter extends Closeable {

  /**
   * Starts the reporter.
   */
  void start();

  /**
   * Hands a metric event to the reporter. What is done with it (counting,
   * forwarding to a sink, ...) is left to the implementation.
   *
   * @param cachingFileSystemMetrics the metric being reported
   */
  void addMetric(CachingFileSystemMetrics cachingFileSystemMetrics);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/**
* Copyright (c) 2019. Qubole Inc
* Licensed under the Apache License, Version 2.0 (the License);
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. See accompanying LICENSE file.
*/
package com.qubole.rubix.common.metrics;

import com.codahale.metrics.MetricRegistry;
import com.qubole.rubix.spi.CacheConfig;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;

import java.util.Optional;
import java.util.concurrent.atomic.AtomicReference;

import static com.qubole.rubix.common.utils.ClusterUtil.getMetricsReporters;

/**
 * Static holder for the single {@link CustomMetricsReporter} used by this JVM.
 *
 * <p>{@link #initialize(Configuration, Optional)} picks the reporter
 * implementation once; later calls are ignored. {@link #getCustomMetricsReporter()}
 * starts the chosen reporter on first use and then keeps returning it. Whenever the
 * configured reporter cannot be created or started, a {@link NoOpReporter} is used
 * instead so that metric reporting never fails the caller.
 */
public class CustomMetricsReporterProvider {
  private static final Log log = LogFactory.getLog(CustomMetricsReporterProvider.class);

  // Configuring this class name is treated the same as configuring no custom reporter.
  private static final String NO_OP_REPORTER_CLASS = "com.qubole.rubix.common.metrics.NoOpReporter";

  // Returned when a reporter is requested before initialize() has been called.
  private static final CustomMetricsReporter UNINITIALIZED_REPORTER = new NoOpReporter();

  // Holds null until start() has been attempted once; also serves as the lock for that attempt.
  private static final AtomicReference<Boolean> reporterRunning = new AtomicReference<>();

  // volatile: replaced under the lock in getCustomMetricsReporter() but read without it.
  private volatile CustomMetricsReporter customMetricsReporter;
  private static volatile CustomMetricsReporterProvider customMetricsReporterProvider;

  private CustomMetricsReporterProvider(CustomMetricsReporter customMetricsReporter) {
    this.customMetricsReporter = customMetricsReporter;
  }

  /**
   * Initializes the provider without a metric registry.
   *
   * @param configuration configuration naming the enabled reporters and the reporter class
   */
  public static void initialize(Configuration configuration)
  {
    initialize(configuration, Optional.empty());
  }

  /**
   * Selects and constructs the reporter implementation. Only the first call has any
   * effect; once a provider exists, subsequent calls return immediately.
   *
   * @param configuration configuration naming the enabled reporters and the reporter class
   * @param metricRegistry registry passed to the reporter's constructor, possibly empty
   */
  public static void initialize(Configuration configuration, Optional<MetricRegistry> metricRegistry) {
    if (customMetricsReporterProvider != null) {
      return;
    }
    synchronized (CustomMetricsReporterProvider.class) {
      if (customMetricsReporterProvider == null) {
        customMetricsReporterProvider =
            new CustomMetricsReporterProvider(createReporter(configuration, metricRegistry));
      }
    }
  }

  /**
   * Returns the reporter, starting it first if no start has been attempted yet.
   * If starting fails, the reporter is replaced by a {@link NoOpReporter}.
   *
   * <p>When called before {@code initialize}, a no-op reporter is returned rather
   * than failing: callers report metrics from error-handling paths, where an
   * exception from here would mask the original problem.
   *
   * @return the active reporter, never {@code null}
   */
  public static CustomMetricsReporter getCustomMetricsReporter() {
    final CustomMetricsReporterProvider provider = customMetricsReporterProvider;
    if (provider == null) {
      log.debug("Custom metrics reporter requested before initialization; using no-op reporter");
      return UNINITIALIZED_REPORTER;
    }

    if (reporterRunning.get() == null) {
      synchronized (reporterRunning) {
        if (reporterRunning.get() == null) {
          try {
            provider.customMetricsReporter.start();
          } catch (Exception e) {
            log.warn("Exception in starting Custom reporter: ", e);
            provider.customMetricsReporter = new NoOpReporter();
          }
          // Set even after a failure so that start() is attempted only once.
          reporterRunning.set(true);
        }
      }
    }
    return provider.customMetricsReporter;
  }

  /**
   * Builds the reporter described by the configuration: the configured class when the
   * CUSTOM reporter type is enabled and a class other than the no-op one is named,
   * otherwise a {@link NoOpReporter}. The configured class is instantiated reflectively
   * through its {@code (Configuration, Optional)} constructor; any failure is logged
   * and falls back to the no-op reporter.
   */
  private static CustomMetricsReporter createReporter(Configuration configuration, Optional<MetricRegistry> metricRegistry) {
    final String className = CacheConfig.getRubixMetricCollectorImpl(configuration);
    // The custom reporter is used only when CUSTOM is among the configured reporter types.
    final boolean useCustomReporter = getMetricsReporters(configuration).contains(MetricsReporterType.CUSTOM);
    if (!useCustomReporter || NO_OP_REPORTER_CLASS.equals(className)) {
      return new NoOpReporter();
    }

    try {
      final Class<?> collectorClass = Class.forName(className);
      log.info(String.format("Using class for metric reporting: %s", className));
      return (CustomMetricsReporter) collectorClass
          .getDeclaredConstructor(Configuration.class, Optional.class)
          .newInstance(configuration, metricRegistry);
    } catch (Exception e) {
      log.warn(String.format("External Metric Reporter class: %s can not be initialized: ", className), e);
      return new NoOpReporter();
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@ public enum MetricsReporterType
{
STATSD,
JMX,
GANGLIA
GANGLIA,
CUSTOM
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
* Copyright (c) 2019. Qubole Inc
* Licensed under the Apache License, Version 2.0 (the License);
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. See accompanying LICENSE file.
*/
package com.qubole.rubix.common.metrics;

import java.io.IOException;

/**
 * A {@link CustomMetricsReporter} whose operations all do nothing.
 */
public class NoOpReporter implements CustomMetricsReporter {

  @Override
  public void start()
  {
    // Intentionally empty: there is nothing to start.
  }

  @Override
  public void addMetric(CachingFileSystemMetrics cachingFileSystemMetrics)
  {
    // Intentionally empty: the metric is discarded.
  }

  @Override
  public void close() throws IOException
  {
    // Intentionally empty: no resources are held.
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,18 @@

package com.qubole.rubix.common.utils;

import com.qubole.rubix.common.metrics.MetricsReporterType;
import com.qubole.rubix.spi.CacheConfig;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import java.io.File;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import java.util.stream.Stream;


public class ClusterUtil
Expand Down Expand Up @@ -85,4 +89,14 @@ public static Configuration applyRubixSiteConfig(Configuration conf)

return conf;
}

/**
 * Resolves the configured, comma-separated metrics reporter names into
 * {@link MetricsReporterType} values.
 *
 * <p>Names are trimmed, blank entries are dropped, and matching is
 * case-insensitive.
 *
 * @param configuration configuration holding the metrics reporter list
 * @return the set of reporter types named in the configuration
 * @throws IllegalArgumentException if a name does not match any {@link MetricsReporterType}
 */
public static Set<MetricsReporterType> getMetricsReporters(Configuration configuration)
{
  return Stream.of(CacheConfig.getMetricsReporters(configuration).split(","))
      .map(String::trim)
      .filter(s -> !s.isEmpty())
      // Locale.ROOT keeps the upper-casing independent of the JVM default locale;
      // under e.g. a Turkish locale "ganglia" would otherwise become "GANGLİA" and fail valueOf.
      .map(s -> s.toUpperCase(java.util.Locale.ROOT))
      .map(MetricsReporterType::valueOf)
      .collect(Collectors.toSet());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,21 @@

package com.qubole.rubix.common.utils;

import com.qubole.rubix.common.metrics.MetricsReporterType;
import com.qubole.rubix.spi.CacheConfig;
import org.apache.hadoop.conf.Configuration;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.Test;

import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;

import static com.qubole.rubix.common.utils.ClusterUtil.getMetricsReporters;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;

public class TestClusterUtil
{
Expand Down Expand Up @@ -129,4 +133,15 @@ public void testEmptyRubixSite()
CacheConfig.setRubixSiteLocation(configuration, rubixSiteXmlName);
Assert.assertThrows(Exception.class, () -> ClusterUtil.applyRubixSiteConfig(configuration));
}

/**
 * Verifies that reporter names are resolved regardless of case and surrounding
 * whitespace, and that empty entries in the list are ignored.
 */
@Test
public void testGetMetricReportors()
{
  final Configuration configuration = new Configuration();
  CacheConfig.setMetricsReporters(configuration, "JmX, , ganglia , , ,,");

  final Set<MetricsReporterType> resolved = getMetricsReporters(configuration);

  assertEquals(resolved.size(), 2, "Number of reporter not correct");
  final MetricsReporterType[] expectedTypes = {MetricsReporterType.JMX, MetricsReporterType.GANGLIA};
  for (MetricsReporterType expected : expectedTypes) {
    assertTrue(resolved.contains(expected), "Metrics reporters not resolved correctly");
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
package com.qubole.rubix.core;

import com.google.common.annotations.VisibleForTesting;
import com.qubole.rubix.common.metrics.CachingFileSystemMetrics;
import com.qubole.rubix.common.metrics.CustomMetricsReporterProvider;
import com.qubole.rubix.spi.BookKeeperFactory;
import com.qubole.rubix.spi.CacheUtil;
import com.qubole.rubix.spi.RetryingPooledBookkeeperClient;
Expand Down Expand Up @@ -148,8 +150,8 @@ public Long call() throws IOException
if (ex instanceof CancelledException) {
throw ex;
}

log.error(String.format("Fall back to read from object store for %s .Could not read data from cached file : ", localCachedFile), ex);
CustomMetricsReporterProvider.getCustomMetricsReporter().addMetric(CachingFileSystemMetrics.LOCAL_FALLBACK_TO_DIRECT_READ);
needsInvalidation = true;
directDataRead = readFromRemoteFileSystem();
return directDataRead;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.qubole.rubix.common.metrics.CustomMetricsReporterProvider;
import com.qubole.rubix.spi.BookKeeperFactory;
import com.qubole.rubix.spi.CacheConfig;
import com.qubole.rubix.spi.ClusterManager;
Expand Down Expand Up @@ -189,6 +190,7 @@ public void initialize(URI uri, Configuration conf) throws IOException
conf = applyRubixSiteConfig(conf);
initialize(conf, getClusterType());
super.initialize(getOriginalURI(uri), conf);
CustomMetricsReporterProvider.initialize(conf);
this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
this.workingDir = new Path("/user", System.getProperty("user.name")).makeQualified(this);
isRubixSchemeUsed = uri.getScheme().equals(CacheConfig.RUBIX_SCHEME);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.qubole.rubix.common.metrics.CustomMetricsReporterProvider;
import com.qubole.rubix.spi.BookKeeperFactory;
import com.qubole.rubix.spi.CacheConfig;
import com.qubole.rubix.spi.ClusterType;
Expand Down Expand Up @@ -47,6 +48,7 @@
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;

import static com.qubole.rubix.common.metrics.CachingFileSystemMetrics.POSITIONAL_READ_FAILURE;
import static com.qubole.rubix.spi.CacheUtil.UNKONWN_GENERATION_NUMBER;
import static org.apache.hadoop.fs.FSExceptionMessages.NEGATIVE_SEEK;

Expand Down Expand Up @@ -185,6 +187,7 @@ public int read(byte[] buffer, int offset, int length)
}
catch (Exception e) {
log.error(String.format("Failed to read from rubix for file %s position %d length %d. Falling back to remote", remotePath, nextReadPosition, length), e);
CustomMetricsReporterProvider.getCustomMetricsReporter().addMetric(POSITIONAL_READ_FAILURE);
getParentDataInputStream().seek(nextReadPosition);
int read = readFullyDirect(buffer, offset, length);
if (read > 0) {
Expand Down
Loading

0 comments on commit 27f4a5c

Please sign in to comment.