Skip to content

Commit

Permalink
support system_metrics_cmd in config.properties (#3000)
Browse files Browse the repository at this point in the history
* support system_metrics_cmd in config.properties

* address security concern

* add log

* update readme
  • Loading branch information
lxning authored Mar 5, 2024
1 parent 424b66d commit 1ff1b3b
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 8 deletions.
1 change: 1 addition & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ e.g. : To allow base URLs `https://s3.amazonaws.com/` and `https://torchserve.py
* For security reasons, `use_env_allowed_urls=true` is required in config.properties to read `allowed_urls` from an environment variable.
* `workflow_store` : Path of workflow store directory. Defaults to model store directory.
* `disable_system_metrics` : Disable collection of system metrics when set to "true". Default value is "false".
* `system_metrics_cmd` : The customized system metrics Python script name with its arguments. For example: `ts/metrics/metric_collector.py --gpu 0`. Default: empty, which means TorchServe collects system metrics via "ts/metrics/metric_collector.py --gpu $CUDA_VISIBLE_DEVICES".

**NOTE**

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.IOUtils;
Expand All @@ -32,16 +33,23 @@ public MetricCollector(ConfigManager configManager) {
public void run() {
try {
// Collect System level Metrics
String[] args = new String[4];
args[0] = configManager.getPythonExecutable();
args[1] = "ts/metrics/metric_collector.py";
args[2] = "--gpu";
args[3] = String.valueOf(ConfigManager.getInstance().getNumberOfGpu());
List<String> args = new ArrayList<>();
args.add(configManager.getPythonExecutable());
String systemMetricsCmd = configManager.getSystemMetricsCmd();
if (systemMetricsCmd.isEmpty()) {
systemMetricsCmd =
String.format(
"%s --gpu %s",
"ts/metrics/metric_collector.py",
String.valueOf(configManager.getNumberOfGpu()));
}
args.addAll(Arrays.asList(systemMetricsCmd.split("\\s+")));
File workingDir = new File(configManager.getModelServerHome());

String[] envp = EnvironmentUtils.getEnvString(workingDir.getAbsolutePath(), null, null);

final Process p = Runtime.getRuntime().exec(args, envp, workingDir); // NOPMD
final Process p =
Runtime.getRuntime()
.exec(args.toArray(new String[0]), envp, workingDir); // NOPMD
ModelManager modelManager = ModelManager.getInstance();
Map<Integer, WorkerThread> workerMap = modelManager.getWorkers();
try (OutputStream os = p.getOutputStream()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ public final class ConfigManager {
private static final String MODEL_SNAPSHOT = "model_snapshot";
private static final String MODEL_CONFIG = "models";
private static final String VERSION = "version";
private static final String SYSTEM_METRICS_CMD = "system_metrics_cmd";

// Configuration default values
private static final String DEFAULT_TS_ALLOWED_URLS = "file://.*|http(s)?://.*";
Expand Down Expand Up @@ -559,6 +560,10 @@ public String getCertificateFile() {
return prop.getProperty(TS_CERTIFICATE_FILE);
}

/**
 * Returns the custom system metrics command configured through the {@code system_metrics_cmd}
 * property.
 *
 * <p>The value is trimmed before it is returned. Callers decide whether to fall back to the
 * default collector with {@code isEmpty()} and tokenize the command on whitespace, so a
 * whitespace-only value must read as "not configured" and leading whitespace must not produce
 * an empty first token.
 *
 * @return the trimmed command string, or an empty string when the property is not set or is
 *     blank; never {@code null}
 */
public String getSystemMetricsCmd() {
    return prop.getProperty(SYSTEM_METRICS_CMD, "").trim();
}

public SslContext getSslContext() throws IOException, GeneralSecurityException {
List<String> supportedCiphers =
Arrays.asList(
Expand Down Expand Up @@ -734,7 +739,9 @@ public String dumpConfigurations() {
+ "\nCPP log config: "
+ (getTsCppLogConfig() == null ? "N/A" : getTsCppLogConfig())
+ "\nModel config: "
+ prop.getProperty(MODEL_CONFIG, "N/A");
+ prop.getProperty(MODEL_CONFIG, "N/A")
+ "\nSystem metrics command: "
+ (getSystemMetricsCmd().isEmpty() ? "default" : getSystemMetricsCmd());
}

public boolean useNativeIo() {
Expand Down

0 comments on commit 1ff1b3b

Please sign in to comment.