From d7965d6dd21279e98d8047243713df43ac5ac2b6 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Thu, 6 Feb 2025 09:30:56 -0800 Subject: [PATCH 1/4] Remove out of date documentation (#2173) * Remove documentation references to helm charts which we no longer publish * Replace reference to the MLflow Triton Plugin container on NGC, instead refer users to https://github.com/triton-inference-server/server/tree/main/deploy/mlflow-triton-plugin Closes #2077 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Will Killian (https://github.com/willkill07) URL: https://github.com/nv-morpheus/Morpheus/pull/2173 --- README.md | 4 - ci/release/update-version.sh | 5 - docs/source/cloud_deployment_guide.md | 755 ------------------ .../guides/5_digital_fingerprinting.md | 12 +- docs/source/extra_info/glossary.md | 11 +- docs/source/index.rst | 12 - .../production/README.md | 1 - 7 files changed, 2 insertions(+), 798 deletions(-) delete mode 100644 docs/source/cloud_deployment_guide.md diff --git a/README.md b/README.md index 3814d1089..13d8ca3f1 100644 --- a/README.md +++ b/README.md @@ -34,8 +34,4 @@ NVIDIA Morpheus is an open AI application framework that provides cybersecurity ### Modifying Morpheus * [Contributing to Morpheus](./docs/source/developer_guide/contributing.md) - Covers building from source, making changes and contributing to Morpheus -### Deploying Morpheus -* [Morpheus Cloud Deployment Guide](./docs/source/cloud_deployment_guide.md) - Kubernetes and cloud based deployments - - Full documentation for the latest official release is available at [https://docs.nvidia.com/morpheus/](https://docs.nvidia.com/morpheus/). 
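The `ci/release/update-version.sh` hunk that follows drops the three `sed_runner` substitutions that kept the deleted guide's branch-pinned links and chart tarball names current. For readers unfamiliar with the script, `sed_runner` is its in-place substitution helper; the sketch below illustrates the pattern under the assumption of a simple `sed -i` wrapper (the helper's exact definition and the tag values are illustrative, not taken from this patch):

```bash
# Illustrative sketch of the version-bump pattern in ci/release/update-version.sh
CURRENT_SHORT_TAG="25.02"   # example current release tag
NEXT_SHORT_TAG="25.06"      # example next release tag

sed_runner() {
    sed -i "$1" "$2"   # apply one sed expression to one file, in place
}

# Rewrite links pinned to the current release branch to point at the next one
sed_runner "s|blob/branch-${CURRENT_SHORT_TAG}|blob/branch-${NEXT_SHORT_TAG}|g" docs/source/basics/overview.rst
```

Every documentation file that embeds a versioned URL or chart tarball name needs matching `sed_runner` lines, which is why removing `cloud_deployment_guide.md` also removes its three substitutions.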
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index feff9d7a2..2a00b52fc 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -101,11 +101,6 @@ sed_runner 's/'"VERSION ${CURRENT_FULL_VERSION}.*"'/'"VERSION ${NEXT_FULL_VERSIO # docs/source/basics/overview.rst sed_runner "s|blob/branch-${CURRENT_SHORT_TAG}|blob/branch-${NEXT_SHORT_TAG}|g" docs/source/basics/overview.rst -# docs/source/cloud_deployment_guide.md -sed_runner "s|${CURRENT_SHORT_TAG}.tgz|${NEXT_SHORT_TAG}.tgz|g" docs/source/cloud_deployment_guide.md -sed_runner "s|blob/branch-${CURRENT_SHORT_TAG}|blob/branch-${NEXT_SHORT_TAG}|g" docs/source/cloud_deployment_guide.md -sed_runner "s|tree/branch-${CURRENT_SHORT_TAG}|tree/branch-${NEXT_SHORT_TAG}|g" docs/source/cloud_deployment_guide.md - # docs/source/developer_guide/guides/5_digital_fingerprinting.md sed_runner "s|blob/branch-${CURRENT_SHORT_TAG}|blob/branch-${NEXT_SHORT_TAG}|g" docs/source/developer_guide/guides/5_digital_fingerprinting.md diff --git a/docs/source/cloud_deployment_guide.md b/docs/source/cloud_deployment_guide.md deleted file mode 100644 index e9c981f84..000000000 --- a/docs/source/cloud_deployment_guide.md +++ /dev/null @@ -1,755 +0,0 @@ - - -# Morpheus Cloud Deployment Guide - -## Table of Contents -- [Introduction](#introduction) -- [Setup](#setup) - - [Prerequisites](#prerequisites) - - [Set up NGC API Key and Install NGC Registry CLI](#set-up-ngc-api-key-and-install-ngc-registry-cli) - - [Create Namespace for Morpheus](#create-namespace-for-morpheus) - - [Install Morpheus AI Engine](#install-morpheus-ai-engine) - - [Install Morpheus SDK Client](#install-morpheus-sdk-client) - - [Morpheus SDK Client in Sleep Mode](#morpheus-sdk-client-in-sleep-mode) - - [Models for MLflow Deployment](#models-for-mlflow-deployment) - - [Install Morpheus MLflow](#install-morpheus-mlflow) - - [Model Deployment](#model-deployment) - - [Verify Model Deployment](#verify-model-deployment) - - [Create Kafka Topics](#create-kafka-topics) -- [Example Workflows](#example-workflows) - - [Run NLP Phishing Detection Pipeline](#run-nlp-phishing-detection-pipeline) - - [Run NLP Sensitive Information Detection Pipeline](#run-nlp-sensitive-information-detection-pipeline) - - [Run FIL Anomalous Behavior Profiling Pipeline](#run-fil-anomalous-behavior-profiling-pipeline) - - [Verify Running Pipeline](#verify-running-pipeline) -- [Prerequisites and Installation for AWS](#prerequisites-and-installation-for-aws) - - [Prerequisites](#prerequisites-1) - - [Install Cloud Native Core Stack for AWS](#install-cloud-native-core-stack-for-aws) -- [Prerequisites and Installation for Ubuntu](#prerequisites-and-installation-for-ubuntu) - - [Prerequisites](#prerequisites-2) -- [Installing Cloud Native Core Stack on NVIDIA Certified Systems](#installing-cloud-native-core-stack-on-nvidia-certified-systems) -- [Kafka Topic Commands](#kafka-topic-commands) -- [Additional Documentation](#additional-documentation) -- [Troubleshooting](#troubleshooting) - - [Common Problems](#common-problems) - - -## Introduction - -This cloud deployment guide provides the necessary instructions to set up the minimum infrastructure and configuration needed to deploy the Morpheus Developer Kit and includes example workflows leveraging the deployment. 
- -- This cloud deployment guide consists of the following steps: -- Set up the NVIDIA Cloud Native Core Stack -- Set up Morpheus AI Engine -- Set up Morpheus SDK Client -- Models for MLflow Deployment -- Set up Morpheus MLflow -- Deploy models to Triton inference server -- Create Kafka topics -- Run example workloads - -> **Note**: This guide requires access to the NGC Public Catalog. - -## Setup - -### Prerequisites -1. Refer to prerequisites for Cloud (AWS) [here](#prerequisites-1) or On-Prem (Ubuntu) [here](#prerequisites-2) -2. Registration in the NGC Public Catalog - -Continue with the setup steps below once the host system is installed, configured, and satisfies all prerequisites. - -### Set up NGC API Key and Install NGC Registry CLI - -First, you will need to set up your NGC API Key to access all the Morpheus components, using the linked instructions from the [NGC Registry CLI User Guide](https://docs.nvidia.com/ngc/gpu-cloud/ngc-private-registry-user-guide/index.html#generating-personal-api-key). - -Once you've created your API key, create an environment variable containing it for use by the commands throughout this document: - -```bash -export API_KEY="" -``` - -Next, install and configure the NGC Registry CLI on your system using the linked instructions from the [NGC Registry CLI User Guide](https://docs.nvidia.com/ngc/gpu-cloud/ngc-private-registry-user-guide/index.html#generating-personal-api-key). - -### Create Namespace for Morpheus - -Next, create a namespace, and an environment variable for it, to logically separate Morpheus-related deployments from other projects on the Kubernetes cluster deployed via the Cloud Native Core Stack: - -```bash -export NAMESPACE="" -kubectl create namespace ${NAMESPACE} -``` - -### Install Morpheus AI Engine - -The Helm chart (`morpheus-ai-engine`) that offers the auxiliary components required to execute certain Morpheus workflows is referred to as the Morpheus AI Engine. It comprises the following components: -- Triton Inference Server [ **ai-engine** ] from NVIDIA for processing inference requests. -- Kafka Broker [ **broker** ] to consume and publish messages. -- Zookeeper [ **zookeeper** ] to maintain coordination between the Kafka Brokers.
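Before fetching any charts, it can help to confirm that the namespace exists and that the cluster's GPUs are visible to Kubernetes. A minimal sanity-check sketch (the `custom-columns` expression assumes the GPU operator from the Cloud Native Core Stack advertises the standard `nvidia.com/gpu` resource):

```bash
# Confirm the active cluster context and the namespace created above
kubectl config current-context
kubectl get namespace ${NAMESPACE}

# List allocatable GPUs per node; an empty GPU column suggests the GPU operator is not ready
kubectl get nodes -o custom-columns=NAME:.metadata.name,GPUS:.status.allocatable.'nvidia\.com/gpu'
```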
- -Follow the below steps to install Morpheus AI Engine: - -```bash -helm fetch https://helm.ngc.nvidia.com/nvidia/morpheus/charts/morpheus-ai-engine-25.02.tgz --username='$oauthtoken' --password=$API_KEY --untar -``` -```bash -helm install --set ngc.apiKey="$API_KEY" \ - --namespace $NAMESPACE \ - \ - morpheus-ai-engine -``` - -After the installation, you can verify that the Kubernetes pods are running successfully using the following command: - -```bash -kubectl -n $NAMESPACE get all -``` - -Output: - -```console -pod/ai-engine-65f59ddcf7-mdmdt 1/1 Running 0 54s -pod/broker-76f7c64dc9-6rldp 1/1 Running 1 54s -pod/zookeeper-87f9f4dd-znjnb 1/1 Running 0 54s - -NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -service/ai-engine ClusterIP 10.109.56.231 8000/TCP,8001/TCP,8002/TCP 54s -service/broker ClusterIP 10.101.103.250 9092/TCP 54s -service/zookeeper ClusterIP 10.110.55.141 2181/TCP 54s - -NAME READY UP-TO-DATE AVAILABLE AGE -deployment.apps/ai-engine 1/1 1 1 54s -deployment.apps/broker 1/1 1 1 54s -deployment.apps/zookeeper 1/1 1 1 54s - -NAME DESIRED CURRENT READY AGE -replicaset.apps/ai-engine-65f59ddcf7 1 1 1 54s -replicaset.apps/broker-76f7c64dc9 1 1 1 54s -replicaset.apps/zookeeper-87f9f4dd 1 1 1 54s -``` - -### Install Morpheus SDK Client -Run the following command to pull the Morpheus SDK Client (referred to as Helm chart `morpheus-sdk-client`) on to your instance: - -```bash -helm fetch https://helm.ngc.nvidia.com/nvidia/morpheus/charts/morpheus-sdk-client-25.02.tgz --username='$oauthtoken' --password=$API_KEY --untar -``` - -#### Morpheus SDK Client in Sleep Mode -Install the Morpheus SDK client pod in sleep mode to copy its sample datasets and models from the container to a shared location that other pods can access. If no `sdk.args` is supplied, the default value `/bin/sleep infinity` from the chart is used in the following command. - -```bash -helm install --set ngc.apiKey="$API_KEY" \ - --namespace $NAMESPACE \ - helper \ - morpheus-sdk-client -``` - -Check the status of the pod to make sure it's up and running. - -```bash -kubectl -n $NAMESPACE get all | grep sdk-cli-helper -``` - -Output: - -```console -pod/sdk-cli-helper 1/1 Running 0 41s -``` - -### Models for MLflow Deployment - -Connect to the **sdk-cli-helper** container and copy the models to `/common`, which is mapped to `/opt/morpheus/common` on the host and where MLflow will have access to model files. - -```bash -kubectl -n $NAMESPACE exec sdk-cli-helper -- cp -RL /workspace/models /common -``` - -### Install Morpheus MLflow - -The Morpheus MLflow Helm chart offers MLflow server with Triton plugin to deploy, update, and remove models from the Morpheus AI Engine. The MLflow server UI can be accessed using NodePort `30500`. Follow the below steps to install the Morpheus MLflow: - -```bash -helm fetch https://helm.ngc.nvidia.com/nvidia/morpheus/charts/morpheus-mlflow-25.02.tgz --username='$oauthtoken' --password=$API_KEY --untar -``` -```bash -helm install --set ngc.apiKey="$API_KEY" \ - --namespace $NAMESPACE \ - \ - morpheus-mlflow -``` - -> **Note**: If the default port is already allocated, Helm throws below error. Choose an alternative by adjusting the `dashboardPort` value in the `morpheus-mlflow/values.yaml` file, remove the previous release and reinstall it. 
- -```console -Error: Service "mlflow" is invalid: spec.ports[0].nodePort: Invalid value: 30500: provided port is already allocated -``` - -After the installation, you can verify that the MLflow pod is running successfully using the following command: - -```bash -kubectl -n $NAMESPACE get all | grep pod/mlflow -``` - -Output: -```bash -pod/mlflow-6d98 1/1 Running 0 39s -``` - -### Model Deployment -Attach to the MLflow pod to publish models to the MLflow server and then deploy them onto the Morpheus AI Engine: - -```bash -kubectl -n $NAMESPACE exec -it deploy/mlflow -- bash -``` - -```console -(mlflow) root@mlflow-6d98:/mlflow# -``` - -> **Important**: When the `(mlflow)` prompt is present, commands are being executed directly within the container. - -Before deploying any models, let's first examine the syntax of the commands used to communicate with the MLflow Triton plugin. -Publishing a model to the MLflow server takes the form: - -```bash -(mlflow) root@mlflow-6d98:/mlflow# python publish_model_to_mlflow.py \ - --model_name \ - --model_directory \ - --flavor -``` - -Deploy models to the Morpheus AI Engine: - -```bash -(mlflow) root@mlflow-6d98:/mlflow# mlflow deployments create -t triton \ - --flavor \ - --name \ - -m models://1 \ - -C "version=" -``` - -Update deployed models in the Morpheus AI Engine: - -```bash -(mlflow) root@mlflow-6d98:/mlflow# mlflow deployments update -t triton \ - --flavor \ - --name / \ - -m models:// -``` - -Delete deployed models from the Morpheus AI Engine: - -```bash -(mlflow) root@mlflow-6d98:/mlflow# mlflow deployments delete -t triton \ - --name / -``` - -With the command syntax covered, it's time to deploy the relevant models, which have already been copied to `/opt/morpheus/common/models` and are bound to `/common/models` within the MLflow pod.
- -```bash -(mlflow) root@mlflow-6d98:/mlflow# ls -lrt /common/models -``` - -Output: - -```console -drwxr-xr-x 3 ubuntu ubuntu 4096 Apr 13 23:47 sid-minibert-onnx -drwxr-xr-x 2 root root 4096 Apr 21 17:09 abp-models -drwxr-xr-x 4 root root 4096 Apr 21 17:09 datasets -drwxr-xr-x 4 root root 4096 Apr 21 17:09 fraud-detection-models -drwxr-xr-x 2 root root 4096 Apr 21 17:09 dfp-models -drwxr-xr-x 3 root root 4096 Apr 21 17:10 mlflow -drwxr-xr-x 2 root root 4096 Apr 21 17:10 log-parsing-models -drwxr-xr-x 2 root root 4096 Apr 21 17:10 phishing-models -drwxr-xr-x 2 root root 4096 Apr 21 17:10 sid-models -drwxr-xr-x 8 root root 4096 Apr 21 17:10 training-tuning-scripts -drwxr-xr-x 7 root root 4096 Apr 21 17:10 validation-inference-scripts -drwxr-xr-x 7 root root 4096 Apr 21 17:10 triton-model-repo --rw-r--r-- 1 root root 4213 Apr 21 17:10 README.md --rw-r--r-- 1 root root 4862 Apr 21 17:10 model_cards.csv --rw-r--r-- 1 root root 1367 Apr 21 17:10 model-information.csv -``` - - -Publish and deploy sid-minibert-onnx model: - -```bash -(mlflow) root@mlflow-6d98:/mlflow# python publish_model_to_mlflow.py \ - --model_name sid-minibert-onnx \ - --model_directory /common/models/triton-model-repo/sid-minibert-onnx \ - --flavor triton -``` - -```bash -(mlflow) root@mlflow-6d98:/mlflow# mlflow deployments create -t triton \ - --flavor triton \ - --name sid-minibert-onnx \ - -m models:/sid-minibert-onnx/1 \ - -C "version=1" -``` - -Publish and deploy phishing-bert-onnx model: - -```bash -(mlflow) root@mlflow-6d98:/mlflow# python publish_model_to_mlflow.py \ - --model_name phishing-bert-onnx \ - --model_directory /common/models/triton-model-repo/phishing-bert-onnx \ - --flavor triton -``` -```bash -(mlflow) root@mlflow-6d98:/mlflow# mlflow deployments create -t triton \ - --flavor triton \ - --name phishing-bert-onnx \ - -m models:/phishing-bert-onnx/1 \ - -C "version=1" -``` - -Publish and deploy abp-nvsmi-xgb model: - -```bash -(mlflow) root@mlflow-6d98:/mlflow# python publish_model_to_mlflow.py \ - --model_name abp-nvsmi-xgb \ - --model_directory /common/models/triton-model-repo/abp-nvsmi-xgb \ - --flavor triton -``` - -```bash -(mlflow) root@mlflow-6d98:/mlflow# mlflow deployments create -t triton \ - --flavor triton \ - --name abp-nvsmi-xgb \ - -m models:/abp-nvsmi-xgb/1 \ - -C "version=1" -``` - -Exit from the container - -```bash -(mlflow) root@mlflow-6d98:/mlflow# exit -``` - -### Verify Model Deployment -Run the following command to verify that the models were successfully deployed on the AI Engine: - -```bash -kubectl -n $NAMESPACE logs deploy/ai-engine -``` - -Output: -```console -I1202 14:09:03.098085 1 api.cu:79] TRITONBACKEND_ModelInitialize: abp-nvsmi-xgb (version 1) -I1202 14:09:03.101910 1 api.cu:123] TRITONBACKEND_ModelInstanceInitialize: abp-nvsmi-xgb_0 (GPU device 0) -I1202 14:09:03.543719 1 model_instance_state.cu:101] Using GPU for predicting with model 'abp-nvsmi-xgb_0' -I1202 14:09:03.563425 1 api.cu:123] TRITONBACKEND_ModelInstanceInitialize: abp-nvsmi-xgb_0 (GPU device 1) -I1202 14:09:03.980621 1 model_instance_state.cu:101] Using GPU for predicting with model 'abp-nvsmi-xgb_0' -I1202 14:09:03.981678 1 model_repository_manager.cc:1183] successfully loaded 'abp-nvsmi-xgb' version 1 -``` - -### Create Kafka Topics -We will need to create Kafka topics for input and output data to run some of the pipeline examples. - -Check if any Kafka topics exist already. If any exist, you can either delete the previous topics or re-use them. 
- -```bash -kubectl -n $NAMESPACE exec deploy/broker -c broker -- kafka-topics.sh --list --zookeeper zookeeper:2181 -``` - -Run the following command twice, once to create an input topic, and again to create an output topic, making sure that the input topic and output topic have different names: - -```bash -kubectl -n $NAMESPACE exec deploy/broker -c broker -- kafka-topics.sh \ - --create \ - --bootstrap-server broker:9092 \ - --replication-factor 1 \ - --partitions 3 \ - --topic -``` - -## Example Workflows - -This section describes example workflows to run on Morpheus. Three sample pipelines are provided: - -1. NLP pipeline performing Phishing Detection (PD). -2. NLP pipeline performing Sensitive Information Detection (SID). -3. FIL pipeline performing Anomalous Behavior Profiling (ABP). - -Multiple command options are given for each pipeline, with varying data input/output methods, ranging from local files to Kafka topics. - -We recommend only deploying one pipeline at a time. To remove previously deployed pipelines, run the following command: - -```bash -helm delete -n $NAMESPACE -``` - -To publish messages to a Kafka topic, we need to copy the datasets to a location where they can be accessed from the host. - -```bash -kubectl -n $NAMESPACE exec sdk-cli-helper -- cp -R /workspace/examples/data /common -``` - -Refer to the [Morpheus CLI Overview](./basics/overview.rst) and [Building a Pipeline](./basics/building_a_pipeline.md) documentation for more information regarding the commands. - -> **Note**: Before running the example pipelines, ensure the criteria below are met: -- Models specific to the pipeline have been deployed. -- Input and output Kafka topics have been created. -- It is recommended to create an output directory under `/opt/morpheus/common/data`, which is bound to `/common/data` (pod/container), for storing inference or validation results. -- Replace **** with your directory name. -- Replace **** with your input Kafka topic name. -- Replace **** with your output Kafka topic name. -- Replace **** with the name you want. - - -For reference, here is the Morpheus SDK Client install command template used by the pipeline examples that follow: - -```bash -helm install --set ngc.apiKey="$API_KEY" \ - --set sdk.args="" \ - --namespace $NAMESPACE \ - \ - morpheus-sdk-client -``` - -### Run NLP Phishing Detection Pipeline - -The following Phishing Detection pipeline examples use a pre-trained NLP model to analyze email bodies and determine whether they are phishing or benign. The sample data shown below is passed as input to the pipeline.
- -```json -{"data":"Abedin Huma Wednesday July 15 2009 1:44 PMRe: ArtWill be off campus at meetingBut you should definitely come I think they have found some good things."} -{"data":"See NIMills Cheryl D Saturday December 112010 1:36 PMFw: S is calling Leahy today - thx for all the help; advise if a diff no for him today"} -{"data":"Here is Draft"} -{"data":"Ok"} -``` - -Pipeline example to read data from a file, run inference using a `phishing-bert-onnx` model, and write inference results to the specified output file: - -```bash -helm install --set ngc.apiKey="$API_KEY" \ - --set sdk.args="morpheus --log_level=DEBUG run \ - --edge_buffer_size=4 \ - --pipeline_batch_size=1024 \ - --model_max_batch_size=32 \ - pipeline-nlp \ - --model_seq_length=128 \ - --labels_file=data/labels_phishing.txt \ - from-file --filename=./examples/data/email.jsonlines \ - monitor --description 'FromFile Rate' --smoothing=0.001 \ - deserialize \ - preprocess --vocab_hash_file=data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ - monitor --description 'Preprocess Rate' \ - inf-triton --model_name=phishing-bert-onnx --server_url=ai-engine:8000 --force_convert_inputs=True \ - monitor --description 'Inference Rate' --smoothing=0.001 --unit inf \ - add-class --label=is_phishing --threshold=0.7 \ - serialize \ - to-file --filename=/common/data//phishing-bert-onnx-output.jsonlines --overwrite" \ - --namespace $NAMESPACE \ - \ - morpheus-sdk-client -``` - -When the pipeline runs successfully, an output file `phishing-bert-onnx-output.jsonlines` will appear in the output directory. - -Pipeline example to read messages from an input Kafka topic, run inference using a `phishing-bert-onnx` model, and write the results of the inference to an output Kafka topic: - -```bash -helm install --set ngc.apiKey="$API_KEY" \ - --set sdk.args="morpheus --log_level=DEBUG run \ - --edge_buffer_size=4 \ - --pipeline_batch_size=1024 \ - --model_max_batch_size=32 \ - pipeline-nlp \ - --model_seq_length=128 \ - --labels_file=data/labels_phishing.txt \ - from-kafka --input_topic --bootstrap_servers broker:9092 \ - monitor --description 'FromKafka Rate' --smoothing=0.001 \ - deserialize \ - preprocess --vocab_hash_file=data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ - monitor --description 'Preprocess Rate' \ - inf-triton --force_convert_inputs=True --model_name=phishing-bert-onnx --server_url=ai-engine:8000 \ - monitor --description='Inference Rate' --smoothing=0.001 --unit inf \ - add-class --label=is_phishing --threshold=0.7 \ - serialize --exclude '^ts_' \ - to-kafka --output_topic --bootstrap_servers broker:9092" \ - --namespace $NAMESPACE \ - \ - morpheus-sdk-client -``` - -Make sure you create input and output Kafka topics before you start the pipeline. After the pipeline has been started, load the individual corresponding data files from the downloaded sample into the selected input topic using the command below: - -```bash -kubectl -n $NAMESPACE exec -it deploy/broker -c broker -- kafka-console-producer.sh \ - --broker-list broker:9092 \ - --topic < \ - -``` - -> **Note**: This should be used for development purposes only via this developer kit. Loading from the file into Kafka should not be used in production deployments of Morpheus. 
- -### Run NLP Sensitive Information Detection Pipeline -The following Sensitive Information Detection pipeline examples use a pre-trained NLP model to ingest and analyze PCAP (packet capture network traffic) input sample data, like the example below, to inspect IP traffic across data center networks. - -```json -{"timestamp": 1616380971990, "host_ip": "10.188.40.56", "data_len": "309", "data": "POST /simpledatagen/ HTTP/1.1\r\nHost: echo.gtc1.netqdev.cumulusnetworks.com\r\nUser-Agent: python-requests/2.22.0\r\nAccept-Encoding: gzip, deflate\r\nAccept: */*\r\nConnection: keep-alive\r\nContent-Length: 73\r\nContent-Type: application/json\r\n\r\n", "src_mac": "04:3f:72:bf:af:74", "dest_mac": "b4:a9:fc:3c:46:f8", "protocol": "6", "src_ip": "10.20.16.248", "dest_ip": "10.244.0.59", "src_port": "50410", "dest_port": "80", "flags": "24", "is_pii": false} -{"timestamp": 1616380971991, "host_ip": "10.188.40.56", "data_len": "139", "data": "\"{\\\"markerEmail\\\": \\\"FuRLFaAZ identify benefit BneiMvCZ join 92694759\\\"}\"", "src_mac": "04:3f:72:bf:af:74", "dest_mac": "b4:a9:fc:3c:46:f8", "protocol": "6", "src_ip": "10.244.0.1", "dest_ip": "10.244.0.25", "src_port": "50410", "dest_port": "80", "flags": "24", "is_pii": false} -``` - -Pipeline example to read data from a file, run inference using a `sid-minibert-onnx` model, and write inference results to the specified output file: - -```bash -helm install --set ngc.apiKey="$API_KEY" \ - --set sdk.args="morpheus --log_level=DEBUG run \ - --edge_buffer_size=4 \ - --pipeline_batch_size=1024 \ - --model_max_batch_size=32 \ - pipeline-nlp \ - --model_seq_length=256 \ - from-file --filename=./examples/data/pcap_dump.jsonlines \ - monitor --description 'FromFile Rate' --smoothing=0.001 \ - deserialize \ - preprocess --vocab_hash_file=data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ - monitor --description='Preprocessing rate' \ - inf-triton --force_convert_inputs=True --model_name=sid-minibert-onnx --server_url=ai-engine:8000 \ - monitor --description='Inference rate' --smoothing=0.001 --unit inf \ - add-class \ - serialize --exclude '^ts_' \ - to-file --filename=/common/data//sid-minibert-onnx-output.jsonlines --overwrite" \ - --namespace $NAMESPACE \ - \ - morpheus-sdk-client -``` - -When the pipeline runs successfully, an output file *sid-minibert-onnx-output.jsonlines* will appear in the output directory. 
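Once the file appears, a quick spot check from the host confirms that the pipeline emitted one enriched record per input line. A sketch, assuming an output directory named `sid-output` was created under the shared volume (the directory name is an example):

```bash
# Count and preview the SID results on the host
OUT=/opt/morpheus/common/data/sid-output/sid-minibert-onnx-output.jsonlines
wc -l "${OUT}"
head -n 2 "${OUT}"
```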
- -Pipeline example to read messages from an input Kafka topic, run inference using a *sid-minibert-onnx* model, and write the results of the inference to an output Kafka topic: - -```bash -helm install --set ngc.apiKey="$API_KEY" \ - --set sdk.args="morpheus --log_level=DEBUG run \ - --edge_buffer_size=4 \ - --pipeline_batch_size=1024 \ - --model_max_batch_size=32 \ - pipeline-nlp \ - --model_seq_length=256 \ - from-kafka --input_topic --bootstrap_servers broker:9092 \ - monitor --description 'FromKafka Rate' --smoothing=0.001 \ - deserialize \ - preprocess --vocab_hash_file=data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ - monitor --description='Preprocessing Rate' \ - inf-triton --force_convert_inputs=True --model_name=sid-minibert-onnx --server_url=ai-engine:8000 \ - monitor --description='Inference Rate' --smoothing=0.001 --unit inf \ - add-class \ - serialize --exclude '^ts_' \ - to-kafka --output_topic --bootstrap_servers broker:9092" \ - --namespace $NAMESPACE \ - \ - morpheus-sdk-client -``` - -Make sure you create input and output Kafka topics before you start the pipeline. After the pipeline has been started, load the individual corresponding data files from the downloaded sample into the selected input topic using the command below: - -```bash -kubectl -n $NAMESPACE exec -it deploy/broker -c broker -- kafka-console-producer.sh \ - --broker-list broker:9092 \ - --topic < \ - -``` - -> **Note**: This should be used for development purposes only via this developer kit. Loading from the file into Kafka should not be used in production deployments of Morpheus. - -### Run FIL Anomalous Behavior Profiling Pipeline -The following Anomalous Behavior Profiling pipeline examples use a pre-trained FIL model to ingest and analyze NVIDIA System Management Interface (`nvidia-smi`) logs, like the example below, as input sample data to identify cryptocurrency mining activity on GPU devices. 
- -```json -{"nvidia_smi_log.gpu.pci.tx_util": "0 KB/s", "nvidia_smi_log.gpu.pci.rx_util": "0 KB/s", "nvidia_smi_log.gpu.fb_memory_usage.used": "3980 MiB", "nvidia_smi_log.gpu.fb_memory_usage.free": "12180 MiB", "nvidia_smi_log.gpu.bar1_memory_usage.total": "16384 MiB", "nvidia_smi_log.gpu.bar1_memory_usage.used": "11 MiB", "nvidia_smi_log.gpu.bar1_memory_usage.free": "16373 MiB", "nvidia_smi_log.gpu.utilization.gpu_util": "0 %", "nvidia_smi_log.gpu.utilization.memory_util": "0 %", "nvidia_smi_log.gpu.temperature.gpu_temp": "61 C", "nvidia_smi_log.gpu.temperature.gpu_temp_max_threshold": "90 C", "nvidia_smi_log.gpu.temperature.gpu_temp_slow_threshold": "87 C", "nvidia_smi_log.gpu.temperature.gpu_temp_max_gpu_threshold": "83 C", "nvidia_smi_log.gpu.temperature.memory_temp": "57 C", "nvidia_smi_log.gpu.temperature.gpu_temp_max_mem_threshold": "85 C", "nvidia_smi_log.gpu.power_readings.power_draw": "61.77 W", "nvidia_smi_log.gpu.clocks.graphics_clock": "1530 MHz", "nvidia_smi_log.gpu.clocks.sm_clock": "1530 MHz", "nvidia_smi_log.gpu.clocks.mem_clock": "877 MHz", "nvidia_smi_log.gpu.clocks.video_clock": "1372 MHz", "nvidia_smi_log.gpu.applications_clocks.graphics_clock": "1312 MHz", "nvidia_smi_log.gpu.applications_clocks.mem_clock": "877 MHz", "nvidia_smi_log.gpu.default_applications_clocks.graphics_clock": "1312 MHz", "nvidia_smi_log.gpu.default_applications_clocks.mem_clock": "877 MHz", "nvidia_smi_log.gpu.max_clocks.graphics_clock": "1530 MHz", "nvidia_smi_log.gpu.max_clocks.sm_clock": "1530 MHz", "nvidia_smi_log.gpu.max_clocks.mem_clock": "877 MHz", "nvidia_smi_log.gpu.max_clocks.video_clock": "1372 MHz", "nvidia_smi_log.gpu.max_customer_boost_clocks.graphics_clock": "1530 MHz", "nvidia_smi_log.gpu.processes.process_info.0.process_name": "python", "nvidia_smi_log.gpu.processes.process_info.1.process_name": "tritonserver", "hostname": "ip-10-100-8-98", "timestamp": 1615542360.9566503} -``` - -Pipeline example to read data from a file, run inference using an `abp-nvsmi-xgb` model, and write inference results to the specified output file. 
- -```bash -helm install --set ngc.apiKey="$API_KEY" \ - --set sdk.args="morpheus --log_level=DEBUG run \ - --edge_buffer_size=4 \ - --pipeline_batch_size=1024 \ - --model_max_batch_size=64 \ - pipeline-fil --columns_file=data/columns_fil.txt \ - from-file --filename=./examples/data/nvsmi.jsonlines \ - monitor --description 'FromFile Rate' --smoothing=0.001 \ - deserialize \ - preprocess \ - monitor --description='Preprocessing Rate' \ - inf-triton --model_name=abp-nvsmi-xgb --server_url=ai-engine:8000 --force_convert_inputs=True \ - monitor --description='Inference Rate' --smoothing=0.001 --unit inf \ - add-class \ - serialize --exclude '^nvidia_smi_log' --exclude '^ts_' \ - to-file --filename=/common/data//abp-nvsmi-xgb-output.jsonlines --overwrite" \ - --namespace $NAMESPACE \ - \ - morpheus-sdk-client -``` - -Pipeline example to read messages from an input Kafka topic, run inference using an `abp-nvsmi-xgb` model, and write the results of the inference to an output Kafka topic: - -```bash -helm install --set ngc.apiKey="$API_KEY" \ - --set sdk.args="morpheus --log_level=DEBUG run \ - --pipeline_batch_size=1024 \ - --model_max_batch_size=64 \ - pipeline-fil --columns_file=data/columns_fil.txt \ - from-kafka --input_topic --bootstrap_servers broker:9092 \ - monitor --description 'FromKafka Rate' --smoothing=0.001 \ - deserialize \ - preprocess \ - monitor --description='Preprocessing Rate' \ - inf-triton --model_name=abp-nvsmi-xgb --server_url=ai-engine:8000 --force_convert_inputs=True \ - monitor --description='Inference Rate' --smoothing=0.001 --unit inf \ - add-class \ - serialize --exclude '^nvidia_smi_log' \ --exclude '^ts_' \ - to-kafka --output_topic --bootstrap_servers broker:9092" \ - --namespace $NAMESPACE \ - \ - morpheus-sdk-client -``` - -Make sure you create input and output Kafka topics before you start the pipeline. After the pipeline has been started, load the individual corresponding data files from the downloaded sample into the selected input topic using the command below: - -```bash -kubectl -n $NAMESPACE exec -it deploy/broker -c broker -- kafka-console-producer.sh \ - --broker-list broker:9092 \ - --topic < \ - -``` - -> **Note**: This should be used for development purposes only via this developer kit. Loading from the file into Kafka should not be used in production deployments of Morpheus. - -### Verify Running Pipeline -Once you've deployed the SDK client to run a pipeline, you can check the status of the pod using the following command: - -```bash -kubectl -n $NAMESPACE get pods sdk-cli- -NAME READY STATUS RESTARTS AGE -sdk-cli-6c9575f648-gfdd2 1/1 Running 0 3m23s -``` - -Then check that the pipeline is running successfully using the following command: - -```bash -kubectl -n $NAMESPACE logs sdk-cli- -``` - -Output: - -```console -Configuring Pipeline via CLI -Starting pipeline via CLI... Ctrl+C to Quit -Preprocessing rate: 7051messages [00:09, 4372.75messages/s] -Inference rate: 7051messages [00:04, 4639.40messages/s] -``` - -## Prerequisites and Installation for AWS - -### Prerequisites -1. AWS account with the ability to create/modify EC2 instances -2. AWS EC2 G4 instance with T4 or V100 GPU, at least 64GB RAM, 8 cores CPU, and 100 GB storage. - -### Install Cloud Native Core Stack for AWS -On your AWS EC2 G4 instance, follow the instructions in the linked document to install [NVIDIA's Cloud Native Core Stack for AWS](https://github.com/NVIDIA/cloud-native-core). - -## Prerequisites and Installation for Ubuntu - -### Prerequisites -1. 
NVIDIA-Certified System -2. NVIDIA Volta GPU or newer (Compute Capability >= 7.0) -3. Ubuntu 20.04 LTS or newer - -## Installing Cloud Native Core Stack on NVIDIA Certified Systems -On your NVIDIA-Certified System, follow the instructions in the linked document to install [NVIDIA's Cloud Native Core Stack](https://github.com/NVIDIA/cloud-native-core). - -## Kafka Topic Commands - -List available Kafka topics: - -```bash -kubectl -n $NAMESPACE exec deploy/broker -c broker -- kafka-topics.sh \ - --list --zookeeper zookeeper:2181 -``` - -Create a partitioned Kafka topic with a single replication factor: - -```bash -kubectl -n $NAMESPACE exec deploy/broker -c broker -- kafka-topics.sh \ - --create \ - --bootstrap-server broker:9092 \ - --replication-factor 1 \ - --partitions 1 \ - --topic -``` - -Load data from a file into a Kafka topic: - -```bash -kubectl -n $NAMESPACE exec -it deploy/broker -c broker -- kafka-console-producer.sh \ - --broker-list broker:9092 \ - --topic < \ - -``` - -> **Note**: This should be used for development purposes only via this developer kit. Loading from the file into Kafka should not be used in production deployments of Morpheus. - - -Consume messages from a Kafka topic: - -```bash -kubectl -n $NAMESPACE exec deploy/broker -c broker -- kafka-console-consumer.sh \ - --bootstrap-server broker:9092 \ - --topic \ - --group -``` - -Delete a Kafka topic: - -```bash -kubectl -n $NAMESPACE exec deploy/broker -c broker -- kafka-topics.sh \ - --delete --zookeeper zookeeper:2181 \ - --topic -``` - -## Additional Documentation -For more information on how to use the Morpheus Python API to customize and run your own optimized AI pipelines, refer to the documentation below: -- [Morpheus Developer Guides](./developer_guide/guides.md) -- [Morpheus Pipeline Examples](./examples.md) - - -## Troubleshooting -This section lists solutions to problems you might encounter with Morpheus or its supporting components. - -### Common Problems - -- Models Unloaded After Reboot - - When the pod is restarted, Kubernetes will not automatically reload the models. Since models are deployed to *ai-engine* in explicit mode using MLflow, they must be deployed again manually using the [Model Deployment](#model-deployment) process. -- AI Engine CPU Only Mode - - After a server restart, the ai-engine pod on Kubernetes can start up before the GPU operator infrastructure is available, making it "think" there is no driver installed (that is, CPU-only mode). -- Improve Pipeline Message Processing Rate - - Consider the following settings: - - Provide the workflow with the optimal number of threads (`--num_threads`), as having more or fewer threads can have an impact on pipeline performance. - - Consider adjusting `pipeline_batch_size` and `model_max_batch_size`. -- Kafka Message Offset Commit Fail - - Error Message - ```console - 1649207839.253|COMMITFAIL|rdkafka#consumer-2| [thrd:main]: Offset commit (manual) failed for 1/1 partition(s) in join-state wait-unassign-call: Broker: Unknown member: topic[0]@112071(Broker: Unknown member) - ``` - - Problem: If the standalone Kafka cluster is receiving significant message throughput from the producer, this error may happen. - - - Solution: Reinstall the Morpheus workflow and reduce the Kafka topic's message retention time and message producing rate.
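For the retention portion of that solution, the topic configuration can be lowered with the standard Kafka tooling already present in the broker container. A sketch, using an example topic name and a five-minute retention value; depending on the Kafka version shipped in the chart, `--zookeeper zookeeper:2181` may be required in place of `--bootstrap-server`:

```bash
# Example: reduce a topic's message retention to 5 minutes (300000 ms)
kubectl -n $NAMESPACE exec deploy/broker -c broker -- kafka-configs.sh \
    --bootstrap-server broker:9092 \
    --alter \
    --entity-type topics \
    --entity-name example-topic \
    --add-config retention.ms=300000
```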
diff --git a/docs/source/developer_guide/guides/5_digital_fingerprinting.md b/docs/source/developer_guide/guides/5_digital_fingerprinting.md index 17e25aceb..4d829865b 100644 --- a/docs/source/developer_guide/guides/5_digital_fingerprinting.md +++ b/docs/source/developer_guide/guides/5_digital_fingerprinting.md @@ -56,7 +56,6 @@ Key Features: * Uses a model store to allow the training and inference pipelines to communicate * Organized into a docker-compose deployment for easy startup * Contains a Jupyter notebook service to ease development and debugging - * Can be deployed to Kubernetes using provided Helm charts * Uses many customized stages to maximize performance. This example is described in `examples/digital_fingerprinting/production/README.md` as well as the rest of this document. @@ -121,9 +120,7 @@ DFP in Morpheus is accomplished via two independent pipelines: training and infe ## Runtime Environment Setup ![Runtime Environment Setup](img/dfp_runtime_env.png) -DFP in Morpheus is built as an application of containerized services​ and can be run in two ways: -1. Using docker-compose for testing and development​ -1. Using helm charts for production Kubernetes deployment​ +DFP in Morpheus is built as an application of containerized services​ and can be run using `docker-compose` for testing and development​. ### Services The reference architecture is composed of the following services:​ @@ -162,12 +159,5 @@ From the `examples/digital_fingerprinting/production` dir, run: docker compose up mlflow ``` -### Running via Kubernetes​ -#### System requirements -* [Kubernetes](https://kubernetes.io/) cluster configured with GPU resources​ -* [NVIDIA GPU Operator](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/gpu-operator) installed in the cluster - -> **Note:** For GPU Requirements refer to the [Getting Started](../../getting_started.md#requirements) guide. - ## Customizing DFP For details on customizing the DFP pipeline refer to [Digital Fingerprinting (DFP) Reference](./6_digital_fingerprinting_reference.md). diff --git a/docs/source/extra_info/glossary.md b/docs/source/extra_info/glossary.md index 7a51d9076..3f2025b46 100644 --- a/docs/source/extra_info/glossary.md +++ b/docs/source/extra_info/glossary.md @@ -19,20 +19,11 @@ limitations under the License. ## MLflow Triton Plugin -Docker container published on [NGC](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/morpheus/containers/mlflow-triton-plugin), allowing the deployment of models in [MLflow](https://mlflow.org/) to [Triton Inference Server](#triton-inference-server). This is also available as a [Helm Chart](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/morpheus/helm-charts/morpheus-mlflow). +[MLflow](https://mlflow.org/) plugin for deploying your models from MLflow to [Triton Inference Server](#triton-inference-server). Refer to [`mlflow-triton-plugin`](https://github.com/triton-inference-server/server/tree/main/deploy/mlflow-triton-plugin) for more information. ## module A Morpheus module is a type of work unit that can be utilized in the Morpheus stage and can be registered to a MRC segment module registry. Modules are beneficial when there is a possibility for the work-unit to be reused. -## Morpheus AI Engine -A Helm Chart for deploying the infrastructure of Morpheus. It includes the [Triton Inference Server](#triton-inference-server), Kafka, and Zookeeper. 
Refer to [https://catalog.ngc.nvidia.com/orgs/nvidia/teams/morpheus/helm-charts/morpheus-ai-engine](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/morpheus/helm-charts/morpheus-ai-engine). - -## Morpheus SDK CLI -A Helm Chart that deploys the Morpheus container. Refer to [https://catalog.ngc.nvidia.com/orgs/nvidia/teams/morpheus/helm-charts/morpheus-sdk-client](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/morpheus/helm-charts/morpheus-sdk-client) - -## `morpheus-sdk-client` -Another name for the [Morpheus SDK CLI](#morpheus-sdk-cli) Helm Chart. - ## MRC [Morpheus Runtime Core (MRC)](https://github.com/nv-morpheus/MRC). Pipelines in MRC are low level representations of Morpheus [pipelines](#pipeline). diff --git a/docs/source/index.rst b/docs/source/index.rst index 8d7e7d2b0..eed1981bb 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -66,11 +66,6 @@ Modifying Morpheus ^^^^^^^^^^^^^^^^^^ * :doc:`developer_guide/contributing` - Covers building from source, making changes and contributing to Morpheus -Deploying Morpheus -^^^^^^^^^^^^^^^^^^ - * :doc:`cloud_deployment_guide` - Kubernetes and cloud based deployments - - .. toctree:: :caption: Using Morpheus @@ -94,13 +89,6 @@ Deploying Morpheus developer_guide/architecture developer_guide/contributing -.. toctree:: - :caption: Deploying Morpheus - :maxdepth: 20 - :hidden: - - cloud_deployment_guide - .. toctree:: :caption: API :maxdepth: 20 diff --git a/examples/digital_fingerprinting/production/README.md b/examples/digital_fingerprinting/production/README.md index c9c9d5d18..cc1ba41c6 100644 --- a/examples/digital_fingerprinting/production/README.md +++ b/examples/digital_fingerprinting/production/README.md @@ -24,7 +24,6 @@ Key Features: * Uses a model store to allow the training and inference pipelines to communicate * Organized into a `docker compose` deployment for easy startup * Contains a Jupyter notebook service to ease development and debugging - * Can be deployed to Kubernetes using provided Helm charts * Uses many customized stages to maximize performance. ## Building and Running via `docker compose` From 3c3c2f5f4d2421b7725377a0eb3190eac650f6f2 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Thu, 6 Feb 2025 11:51:14 -0800 Subject: [PATCH 2/4] Perform `apt upgrade` during docker build of the models container (#2174) * Perform an `apt update` and `apt upgrade` during the Docker build picking up any security fixes available. * Add `urllib3` to `ci/release/download_deps.py` (unrelated fix) ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Will Killian (https://github.com/willkill07) - Tad ZeMicheal (https://github.com/tzemicheal) URL: https://github.com/nv-morpheus/Morpheus/pull/2174 --- ci/release/download_deps.py | 9 +++++---- models/docker/Dockerfile | 6 ++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ci/release/download_deps.py b/ci/release/download_deps.py index afed42465..8b3e5e55d 100755 --- a/ci/release/download_deps.py +++ b/ci/release/download_deps.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -99,6 +99,7 @@ 'pytorch': 'https://github.com/pytorch/pytorch', 'tqdm': 'https://github.com/tqdm/tqdm', 'typing_utils': 'https://github.com/bojiang/typing_utils', + 'urllib3': 'https://github.com/urllib3/urllib3', 'versioneer-518': 'https://github.com/python-versioneer/versioneer-518', 'watchdog': 'https://github.com/gorakhargosh/watchdog', 'websockets': 'https://github.com/python-websockets/websockets', @@ -112,9 +113,8 @@ OTHER_REPOS: dict[str, PACKAGE_TO_URL_FN_T] = { # While boost is available on GitHub, the sub-libraries are in separate repos. 'beautifulsoup4': - lambda name, - ver: ("https://www.crummy.com/software/BeautifulSoup/bs4/download/" - f"{'.'.join(ver.split('.')[:-1])}/{name}-{ver}.tar.gz"), + lambda name, ver: ("https://www.crummy.com/software/BeautifulSoup/bs4/download/" + f"{'.'.join(ver.split('.')[:-1])}/{name}-{ver}.tar.gz"), } # Please keep sorted @@ -164,6 +164,7 @@ 'python-versioneer': TAG_BARE, 'scikit-learn': TAG_BARE, 'sqlalchemy': lambda ver: f"rel_{ver.replace('.', '_')}", + 'urllib3': TAG_BARE, 'websockets': TAG_BARE, } diff --git a/models/docker/Dockerfile b/models/docker/Dockerfile index e94cdec61..2a7c4b322 100644 --- a/models/docker/Dockerfile +++ b/models/docker/Dockerfile @@ -22,6 +22,12 @@ ARG MORPHEUS_ROOT_HOST WORKDIR / +RUN apt update && \ + DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC \ + apt upgrade -y && \ + apt clean && \ + rm -rf /var/lib/apt/lists/* + # Copy the model repository COPY "${MORPHEUS_ROOT_HOST}/models" "./models" From b2ef4ac89031afa62dca3cd2d82933c69422d6d5 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Thu, 6 Feb 2025 14:55:08 -0800 Subject: [PATCH 3/4] Include a Third Party OSS notice in the entrypoint banner (#2176) * Direct users to the main branch of https://github.com/nv-morpheus/morpheus_third_party_oss not a versioned branch * Add a third party notice to the models container * Remove `ci/release/download_deps.py` in favor of adopting the Rapids tools. ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
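For context on the models-container half of this change: NGC base images print text fragments placed under `/opt/nvidia/entrypoint.d/` as part of the start-up banner, in lexical order, which is why the notice is copied in with an `80-` prefix in the Dockerfile hunk below. A rough sketch of that mechanism, assuming the base image's entrypoint simply concatenates the fragments (the real entrypoint differs in detail):

```bash
# Simplified sketch of how banner fragments under /opt/nvidia/entrypoint.d/ are emitted
for banner in /opt/nvidia/entrypoint.d/*.txt; do
    [ -f "${banner}" ] && cat "${banner}"
done
```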
Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Yuchen Zhang (https://github.com/yczhang-nv) - Tad ZeMicheal (https://github.com/tzemicheal) URL: https://github.com/nv-morpheus/Morpheus/pull/2176 --- .dockerignore | 3 + ci/release/download_deps.py | 227 ------------------ ci/release/update-version.sh | 3 - docker/entrypoint.sh | 11 +- models/docker/Dockerfile | 3 + thirdparty/README.md | 2 +- .../models-container-thirdparty-oss.txt | 7 + .../morpheus-container-thirdparty-oss.txt | 8 + 8 files changed, 29 insertions(+), 235 deletions(-) delete mode 100755 ci/release/download_deps.py create mode 100644 thirdparty/models-container-thirdparty-oss.txt create mode 100644 thirdparty/morpheus-container-thirdparty-oss.txt diff --git a/.dockerignore b/.dockerignore index 1fa26a32a..662765124 100644 --- a/.dockerignore +++ b/.dockerignore @@ -14,3 +14,6 @@ docker/commands.sh *.egg-info/ kafka-docker + +# Ignore the models docker files +models/docker diff --git a/ci/release/download_deps.py b/ci/release/download_deps.py deleted file mode 100755 index 8b3e5e55d..000000000 --- a/ci/release/download_deps.py +++ /dev/null @@ -1,227 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Script to automate downloading of source code for third party dependencies - -Intentionally using as few third-party dependencies as possible to allow running this script outside of a Morpheus -Conda environment. 
-""" - -import logging -import os -import shutil -import sys -import tempfile - -SCRIPT_DIR = os.path.relpath(os.path.dirname(__file__)) -PROJ_ROOT = os.path.dirname(os.path.dirname(SCRIPT_DIR)) -UTILITIES_RELEASE_DIR = os.path.join(PROJ_ROOT, "external/utilities/ci/release") - -sys.path.append(UTILITIES_RELEASE_DIR) -# pylint: disable=wrong-import-position -from download_deps_lib import PACKAGE_TO_URL_FN_T # noqa: E402 -from download_deps_lib import TAG_BARE # noqa: E402 -from download_deps_lib import TAG_NAME_DASH_BARE # noqa: E402 -from download_deps_lib import download_source_deps # noqa: E402 -from download_deps_lib import parse_args # noqa: E402 - -# pylint: enable=wrong-import-position - -CONDA_JSON_CMD = "./docker/run_container_release.sh conda list --json > .tmp/container_pkgs.json" - -# In some cases multiple packages are derived from a single upstream repo, please keep sorted -PACKAGE_ALIASES = { # : - "elasticsearch": "elasticsearch-py", - "grpcio": "grpc", - "grpcio-status": "grpc", - "milvus": "milvus-lite", - "nlohmann_json": "json", - 'python': 'cpython', - "python-confluent-kafka": "confluent-kafka-python", - "python-graphviz": "graphviz", - "torch": "pytorch", - 'versioneer': 'python-versioneer', -} - -KNOWN_GITHUB_URLS = { # : , please keep sorted - 'appdirs': 'https://github.com/ActiveState/appdirs', - 'c-ares': 'https://github.com/c-ares/c-ares', - 'click': 'https://github.com/pallets/click', - 'confluent-kafka-python': 'https://github.com/confluentinc/confluent-kafka-python', - 'cpython': 'https://github.com/python/cpython', - 'cupy': 'https://github.com/cupy/cupy', - 'databricks-cli': 'https://github.com/databricks/databricks-cli', - 'datacompy': 'https://github.com/capitalone/datacompy', - 'dfencoder': 'https://github.com/AlliedToasters/dfencoder', - 'dill': 'https://github.com/uqfoundation/dill', - 'docker-py': 'https://github.com/docker/docker-py', - 'elasticsearch-py': 'https://github.com/elastic/elasticsearch-py', - 'feedparser': 'https://github.com/kurtmckee/feedparser', - 'gflags': 'https://github.com/gflags/gflags', - 'glog': 'https://github.com/google/glog', - 'graphviz': 'https://github.com/xflr6/graphviz', - 'grpc': 'https://github.com/grpc/grpc', - 'json': 'https://github.com/nlohmann/json', - 'librdkafka': 'https://github.com/confluentinc/librdkafka', - 'libwebp': 'https://github.com/webmproject/libwebp', - 'mlflow': 'https://github.com/mlflow/mlflow', - 'networkx': 'https://github.com/networkx/networkx', - 'numpydoc': 'https://github.com/numpy/numpydoc', - 'nv-RxCpp': 'https://github.com/mdemoret-nv/RxCpp', - 'pip': 'https://github.com/pypa/pip', - 'pluggy': 'https://github.com/pytest-dev/pluggy', - 'protobuf': 'https://github.com/protocolbuffers/protobuf', - 'pybind11': 'https://github.com/pybind/pybind11', - 'pybind11-stubgen': 'https://github.com/sizmailov/pybind11-stubgen', - 'pydantic': 'https://github.com/pydantic/pydantic', - 'pymilvus': 'https://github.com/milvus-io/pymilvus', - 'python-versioneer': 'https://github.com/python-versioneer/python-versioneer', - 'rapidjson': 'https://github.com/Tencent/rapidjson', - 'rdma-core': 'https://github.com/linux-rdma/rdma-core', - 'requests': 'https://github.com/psf/requests', - 'requests-cache': 'https://github.com/requests-cache/requests-cache', - 'RxCpp': 'https://github.com/ReactiveX/RxCpp', - 'scikit-learn': 'https://github.com/scikit-learn/scikit-learn', - 'sqlalchemy': 'https://github.com/sqlalchemy/sqlalchemy', - 'pytorch': 'https://github.com/pytorch/pytorch', - 'tqdm': 
'https://github.com/tqdm/tqdm', - 'typing_utils': 'https://github.com/bojiang/typing_utils', - 'urllib3': 'https://github.com/urllib3/urllib3', - 'versioneer-518': 'https://github.com/python-versioneer/versioneer-518', - 'watchdog': 'https://github.com/gorakhargosh/watchdog', - 'websockets': 'https://github.com/python-websockets/websockets', - 'zlib': 'https://github.com/madler/zlib', -} - -KNOWN_GITLAB_URLS = { - 'pkg-config': 'https://gitlab.freedesktop.org/pkg-config/pkg-config', -} - -OTHER_REPOS: dict[str, PACKAGE_TO_URL_FN_T] = { - # While boost is available on GitHub, the sub-libraries are in separate repos. - 'beautifulsoup4': - lambda name, ver: ("https://www.crummy.com/software/BeautifulSoup/bs4/download/" - f"{'.'.join(ver.split('.')[:-1])}/{name}-{ver}.tar.gz"), -} - -# Please keep sorted -KNOWN_FIRST_PARTY = frozenset( - {'cuda-cudart', 'cuda-nvrtc', 'cuda-nvtx', 'cuda-version', 'cudf', 'mrc', 'rapids-dask-dependency', 'tritonclient'}) - -# Some of these packages are installed via CPM (pybind11), others are transitive deps who's version is determined by -# other packages but we use directly (glog), while others exist in the build environment and are statically linked -# (zlib) and not specified in the runtime environment. -# Unfortunately this means these versions will need to be updated manually, although any that exist in the resolved -# environment will have their versions updated to match the resolved environment. -KNOWN_NON_CONDA_DEPS = [ - ('c-ares', '1.32.3'), - ('dfencoder', '0.0.37'), - ('gflags', '2.2.2'), - ('glog', '0.7.1'), - ('nlohmann_json', '3.11.3'), - ('nv-RxCpp', '4.1.1.2'), - ('librdkafka', '1.6.2'), - ('pkg-config', '0.29.2'), - ('protobuf', '4.25.3'), - ('pybind11', '2.8.1'), - ('pybind11-stubgen', '0.10.5'), - ('python-versioneer', '0.22'), - ('rapidjson', '1.1.0'), - ('rdma-core', '54.0'), - ('RxCpp', '4.1.1'), - ('versioneer', '0.18'), - ('versioneer-518', '0.19'), # Conda has a version, but the git repo is unversioned - ('zlib', '1.3.1'), -] - - -GIT_TAG_FORMAT = { # any packages not in this dict are assumned to have the TAG_V_PREFIX - 'appdirs': TAG_BARE, - 'click': TAG_BARE, - 'databricks-cli': TAG_BARE, - 'dill': TAG_NAME_DASH_BARE, - 'docker-py': TAG_BARE, - 'feedparser': TAG_BARE, - 'graphviz': TAG_BARE, - 'networkx': TAG_NAME_DASH_BARE, - 'pip': TAG_BARE, - 'pkg-config': TAG_NAME_DASH_BARE, - 'pluggy': TAG_BARE, - 'pybind11-stubgen': TAG_BARE, - 'python-versioneer': TAG_BARE, - 'scikit-learn': TAG_BARE, - 'sqlalchemy': lambda ver: f"rel_{ver.replace('.', '_')}", - 'urllib3': TAG_BARE, - 'websockets': TAG_BARE, -} - -logger = logging.getLogger(__file__) - - -def main(): - args = parse_args(conda_json_cmd=CONDA_JSON_CMD, - default_conda_yaml=os.path.join(PROJ_ROOT, - "conda/environments/runtime_cuda-125_arch-x86_64.yaml"), - default_conda_json=os.path.join(PROJ_ROOT, ".tmp/container_pkgs.json")) - log_level = logging._nameToLevel[args.log_level.upper()] - logging.basicConfig(level=log_level, format="%(message)s") - - # Set the log level for requests and urllib3 - logging.getLogger('requests').setLevel(args.http_log_level) - logging.getLogger("urllib3").setLevel(args.http_log_level) - - needs_cleanup = False - download_dir: str | None = args.download_dir - if download_dir is None: - if args.download: - download_dir = tempfile.mkdtemp(prefix="morpheus_deps_download_") - logger.info("Created temporary download directory: %s", download_dir) - needs_cleanup = True - elif args.extract: - logger.error("--extract requires either --download or 
--download_dir to be set.") - sys.exit(1) - - extract_dir: str | None = args.extract_dir - if extract_dir is None and args.extract: - extract_dir = tempfile.mkdtemp(prefix="morpheus_deps_extract_") - logger.info("Created temporary extract directory: %s", extract_dir) - - num_missing_packages = download_source_deps(conda_yaml=args.conda_yaml, - conda_json=args.conda_json, - package_aliases=PACKAGE_ALIASES, - known_github_urls=KNOWN_GITHUB_URLS, - known_gitlab_urls=KNOWN_GITLAB_URLS, - other_repos=OTHER_REPOS, - known_first_party=KNOWN_FIRST_PARTY, - git_tag_format=GIT_TAG_FORMAT, - known_non_conda_deps=KNOWN_NON_CONDA_DEPS, - dry_run=args.dry_run, - verify_urls=args.verify_urls, - download=args.download, - download_dir=download_dir, - extract=args.extract, - extract_dir=extract_dir) - - if needs_cleanup and not args.no_clean and download_dir is not None: - logger.info("Removing temporary download directory: %s", download_dir) - shutil.rmtree(download_dir) - - if num_missing_packages > 0: - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 2a00b52fc..6b71e9d22 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -115,9 +115,6 @@ sed_runner "s/${CURRENT_SHORT_TAG}/${NEXT_SHORT_TAG}/g" docs/source/getting_star sed_runner "s|blob/branch-${CURRENT_SHORT_TAG}|blob/branch-${NEXT_SHORT_TAG}|g" models/model-cards/*.md sed_runner "s|tree/branch-${CURRENT_SHORT_TAG}|tree/branch-${NEXT_SHORT_TAG}|g" models/model-cards/*.md -# thirdparty -sed_runner "s|tree/branch-${CURRENT_SHORT_TAG}|tree/branch-${NEXT_SHORT_TAG}|g" thirdparty/README.md - # Update the version of the Morpheus model container # We need to update several files, however we need to avoid symlinks as well as the build and .cache directories DOCS_MD_FILES=$(find -P ./docs/source/ -type f -iname "*.md") diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index ce7656092..901453783 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -1,13 +1,13 @@ #!/bin/bash --login # SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,5 +23,8 @@ conda activate morpheus SRC_FILE="/opt/docker/bin/entrypoint_source" [ -f "${SRC_FILE}" ] && source "${SRC_FILE}" +THIRDPARTY_FILE="thirdparty/morpheus-container-thirdparty-oss.txt" +[ -f "${THIRDPARTY_FILE}" ] && cat "${THIRDPARTY_FILE}" + # Run whatever the user wants. 
-exec "$@" \ No newline at end of file +exec "$@" diff --git a/models/docker/Dockerfile b/models/docker/Dockerfile index 2a7c4b322..d52cd3cda 100644 --- a/models/docker/Dockerfile +++ b/models/docker/Dockerfile @@ -28,6 +28,9 @@ RUN apt update && \ apt clean && \ rm -rf /var/lib/apt/lists/* +# Append our third party notice to the entrypoint message +COPY "${MORPHEUS_ROOT_HOST}/thirdparty/models-container-thirdparty-oss.txt" "/opt/nvidia/entrypoint.d/80-morpheus-thirdparty-oss.txt" + # Copy the model repository COPY "${MORPHEUS_ROOT_HOST}/models" "./models" diff --git a/thirdparty/README.md b/thirdparty/README.md index 8d750d8d3..651189ed6 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -17,4 +17,4 @@ limitations under the License. # Source Code for OSS Packages in the NVIDIA Morpheus Docker container -The source code for OSS packages which are included in the NVIDIA Morpheus Docker image is available at [https://github.com/nv-morpheus/morpheus_third_party_oss/tree/branch-25.02](https://github.com/nv-morpheus/morpheus_third_party_oss/tree/branch-25.02) +The source code for OSS packages which are included in the NVIDIA Morpheus Docker containers are available at [https://github.com/nv-morpheus/morpheus_third_party_oss](https://github.com/nv-morpheus/morpheus_third_party_oss) diff --git a/thirdparty/models-container-thirdparty-oss.txt b/thirdparty/models-container-thirdparty-oss.txt new file mode 100644 index 000000000..90a9e2921 --- /dev/null +++ b/thirdparty/models-container-thirdparty-oss.txt @@ -0,0 +1,7 @@ + +=================================== +== Morpheus Triton Server Models == +=================================== + +The source code for OSS packages which are included in the NVIDIA Morpheus Triton Server Models Docker container +are available at https://github.com/nv-morpheus/morpheus_third_party_oss diff --git a/thirdparty/morpheus-container-thirdparty-oss.txt b/thirdparty/morpheus-container-thirdparty-oss.txt new file mode 100644 index 000000000..e4ce0ecf9 --- /dev/null +++ b/thirdparty/morpheus-container-thirdparty-oss.txt @@ -0,0 +1,8 @@ + +============== +== Morpheus == +============== + +The source code for OSS packages which are included in the NVIDIA Morpheus Docker container +are available at https://github.com/nv-morpheus/morpheus_third_party_oss + From 4f2ed4ffccea39b7a724400786e2b4ce6982c0d9 Mon Sep 17 00:00:00 2001 From: Yuchen Zhang <134643420+yczhang-nv@users.noreply.github.com> Date: Thu, 6 Feb 2025 15:40:25 -0800 Subject: [PATCH 4/4] Remove `pe_count` option from `MonitorStage` (#2178) Closes #2177 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
Authors: - Yuchen Zhang (https://github.com/yczhang-nv) Approvers: - David Gardner (https://github.com/dagardner-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/2178 --- python/morpheus/morpheus/stages/general/monitor_stage.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/morpheus/morpheus/stages/general/monitor_stage.py b/python/morpheus/morpheus/stages/general/monitor_stage.py index 0794c5ec3..492fa38ef 100644 --- a/python/morpheus/morpheus/stages/general/monitor_stage.py +++ b/python/morpheus/morpheus/stages/general/monitor_stage.py @@ -150,8 +150,6 @@ def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> self._mc._font_style, self._mc._determine_count_fn) - node.launch_options.pe_count = self._config.num_threads - else: # Use a component so we track progress using the upstream progress engine. This will provide more accurate # results