feat: hello world

Co-authored-by: Piotr Marcinkiewicz <[email protected]> Co-authored-by: Tanmay Verma <[email protected]>
triton-inference-server · Jan 17, 2025 · b4ee99f · b4ee99f
1 parent 9777274
commit b4ee99f
Show file tree

Hide file tree

Showing 30 changed files with 1,413 additions and 129 deletions.
diff --git a/README.md b/README.md
@@ -15,7 +15,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 -->
 
-
 # Triton Distributed
 
 <h4> A Datacenter Scale Distributed Inference Serving Framework </h4>
@@ -84,14 +83,3 @@ HF_TOKEN```) and mounts common directories such as ```/tmp:/tmp```,
 
 Please see the instructions in the corresponding example for specific
 deployment instructions.
-
-
-<!--
-
-## Goals
-
-## Concepts
-
-## Examples
-
--->
diff --git a/container/Dockerfile b/container/Dockerfile
@@ -133,7 +133,7 @@ COPY . /workspace
 RUN /workspace/icp/protos/gen_python.sh
 
 # Sets pythonpath for python modules
-ENV PYTHONPATH="${PYTHONPATH}:/workspace/icp/src/python:/workspace/worker/src/python"
+ENV PYTHONPATH="${PYTHONPATH}:/workspace/icp/src/python:/workspace/worker/src/python:/workspace/examples"
 
 # Command and Entrypoint
 CMD []

diff --git a/examples/hello_world/README.md b/examples/hello_world/README.md
@@ -0,0 +1,17 @@
+<!--
+SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+SPDX-License-Identifier: Apache-2.0
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
diff --git a/examples/hello_world/__init__.py b/examples/hello_world/__init__.py
@@ -0,0 +1,14 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/examples/hello_world/client/__main__.py b/examples/hello_world/client/__main__.py
@@ -0,0 +1,78 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import multiprocessing
+import signal
+import sys
+import time
+from typing import Optional
+
+from .client import _start_client
+from .parser import parse_args
+
+processes: Optional[list[multiprocessing.context.SpawnProcess]] = None
+
+
+def handler(signum, frame):
+    exit_code = 0
+    if processes:
+        print("Stopping Clients")
+        for process in processes:
+            process.terminate()
+            process.kill()
+            process.join()
+            if process.exitcode is not None:
+                exit_code += process.exitcode
+    print(f"Clients Stopped Exit Code {exit_code}")
+    sys.exit(exit_code)
+
+
+signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
+for sig in signals:
+    try:
+        signal.signal(sig, handler)
+    except Exception:
+        pass
+
+
+def main(args):
+    global processes
+    process_context = multiprocessing.get_context("spawn")
+    args.lock = process_context.Lock()
+    processes = []
+    start_time = time.time()
+    for index in range(args.clients):
+        processes.append(
+            process_context.Process(target=_start_client, args=(index, args))
+        )
+        processes[-1].start()
+
+    for process in processes:
+        process.join()
+    end_time = time.time()
+    print(
+        f"Throughput: {(args.requests_per_client*args.clients)/(end_time-start_time)} Total Time: {end_time-start_time}"
+    )
+    exit_code = 0
+    for process in processes:
+        if process.exitcode is not None:
+            exit_code += process.exitcode
+    print(f"Clients Stopped Exit Code {exit_code}")
+    return exit_code
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    sys.exit(main(args))
diff --git a/examples/hello_world/client/client.py b/examples/hello_world/client/client.py
@@ -0,0 +1,104 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import sys
+
+import cupy
+import numpy
+from tqdm import tqdm
+from triton_distributed.icp import NatsRequestPlane, UcpDataPlane
+from triton_distributed.worker import RemoteOperator
+from tritonserver import MemoryType
+
+
+def _get_input_sizes(args):
+    return numpy.maximum(
+        0,
+        numpy.round(
+            numpy.random.normal(
+                loc=args.input_size_mean,
+                scale=args.input_size_stdev,
+                size=args.requests_per_client,
+            )
+        ),
+    ).astype(int)
+
+
+def _start_client(client_index, args):
+    sys.exit(asyncio.run(client(client_index, args)))
+
+
+async def client(client_index, args):
+    request_count = args.requests_per_client
+    try:
+        request_plane = NatsRequestPlane(args.request_plane_uri)
+        data_plane = UcpDataPlane()
+        await request_plane.connect()
+        data_plane.connect()
+
+        remote_operator: RemoteOperator = RemoteOperator(
+            args.operator, request_plane, data_plane
+        )
+        input_sizes = _get_input_sizes(args)
+
+        inputs = [
+            numpy.array(numpy.random.randint(0, 100, input_sizes[index]))
+            for index in range(request_count)
+        ]
+        tqdm.set_lock(args.lock)
+
+        with tqdm(
+            total=args.requests_per_client,
+            desc=f"Client: {client_index}",
+            unit="request",
+            position=client_index,
+            leave=False,
+        ) as pbar:
+            requests = [
+                await remote_operator.async_infer(
+                    inputs={"input": inputs[index]}, request_id=str(index)
+                )
+                for index in range(request_count)
+            ]
+
+            for request in requests:
+                async for response in request:
+                    for output_name, output_value in response.outputs.items():
+                        if output_value.memory_type == MemoryType.CPU:
+                            output = numpy.from_dlpack(output_value)
+                            numpy.testing.assert_array_equal(
+                                output, inputs[int(response.request_id)]
+                            )
+                        else:
+                            output = cupy.from_dlpack(output_value)
+                            cupy.testing.assert_array_equal(
+                                output, inputs[int(response.request_id)]
+                            )
+                        del output_value
+
+                    pbar.set_description(
+                        f"Client: {client_index} Received Response: {response.request_id} From: {response.component_id} Error: {response.error}"
+                    )
+                    pbar.update(1)
+                    del response
+
+        await request_plane.close()
+        data_plane.close()
+    except Exception as e:
+        print(f"Exception: {e}")
+        return 1
+    else:
+        return 0
diff --git a/examples/hello_world/client/parser.py b/examples/hello_world/client/parser.py
@@ -0,0 +1,64 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+
+
+def parse_args(args=None):
+    parser = argparse.ArgumentParser(description="Hello World Client")
+
+    parser.add_argument(
+        "--request-plane-uri",
+        type=str,
+        default="nats://localhost:4223",
+        help="URI of request plane",
+    )
+
+    parser.add_argument(
+        "--requests-per-client",
+        type=int,
+        default=100,
+        help="number of requests to send per client",
+    )
+
+    parser.add_argument(
+        "--operator",
+        type=str,
+        choices=["encoder_decoder", "encoder", "decoder"],
+        default="encoder_decoder",
+        help="operator to send requests to. In this example all operators have the same input and output names.",
+    )
+
+    parser.add_argument(
+        "--input-size-mean",
+        type=int,
+        default=1000,
+        help="average input size for requests",
+    )
+
+    parser.add_argument(
+        "--input-size-stdev",
+        type=float,
+        default=0,
+        help="standard deviation for input size",
+    )
+
+    parser.add_argument(
+        "--clients", type=int, default=1, help="number of concurrent clients to launch."
+    )
+
+    args = parser.parse_args(args)
+
+    return args