Skip to content

Commit

Permalink
feat: hello world
Browse files Browse the repository at this point in the history
Co-authored-by: Piotr Marcinkiewicz <[email protected]>
Co-authored-by: Tanmay Verma <[email protected]>
  • Loading branch information
3 people authored Jan 17, 2025
1 parent 9777274 commit b4ee99f
Show file tree
Hide file tree
Showing 30 changed files with 1,413 additions and 129 deletions.
12 changes: 0 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ See the License for the specific language governing permissions and
limitations under the License.
-->


# Triton Distributed

<h4> A Datacenter Scale Distributed Inference Serving Framework </h4>
Expand Down Expand Up @@ -84,14 +83,3 @@ HF_TOKEN```) and mounts common directories such as ```/tmp:/tmp```,

Please see the instructions in the corresponding example for specific
deployment instructions.


<!--
## Goals
## Concepts
## Examples
-->
2 changes: 1 addition & 1 deletion container/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ COPY . /workspace
RUN /workspace/icp/protos/gen_python.sh

# Sets pythonpath for python modules
ENV PYTHONPATH="${PYTHONPATH}:/workspace/icp/src/python:/workspace/worker/src/python"
ENV PYTHONPATH="${PYTHONPATH}:/workspace/icp/src/python:/workspace/worker/src/python:/workspace/examples"

# Command and Entrypoint
CMD []
Expand Down
17 changes: 17 additions & 0 deletions examples/hello_world/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<!--
SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: Apache-2.0
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

14 changes: 14 additions & 0 deletions examples/hello_world/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
78 changes: 78 additions & 0 deletions examples/hello_world/client/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import multiprocessing
import signal
import sys
import time
from typing import Optional

from .client import _start_client
from .parser import parse_args

processes: Optional[list[multiprocessing.context.SpawnProcess]] = None


def handler(signum, frame):
exit_code = 0
if processes:
print("Stopping Clients")
for process in processes:
process.terminate()
process.kill()
process.join()
if process.exitcode is not None:
exit_code += process.exitcode
print(f"Clients Stopped Exit Code {exit_code}")
sys.exit(exit_code)


signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
for sig in signals:
try:
signal.signal(sig, handler)
except Exception:
pass


def main(args):
global processes
process_context = multiprocessing.get_context("spawn")
args.lock = process_context.Lock()
processes = []
start_time = time.time()
for index in range(args.clients):
processes.append(
process_context.Process(target=_start_client, args=(index, args))
)
processes[-1].start()

for process in processes:
process.join()
end_time = time.time()
print(
f"Throughput: {(args.requests_per_client*args.clients)/(end_time-start_time)} Total Time: {end_time-start_time}"
)
exit_code = 0
for process in processes:
if process.exitcode is not None:
exit_code += process.exitcode
print(f"Clients Stopped Exit Code {exit_code}")
return exit_code


if __name__ == "__main__":
args = parse_args()
sys.exit(main(args))
104 changes: 104 additions & 0 deletions examples/hello_world/client/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import asyncio
import sys

import cupy
import numpy
from tqdm import tqdm
from triton_distributed.icp import NatsRequestPlane, UcpDataPlane
from triton_distributed.worker import RemoteOperator
from tritonserver import MemoryType


def _get_input_sizes(args):
return numpy.maximum(
0,
numpy.round(
numpy.random.normal(
loc=args.input_size_mean,
scale=args.input_size_stdev,
size=args.requests_per_client,
)
),
).astype(int)


def _start_client(client_index, args):
sys.exit(asyncio.run(client(client_index, args)))


async def client(client_index, args):
request_count = args.requests_per_client
try:
request_plane = NatsRequestPlane(args.request_plane_uri)
data_plane = UcpDataPlane()
await request_plane.connect()
data_plane.connect()

remote_operator: RemoteOperator = RemoteOperator(
args.operator, request_plane, data_plane
)
input_sizes = _get_input_sizes(args)

inputs = [
numpy.array(numpy.random.randint(0, 100, input_sizes[index]))
for index in range(request_count)
]
tqdm.set_lock(args.lock)

with tqdm(
total=args.requests_per_client,
desc=f"Client: {client_index}",
unit="request",
position=client_index,
leave=False,
) as pbar:
requests = [
await remote_operator.async_infer(
inputs={"input": inputs[index]}, request_id=str(index)
)
for index in range(request_count)
]

for request in requests:
async for response in request:
for output_name, output_value in response.outputs.items():
if output_value.memory_type == MemoryType.CPU:
output = numpy.from_dlpack(output_value)
numpy.testing.assert_array_equal(
output, inputs[int(response.request_id)]
)
else:
output = cupy.from_dlpack(output_value)
cupy.testing.assert_array_equal(
output, inputs[int(response.request_id)]
)
del output_value

pbar.set_description(
f"Client: {client_index} Received Response: {response.request_id} From: {response.component_id} Error: {response.error}"
)
pbar.update(1)
del response

await request_plane.close()
data_plane.close()
except Exception as e:
print(f"Exception: {e}")
return 1
else:
return 0
64 changes: 64 additions & 0 deletions examples/hello_world/client/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse


def parse_args(args=None):
parser = argparse.ArgumentParser(description="Hello World Client")

parser.add_argument(
"--request-plane-uri",
type=str,
default="nats://localhost:4223",
help="URI of request plane",
)

parser.add_argument(
"--requests-per-client",
type=int,
default=100,
help="number of requests to send per client",
)

parser.add_argument(
"--operator",
type=str,
choices=["encoder_decoder", "encoder", "decoder"],
default="encoder_decoder",
help="operator to send requests to. In this example all operators have the same input and output names.",
)

parser.add_argument(
"--input-size-mean",
type=int,
default=1000,
help="average input size for requests",
)

parser.add_argument(
"--input-size-stdev",
type=float,
default=0,
help="standard deviation for input size",
)

parser.add_argument(
"--clients", type=int, default=1, help="number of concurrent clients to launch."
)

args = parser.parse_args(args)

return args
Loading

0 comments on commit b4ee99f

Please sign in to comment.