diff --git a/assets/inference/environments/minimal-py312-cuda12.4-inference/asset.yaml b/assets/inference/environments/minimal-py312-cuda12.4-inference/asset.yaml
new file mode 100644
index 0000000000..708bf40bb4
--- /dev/null
+++ b/assets/inference/environments/minimal-py312-cuda12.4-inference/asset.yaml
@@ -0,0 +1,12 @@
+name: minimal-py312-cuda12.4-inference
+
+version: auto
+type: environment
+spec: spec.yaml
+extra_config: environment.yaml
+test:
+  pytest:
+    enabled: true
+    pip_requirements: tests/requirements.txt
+    tests_dir: tests
+categories: ["Inference"]
\ No newline at end of file
diff --git a/assets/inference/environments/minimal-py312-cuda12.4-inference/context/Dockerfile b/assets/inference/environments/minimal-py312-cuda12.4-inference/context/Dockerfile
new file mode 100644
index 0000000000..1c1ff37eec
--- /dev/null
+++ b/assets/inference/environments/minimal-py312-cuda12.4-inference/context/Dockerfile
@@ -0,0 +1,23 @@
+FROM mcr.microsoft.com/azureml/inference-base-cuda12.4-ubuntu22.04:{{latest-image-tag}}
+
+WORKDIR /
+ENV AZUREML_CONDA_ENVIRONMENT_PATH=/azureml-envs/minimal
+ENV AZUREML_CONDA_DEFAULT_ENVIRONMENT=$AZUREML_CONDA_ENVIRONMENT_PATH
+
+# Prepend path to AzureML conda environment
+ENV PATH=$AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
+
+ENV LD_LIBRARY_PATH=$AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH
+
+# Create conda environment
+USER root
+# Remove the base image's dynamic conda-environment setup block from the gunicorn runit script; this image ships a prebuilt environment
+RUN sed -i '66,148d' /var/runit/gunicorn/run
+COPY conda_dependencies.yaml .
+RUN conda env create -p $AZUREML_CONDA_ENVIRONMENT_PATH -f conda_dependencies.yaml -q && \
+    rm conda_dependencies.yaml && \
+    conda run -p $AZUREML_CONDA_ENVIRONMENT_PATH pip cache purge && \
+    conda clean -a -y
+USER dockeruser
+
+CMD [ "runsvdir", "/var/runit" ]
\ No newline at end of file
diff --git a/assets/inference/environments/minimal-py312-cuda12.4-inference/context/conda_dependencies.yaml b/assets/inference/environments/minimal-py312-cuda12.4-inference/context/conda_dependencies.yaml
new file mode 100644
index 0000000000..dd0298d1ea
--- /dev/null
+++ b/assets/inference/environments/minimal-py312-cuda12.4-inference/context/conda_dependencies.yaml
@@ -0,0 +1,10 @@
+name: minimal
+channels:
+- conda-forge
+- anaconda
+dependencies:
+- python=3.12
+- pip=24.0
+- pip:
+  - azureml-inference-server-http=={{latest-pypi-version}}
+  - numpy=={{latest-pypi-version}}
\ No newline at end of file
diff --git a/assets/inference/environments/minimal-py312-cuda12.4-inference/environment.yaml b/assets/inference/environments/minimal-py312-cuda12.4-inference/environment.yaml
new file mode 100644
index 0000000000..14402af7cc
--- /dev/null
+++ b/assets/inference/environments/minimal-py312-cuda12.4-inference/environment.yaml
@@ -0,0 +1,12 @@
+image:
+  name: azureml/curated/minimal-py312-cuda12.4-inference
+  os: linux
+  context:
+    dir: context
+    dockerfile: Dockerfile
+    template_files:
+    - Dockerfile
+    - conda_dependencies.yaml
+  publish:
+    location: mcr
+    visibility: public
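To smoke-build this context locally, the {{latest-image-tag}} and {{latest-pypi-version}} template tags must be substituted first; the asset pipeline normally resolves them at build time. A minimal sketch, where the substitution values and the dev image tag are assumed stand-ins rather than what the pipeline would pick:

    # Hypothetical local build: fill in the template tags with assumed values,
    # then docker-build a copy of the context.
    import shutil
    import subprocess
    import tempfile
    from pathlib import Path

    SUBSTITUTIONS = {
        "{{latest-image-tag}}": "latest",    # assumption; the pipeline resolves the real tag
        "{{latest-pypi-version}}": "1.0.0",  # assumption; the pipeline pins per-package versions
    }
    context = Path("assets/inference/environments/minimal-py312-cuda12.4-inference/context")

    with tempfile.TemporaryDirectory() as tmp:
        tmp_context = Path(tmp) / "context"
        shutil.copytree(context, tmp_context)
        for name in ("Dockerfile", "conda_dependencies.yaml"):
            text = (tmp_context / name).read_text()
            for tag, value in SUBSTITUTIONS.items():
                text = text.replace(tag, value)
            (tmp_context / name).write_text(text)
        subprocess.run(
            ["docker", "build", "-t", "minimal-py312-cuda12.4-inference:dev", str(tmp_context)],
            check=True,
        )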
+ +name: "{{asset.name}}" +version: "{{asset.version}}" + +build: + path: "{{image.context.path}}" + dockerfile_path: "{{image.dockerfile.path}}" + +os_type: linux + +tags: + OS: Ubuntu22.04 + Inferencing: "" + Preview: "" diff --git a/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/minimal_inference_test.py b/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/minimal_inference_test.py new file mode 100644 index 0000000000..68b7725df8 --- /dev/null +++ b/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/minimal_inference_test.py @@ -0,0 +1,81 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Tests running a sample job in the minimal py312 cuda12.4 environment.""" +import os +import time +from pathlib import Path +from azure.ai.ml import command, MLClient +from azure.ai.ml._restclient.models import JobStatus +from azure.ai.ml.entities import Environment, BuildContext +from azure.identity import AzureCliCredential + +BUILD_CONTEXT = Path("../context") +JOB_SOURCE_CODE = "src" +TIMEOUT_MINUTES = os.environ.get("timeout_minutes", 30) +STD_LOG = Path("artifacts/user_logs/std_log.txt") + + +def test_minimal_gpu_inference(): + """Tests a sample job using minimal py312 cuda12.4 as the environment.""" + this_dir = Path(__file__).parent + + subscription_id = os.environ.get("subscription_id") + resource_group = os.environ.get("resource_group") + workspace_name = os.environ.get("workspace") + + ml_client = MLClient( + AzureCliCredential(), subscription_id, resource_group, workspace_name + ) + + env_name = "minimal_gpu_inference" + + env_docker_context = Environment( + build=BuildContext(path=this_dir / BUILD_CONTEXT), + name=env_name, + description="minimal py312 cuda12.4 inference environment created from a Docker context.", + ) + returned_env = ml_client.environments.create_or_update(env_docker_context) + + # create the command + job = command( + code=this_dir / JOB_SOURCE_CODE, # local path where the code is stored + command="python main.py --score ${{inputs.score}}", + inputs=dict( + score="valid_score.py", + ), + environment=returned_env, + compute=os.environ.get("gpu_cluster"), + display_name="minimal-gpu-inference-example", + description="A test run of the minimal py312 cuda12.4 inference curated environment", + experiment_name="minimalGPUInferenceExperiment" + ) + + returned_job = ml_client.create_or_update(job) + assert returned_job is not None + + # Poll until final status is reached or timed out + timeout = time.time() + (TIMEOUT_MINUTES * 60) + while time.time() <= timeout: + job = ml_client.jobs.get(returned_job.name) + status = job.status + if status in [JobStatus.COMPLETED, JobStatus.FAILED]: + break + time.sleep(30) # sleep 30 seconds + else: + # Timeout + ml_client.jobs.cancel(returned_job.name) + raise Exception(f"Test aborted because the job took longer than {TIMEOUT_MINUTES} minutes. 
" + f"Last status was {status}.") + + if status == JobStatus.FAILED: + ml_client.jobs.download(returned_job.name) + if STD_LOG.exists(): + print(f"*** BEGIN {STD_LOG} ***") + with open(STD_LOG, "r") as f: + print(f.read(), end="") + print(f"*** END {STD_LOG} ***") + else: + ml_client.jobs.stream(returned_job.name) + + assert status == JobStatus.COMPLETED diff --git a/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/requirements.txt b/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/requirements.txt new file mode 100644 index 0000000000..4d32a53fbc --- /dev/null +++ b/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/requirements.txt @@ -0,0 +1,3 @@ +azure-ai-ml==1.2.0 +marshmallow==3.23.1 +azure.identity diff --git a/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/main.py b/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/main.py new file mode 100644 index 0000000000..d7629baaa1 --- /dev/null +++ b/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/main.py @@ -0,0 +1,86 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Validate minimal inference gpu environment by running azmlinfsrv.""" + +# imports +import os +import subprocess +import requests +from datetime import datetime, timedelta +import time +import argparse + + +def main(args): + """Start inference server and post scoring request.""" + # start the server + server_process = start_server("/var/tmp", ["--entry_script", args.score, "--port", "8081"]) + + # score a request + req = score_with_post() + server_process.kill() + + print(req) + + +def start_server(log_directory, args, timeout=timedelta(seconds=15)): + """Start inference server with options.""" + stderr_file = open(os.path.join(log_directory, "stderr.txt"), "w") + stdout_file = open(os.path.join(log_directory, "stdout.txt"), "w") + + env = os.environ.copy() + server_process = subprocess.Popen(["azmlinfsrv"] + args, stdout=stdout_file, stderr=stderr_file, env=env) + + max_time = datetime.now() + timeout + + while datetime.now() < max_time: + time.sleep(0.25) + req = None + try: + req = requests.get("http://127.0.0.1:8081", timeout=10) + except Exception as e: + print(e) + + if req is not None and req.ok: + break + + # Ensure the server is still running + status = server_process.poll() + if status is not None: + break + + print(log_directory, "stderr.txt") + print(log_directory, "stdout.txt") + + return server_process + + +def score_with_post(headers=None, data=None): + """Post scoring request to the server.""" + url = "http://127.0.0.1:8081/score" + return requests.post(url=url, headers=headers, data=data) + + +def parse_args(): + """Parse input arguments.""" + # setup arg parser + parser = argparse.ArgumentParser() + + # add arguments + parser.add_argument("--score", type=str) + + # parse args + args = parser.parse_args() + + # return args + return args + + +# run script +if __name__ == "__main__": + # parse args + args = parse_args() + + # run main function + main(args) diff --git a/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/valid_score.py b/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/valid_score.py new file mode 100644 index 0000000000..8642f1d230 --- /dev/null +++ b/assets/inference/environments/minimal-py312-cuda12.4-inference/tests/src/valid_score.py @@ -0,0 +1,34 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +"""A basic entry script.""" + +# imports +import uuid +import os +from datetime import datetime +from azureml_inference_server_http.api.aml_response import AMLResponse +from azureml_inference_server_http.api.aml_request import rawhttp + + +def init(): + """Sample init function.""" + print("Initializing") + + +@rawhttp +def run(input_data): + """Sample run function.""" + print('A new request received~~~') + try: + r = dict() + r['request_id'] = str(uuid.uuid4()) + r['now'] = datetime.now().strftime("%Y/%m/%d %H:%M:%S %f") + r['pid'] = os.getpid() + r['message'] = "this is a sample" + + return AMLResponse(r, 200, json_str=True) + except Exception as e: + error = str(e) + + return AMLResponse({'error': error}, 500, json_str=True)