From a1f50c09c1c3657b2ec604ff0c51b2d69450085d Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Mon, 4 Dec 2023 21:43:01 +0000 Subject: [PATCH 01/32] Initial changes for MIGraphX/ROCm EP additiona to build --- tools/gen_ort_dockerfile.py | 49 +++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index d38ac62..768a222 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -124,6 +124,18 @@ def dockerfile_for_linux(output_file): ln -s /etc/alternatives/libcudnn_so /usr/local/cudnn-$_CUDNN_VERSION/cuda/lib64/libcudnn.so """ + if FLAGS.enable_rocm: + df += """ +# Allow configure to pick up rocDNN where it expects it. +# (Note: $CUDNN_VERSION is defined by base image) +RUN _ROCDNN_VERSION=$(echo $ROCDNN_VERSION | cut -d. -f1-2) && \ + mkdir -p /usr/local/rocdnn-$_ROCDNN_VERSION/rocm/include && \ + ln -s /usr/include/rocdnn.h /usr/local/rocdnn-$_ROCDNN_VERSION/rocm/include/rocdnn.h && \ + mkdir -p /usr/local/rocdnn-$_ROCDNN_VERSION/rocm/lib64 && \ + ln -s /etc/alternatives/librocdnn_so /usr/local/rocdnn-$_ROCDNN_VERSION/rocm/lib64/librocdnn.so +""" + + if FLAGS.ort_openvino is not None: df += """ # Install OpenVINO @@ -219,6 +231,14 @@ def dockerfile_for_linux(output_file): if FLAGS.tensorrt_home is not None: ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) + if FLAGS.ort_rocm: + ep_flags = "--use_rocm" + if FLAGS.ort_migraphx: + ep_flags += " --use_migraphx" + if FLAGS.tensorrt_home is not None: + ep_flags += ' --migraphx_home "{}"'.format(FLAGS.tensorrt_home) + + if os.name == "posix": if os.getuid() == 0: ep_flags += " --allow_running_as_root" @@ -280,6 +300,12 @@ def dockerfile_for_linux(output_file): /opt/onnxruntime/lib """ + if FLAGS.enable_rocm: + df += """ +RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_rocm.so \ + /opt/onnxruntime/lib +""" + if FLAGS.ort_tensorrt: df += """ # TensorRT specific headers 
and libraries @@ -289,6 +315,15 @@ def dockerfile_for_linux(output_file): /opt/onnxruntime/lib """ + if FLAGS.ort_migraphx: + df += """ +# TensorRT specific headers and libraries +RUN cp /workspace/onnxruntime/include/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h \ + /opt/onnxruntime/include && \ + cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_migraphx.so \ + /opt/onnxruntime/lib +""" + if FLAGS.ort_openvino is not None: df += """ # OpenVino specific headers and libraries @@ -400,6 +435,20 @@ def dockerfile_for_windows(output_file): ep_flags += " --use_tensorrt" if FLAGS.tensorrt_home is not None: ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) + + if FLAGS.enable_rocm: + ep_flags = "--use_rocm" + if FLAGS.rocm_version is not None: + ep_flags += ' --rocm_version "{}"'.format(FLAGS.rocm_version) + if FLAGS.rocm_home is not None: + ep_flags += ' --rocm_home "{}"'.format(FLAGS.rocm_home) + if FLAGS.rocdnn_home is not None: + ep_flags += ' --rocdnn_home "{}"'.format(FLAGS.rocdnn_home) + if FLAGS.ort_migraphx: + ep_flags += " --use_migraphx" + if FLAGS.migraphx_home is not None: + ep_flags += ' --migraphx_home "{}"'.format(FLAGS.migraphx_home) + if FLAGS.ort_openvino is not None: ep_flags += " --use_openvino CPU_FP32" From 55051a4648e8223e9ab09540e3283ab9583d2819 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Tue, 5 Dec 2023 22:38:33 +0000 Subject: [PATCH 02/32] More pieces for run with migraphx/rocm --- tools/gen_ort_dockerfile.py | 107 +++++++++++++++++++++++++++++++----- 1 file changed, 94 insertions(+), 13 deletions(-) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index 768a222..26a7570 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -124,16 +124,45 @@ def dockerfile_for_linux(output_file): ln -s /etc/alternatives/libcudnn_so /usr/local/cudnn-$_CUDNN_VERSION/cuda/lib64/libcudnn.so """ - if FLAGS.enable_rocm: + if FLAGS.enable_rocm: df += """ 
-# Allow configure to pick up rocDNN where it expects it. -# (Note: $CUDNN_VERSION is defined by base image) -RUN _ROCDNN_VERSION=$(echo $ROCDNN_VERSION | cut -d. -f1-2) && \ - mkdir -p /usr/local/rocdnn-$_ROCDNN_VERSION/rocm/include && \ - ln -s /usr/include/rocdnn.h /usr/local/rocdnn-$_ROCDNN_VERSION/rocm/include/rocdnn.h && \ - mkdir -p /usr/local/rocdnn-$_ROCDNN_VERSION/rocm/lib64 && \ - ln -s /etc/alternatives/librocdnn_so /usr/local/rocdnn-$_ROCDNN_VERSION/rocm/lib64/librocdnn.so -""" + RUN apt-get clean && apt-get update && apt-get install -y locales + RUN locale-gen en_US.UTF-8 + RUN update-locale LANG=en_US.UTF-8 + ENV LC_ALL C.UTF-8 + ENV LANG C.UTF-8 + + # Install rocm + RUN apt-get update && apt-get install -y gnupg2 --no-install-recommends curl && \ + curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \ + sh -c 'echo deb [arch=amd64] http://repo.radeon.com/rocm/apt/${ROCM_VERSION}/ ubuntu main > /etc/apt/sources.list.d/rocm.list' + + RUN apt-get update &&\ + apt-get install -y sudo git bash build-essential rocm-dev python3-dev python3-pip miopen-hip \ + rocblas half aria2 libnuma-dev pkg-config + + RUN aria2c -q -d /tmp -o cmake-3.27.3-linux-x86_64.tar.gz \ + https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.27.3-linux-x86_64.tar.gz &&\ + tar -zxf /tmp/cmake-3.27.3-linux-x86_64.tar.gz --strip=1 -C /usr + + # Install rbuild + RUN pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz numpy yapf==0.28.0 + + ENV PATH /opt/miniconda/bin:/code/cmake-3.27.3-linux-x86_64/bin:${PATH} + # Install rocm ep dependencies + RUN apt-get update &&\ + apt-get install -y rocrand rccl hipsparse hipfft hipcub hipblas rocthrust + """ + + if FLAGS.enable_migraphx: + df += """ + # Install MIGraphX from source + RUN mkdir -p /migraphx + RUN cd /migraphx && git clone --depth=1 --branch ${MIGRAPHX_VERSION} https://github.com/ROCmSoftwarePlatform/AMDMIGraphX src + RUN cd /migraphx && rbuild package --cxx 
/opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3 + RUN dpkg -i /migraphx/build/*.deb + RUN rm -rf /migraphx + """ if FLAGS.ort_openvino is not None: @@ -233,10 +262,16 @@ def dockerfile_for_linux(output_file): if FLAGS.ort_rocm: ep_flags = "--use_rocm" + if FLAGS.rocm_version is not None: + ep_flags += ' --rocm_version "{}"'.format(FLAGS.rocm_version) + if FLAGS.rocm_home is not None: + ep_flags += ' --rocm_home "{}"'.format(FLAGS.rocm_home) if FLAGS.ort_migraphx: + if FLAGS.migraphx_version is not None: + ep_flags += ' --migraphx_version "{}"'.format(FLAGS.migraphx_version) ep_flags += " --use_migraphx" - if FLAGS.tensorrt_home is not None: - ep_flags += ' --migraphx_home "{}"'.format(FLAGS.tensorrt_home) + if FLAGS.migraphx_home is not None: + ep_flags += ' --migraphx_home "{}"'.format(FLAGS.migraphx_home) if os.name == "posix": @@ -442,10 +477,10 @@ def dockerfile_for_windows(output_file): ep_flags += ' --rocm_version "{}"'.format(FLAGS.rocm_version) if FLAGS.rocm_home is not None: ep_flags += ' --rocm_home "{}"'.format(FLAGS.rocm_home) - if FLAGS.rocdnn_home is not None: - ep_flags += ' --rocdnn_home "{}"'.format(FLAGS.rocdnn_home) if FLAGS.ort_migraphx: ep_flags += " --use_migraphx" + if FLAGS.migraphx_version is not None: + ep_flags += ' --migraphx_version "{}"'.format(FLAGS.migraphx_version) if FLAGS.migraphx_home is not None: ep_flags += ' --migraphx_home "{}"'.format(FLAGS.migraphx_home) @@ -492,6 +527,14 @@ def dockerfile_for_windows(output_file): RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_cuda.lib \\opt\\onnxruntime\\lib WORKDIR /opt/onnxruntime/bin RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_cuda.dll \\opt\\onnxruntime\\bin +""" + + if FLAGS.enable_rocm: + df += """ +WORKDIR /opt/onnxruntime/lib +RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_rocm.lib \\opt\\onnxruntime\\lib +WORKDIR /opt/onnxruntime/bin +RUN 
copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_rocm.dll \\opt\\onnxruntime\\bin """ if FLAGS.ort_tensorrt: @@ -505,6 +548,19 @@ def dockerfile_for_windows(output_file): WORKDIR /opt/onnxruntime/lib RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_tensorrt.lib \\opt\\onnxruntime\\lib +""" + + if FLAGS.ort_migraphx: + df += """ +# MIGraphX specific headers and libraries +WORKDIR /opt/onnxruntime/include +RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\providers\\migraphx\\migraphx_provider_factory.h \\opt\\onnxruntime\\include + +WORKDIR /opt/onnxruntime/lib +RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_migraphx.dll \\opt\\onnxruntime\\bin + +WORKDIR /opt/onnxruntime/lib +RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_migraphx.lib \\opt\\onnxruntime\\lib """ with open(output_file, "w") as dfile: dfile.write(df) @@ -555,6 +611,13 @@ def preprocess_gpu_flags(): if FLAGS.tensorrt_home is None: FLAGS.tensorrt_home = "/usr/src/tensorrt" + + if FLAGS.rocm_home is None: + FLAGS.rocm_home = "/opt/rocm/" + + if FLAGS.migraphx_home is None: + FLAGS.migraphx_home = "/opt/rocm/" + if __name__ == "__main__": @@ -593,6 +656,13 @@ def preprocess_gpu_flags(): parser.add_argument( "--cuda-home", type=str, required=False, help="Home directory for CUDA." ) + parser.add_argument( + "--rocm-version", type=str, required=False, help="Version for ROCM." + ) + parser.add_argument( + "--rocm-home", type=str, required=False, help="Home directory for ROCM." + ) + parser.add_argument( "--cudnn-home", type=str, required=False, help="Home directory for CUDNN." 
) @@ -616,6 +686,17 @@ def preprocess_gpu_flags(): ) parser.add_argument("--trt-version", type=str, default="", help="TRT version.") + parser.add_argument( + "--ort-migraphx", + action="store_true", + required=False, + help="Enable MIGraphX execution provider.", + ) + parser.add_argument( + "--migraphx-home", type=str, required=False, help="Home directory for MIGraphX." + ) + parser.add_argument("--migraphx-version", type=str, default="", help="MIGraphX version.") + FLAGS = parser.parse_args() if FLAGS.enable_gpu: preprocess_gpu_flags() From 20bd2280b4422378e6d3426e971bf83231242cdd Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 6 Dec 2023 15:39:11 +0000 Subject: [PATCH 03/32] Fix more hooks. Generate image with python3 tools/gen_ort_dockerfile.py --migraphx-home=/opt/rocm --ort-migraphx --rocm-home=/opt/rocm/ --rocm-version=5.7 --enable-rocm --migraphx-version=rocm-5.7.1 --ort-version=1.17.0 --output=migx_rocm_triton_inf.dockerfile --triton-container=rocm/oai-triton:preview_2023-11-29_182 --- tools/gen_ort_dockerfile.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index 26a7570..db4eada 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -154,7 +154,7 @@ def dockerfile_for_linux(output_file): apt-get install -y rocrand rccl hipsparse hipfft hipcub hipblas rocthrust """ - if FLAGS.enable_migraphx: + if FLAGS.ort_migraphx: df += """ # Install MIGraphX from source RUN mkdir -p /migraphx @@ -260,7 +260,7 @@ def dockerfile_for_linux(output_file): if FLAGS.tensorrt_home is not None: ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) - if FLAGS.ort_rocm: + if FLAGS.enable_rocm: ep_flags = "--use_rocm" if FLAGS.rocm_version is not None: ep_flags += ' --rocm_version "{}"'.format(FLAGS.rocm_version) @@ -636,6 +636,9 @@ def preprocess_gpu_flags(): parser.add_argument( "--enable-gpu", action="store_true", required=False, help="Enable GPU 
support" ) + parser.add_argument( + "--enable-rocm", action="store_true", required=False, help="Enable GPU support" + ) parser.add_argument( "--ort-build-config", type=str, From b4bdfea4c80b3a38d15e3e68dcdab13cef42f8a7 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 6 Dec 2023 17:36:29 +0000 Subject: [PATCH 04/32] Update generator to create valid dockerfile for MIGraphX/ROCm EPs --- tools/gen_ort_dockerfile.py | 88 +++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 37 deletions(-) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index db4eada..b9c9e09 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -124,45 +124,61 @@ def dockerfile_for_linux(output_file): ln -s /etc/alternatives/libcudnn_so /usr/local/cudnn-$_CUDNN_VERSION/cuda/lib64/libcudnn.so """ - if FLAGS.enable_rocm: + if FLAGS.enable_rocm: + if FLAGS.rocm_version is not None: + df += """ARG ROCM_VERSION={}""".format(FLAGS.rocm_version) + else: + df += """ARG ROCM_VERSION=5.7""" + df += """ - RUN apt-get clean && apt-get update && apt-get install -y locales - RUN locale-gen en_US.UTF-8 - RUN update-locale LANG=en_US.UTF-8 - ENV LC_ALL C.UTF-8 - ENV LANG C.UTF-8 - - # Install rocm - RUN apt-get update && apt-get install -y gnupg2 --no-install-recommends curl && \ - curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \ - sh -c 'echo deb [arch=amd64] http://repo.radeon.com/rocm/apt/${ROCM_VERSION}/ ubuntu main > /etc/apt/sources.list.d/rocm.list' - - RUN apt-get update &&\ - apt-get install -y sudo git bash build-essential rocm-dev python3-dev python3-pip miopen-hip \ - rocblas half aria2 libnuma-dev pkg-config - - RUN aria2c -q -d /tmp -o cmake-3.27.3-linux-x86_64.tar.gz \ - https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.27.3-linux-x86_64.tar.gz &&\ - tar -zxf /tmp/cmake-3.27.3-linux-x86_64.tar.gz --strip=1 -C /usr - - # Install rbuild - RUN pip3 install 
https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz numpy yapf==0.28.0 - - ENV PATH /opt/miniconda/bin:/code/cmake-3.27.3-linux-x86_64/bin:${PATH} - # Install rocm ep dependencies - RUN apt-get update &&\ - apt-get install -y rocrand rccl hipsparse hipfft hipcub hipblas rocthrust - """ +RUN apt-get clean && apt-get update && apt-get install -y locales +RUN locale-gen en_US.UTF-8 +RUN update-locale LANG=en_US.UTF-8 +ENV LC_ALL C.UTF-8 +ENV LANG C.UTF-8 + +# Support multiarch +RUN dpkg --add-architecture i386 + +# Install rocm +RUN apt-get update && apt-get install -y gnupg2 --no-install-recommends curl && \ +curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \ +sh -c 'echo deb [arch=amd64] http://repo.radeon.com/rocm/apt/${ROCM_VERSION}/ ubuntu main > /etc/apt/sources.list.d/rocm.list' + +# From docs.amd.com for installing rocm. Needed to install properly +RUN sh -c \"echo 'Package: *\\nPin: release o=repo.radeon.com\\nPin-priority: 600' > /etc/apt/preferences.d/rocm-pin-600\" + +RUN apt-get update &&\ + apt-get install -y sudo git bash build-essential rocm-dev python3-dev python3-pip miopen-hip \ + rocblas half aria2 libnuma-dev pkg-config + +RUN aria2c -q -d /tmp -o cmake-3.27.3-linux-x86_64.tar.gz \ +https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.27.3-linux-x86_64.tar.gz &&\ +tar -zxf /tmp/cmake-3.27.3-linux-x86_64.tar.gz --strip=1 -C /usr + +# Install rbuild +RUN pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz numpy yapf==0.28.0 + +ENV PATH /opt/miniconda/bin:/code/cmake-3.27.3-linux-x86_64/bin:${PATH} +# Install rocm ep dependencies +RUN apt-get update &&\ + apt-get install -y rocrand rccl hipsparse hipfft hipcub hipblas rocthrust +""" if FLAGS.ort_migraphx: + if FLAGS.migraphx_version is not None: + df+= """ARG MIGRAPHX_VERSION={}""".format(FLAGS.migraphx_version) + else: + df+= """ARG MIGRAPHX_VERSION=develop""" + df += """ - # Install MIGraphX from source - RUN mkdir -p 
/migraphx - RUN cd /migraphx && git clone --depth=1 --branch ${MIGRAPHX_VERSION} https://github.com/ROCmSoftwarePlatform/AMDMIGraphX src - RUN cd /migraphx && rbuild package --cxx /opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3 - RUN dpkg -i /migraphx/build/*.deb - RUN rm -rf /migraphx - """ +# Install MIGraphX from source +RUN mkdir -p /migraphx +RUN cd /migraphx && git clone --depth=1 --branch ${MIGRAPHX_VERSION} https://github.com/ROCmSoftwarePlatform/AMDMIGraphX src +RUN cd /migraphx && rbuild package --cxx /opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3 +RUN dpkg -i /migraphx/build/*.deb +RUN rm -rf /migraphx +""" if FLAGS.ort_openvino is not None: @@ -267,8 +283,6 @@ def dockerfile_for_linux(output_file): if FLAGS.rocm_home is not None: ep_flags += ' --rocm_home "{}"'.format(FLAGS.rocm_home) if FLAGS.ort_migraphx: - if FLAGS.migraphx_version is not None: - ep_flags += ' --migraphx_version "{}"'.format(FLAGS.migraphx_version) ep_flags += " --use_migraphx" if FLAGS.migraphx_home is not None: ep_flags += ' --migraphx_home "{}"'.format(FLAGS.migraphx_home) From e5f7ab30b5a68fc3caa14befaeda4b5aaf0e3c7b Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 20 Dec 2023 22:43:38 +0000 Subject: [PATCH 05/32] additional changes to generate migraphx ORT dockerfile --- tools/gen_ort_dockerfile.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index b9c9e09..97c989c 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -174,8 +174,8 @@ def dockerfile_for_linux(output_file): df += """ # Install MIGraphX from source RUN mkdir -p /migraphx -RUN cd /migraphx && git clone --depth=1 --branch ${MIGRAPHX_VERSION} https://github.com/ROCmSoftwarePlatform/AMDMIGraphX src -RUN cd /migraphx && rbuild package --cxx 
/opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3 +RUN cd /migraphx && git clone --depth=1 --branch ${MIGRAPHX_VERSION} https://github.com/ROCm/AMDMIGraphX src +RUN cd /migraphx && rbuild package --cxx /opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3 -DGPU_TARGETS=gfx1100 RUN dpkg -i /migraphx/build/*.deb RUN rm -rf /migraphx """ @@ -278,8 +278,8 @@ def dockerfile_for_linux(output_file): if FLAGS.enable_rocm: ep_flags = "--use_rocm" - if FLAGS.rocm_version is not None: - ep_flags += ' --rocm_version "{}"'.format(FLAGS.rocm_version) + #if FLAGS.rocm_version is not None: + #ep_flags += ' --rocm_version "{}"'.format(FLAGS.rocm_version) if FLAGS.rocm_home is not None: ep_flags += ' --rocm_home "{}"'.format(FLAGS.rocm_home) if FLAGS.ort_migraphx: @@ -487,16 +487,16 @@ def dockerfile_for_windows(output_file): if FLAGS.enable_rocm: ep_flags = "--use_rocm" - if FLAGS.rocm_version is not None: - ep_flags += ' --rocm_version "{}"'.format(FLAGS.rocm_version) + #if FLAGS.rocm_version is not None: + # ep_flags += ' --rocm_version={}'.format(FLAGS.rocm_version) if FLAGS.rocm_home is not None: - ep_flags += ' --rocm_home "{}"'.format(FLAGS.rocm_home) + ep_flags += ' --rocm_home {}'.format(FLAGS.rocm_home) if FLAGS.ort_migraphx: ep_flags += " --use_migraphx" if FLAGS.migraphx_version is not None: - ep_flags += ' --migraphx_version "{}"'.format(FLAGS.migraphx_version) + ep_flags += ' --migraphx_version {}'.format(FLAGS.migraphx_version) if FLAGS.migraphx_home is not None: - ep_flags += ' --migraphx_home "{}"'.format(FLAGS.migraphx_home) + ep_flags += ' --migraphx_home {}'.format(FLAGS.migraphx_home) if FLAGS.ort_openvino is not None: ep_flags += " --use_openvino CPU_FP32" From 3e5907b27e2f4577225d7a0e9023ab02306f0547 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Fri, 22 Dec 2023 19:51:05 +0000 Subject: [PATCH 06/32] Additional apt/pip 
libs and changes to onnxruntime for migarphx/rocm build --- tools/gen_ort_dockerfile.py | 57 +++++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index 97c989c..87b4743 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -162,7 +162,7 @@ def dockerfile_for_linux(output_file): ENV PATH /opt/miniconda/bin:/code/cmake-3.27.3-linux-x86_64/bin:${PATH} # Install rocm ep dependencies RUN apt-get update &&\ - apt-get install -y rocrand rccl hipsparse hipfft hipcub hipblas rocthrust + apt-get install -y rocrand rccl hipsparse hipfft hipcub hipblas rocthrust hip-base rocm-device-libs hipify-clang miopen-hip-dev rocm-cmake """ if FLAGS.ort_migraphx: @@ -173,9 +173,10 @@ def dockerfile_for_linux(output_file): df += """ # Install MIGraphX from source +ARG GPU_TARGETS='gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx940;gfx941;gfx942' RUN mkdir -p /migraphx RUN cd /migraphx && git clone --depth=1 --branch ${MIGRAPHX_VERSION} https://github.com/ROCm/AMDMIGraphX src -RUN cd /migraphx && rbuild package --cxx /opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3 -DGPU_TARGETS=gfx1100 +RUN cd /migraphx && rbuild package --cxx /opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3 -DGPU_TARGETS=${GPU_TARGETS} RUN dpkg -i /migraphx/build/*.deb RUN rm -rf /migraphx """ @@ -239,16 +240,30 @@ def dockerfile_for_linux(output_file): (cd onnxruntime && git submodule update --init --recursive) """ - else: + elif FLAGS.enable_rocm is not None: df += """ # - # ONNX Runtime build + # onnx runtime build # ARG ONNXRUNTIME_VERSION ARG ONNXRUNTIME_REPO ARG ONNXRUNTIME_BUILD_CONFIG - RUN git clone -b rel-${ONNXRUNTIME_VERSION} --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ + run git clone -b ${ONNXRUNTIME_VERSION} --recursive 
${ONNXRUNTIME_REPO} onnxruntime && \ + (cd onnxruntime && git submodule update --init --recursive) + + """ + + else: + df += """ + # + # onnx runtime build + # + arg onnxruntime_version + arg onnxruntime_repo + arg onnxruntime_build_config + + run git clone -b rel-${onnxruntime_version} --recursive ${onnxruntime_repo} onnxruntime && \ (cd onnxruntime && git submodule update --init --recursive) """ @@ -275,9 +290,16 @@ def dockerfile_for_linux(output_file): ep_flags += " --use_tensorrt_builtin_parser" if FLAGS.tensorrt_home is not None: ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) + cmake_defs = "CMAKE_CUDA_ARCHITECTURES" + cuda_archs = "\'60;61;70;75;80;86;90\'" if FLAGS.enable_rocm: ep_flags = "--use_rocm" + ep_flags += " --allow_running_as_root" + df += """ +RUN export PATH="/opt/cmake/bin:$PATH" +RUN export CXXFLAGS="-D__HIP_PLATFORM_AMD__=1 -w" + """ #if FLAGS.rocm_version is not None: #ep_flags += ' --rocm_version "{}"'.format(FLAGS.rocm_version) if FLAGS.rocm_home is not None: @@ -286,7 +308,9 @@ def dockerfile_for_linux(output_file): ep_flags += " --use_migraphx" if FLAGS.migraphx_home is not None: ep_flags += ' --migraphx_home "{}"'.format(FLAGS.migraphx_home) - + cmake_defs = "CMAKE_HIP_COMPILER" + cuda_archs = "/opt/rocm/llvm/bin/clang++" + ep_flags += " --allow_running_as_root" if os.name == "posix": if os.getuid() == 0: @@ -295,13 +319,13 @@ def dockerfile_for_linux(output_file): if FLAGS.ort_openvino is not None: ep_flags += " --use_openvino CPU_FP32" - cuda_archs = "60;61;70;75;80;86;90" df += """ WORKDIR /workspace/onnxruntime ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} --skip_submodule_sync --parallel --build_shared_lib \ - --build_dir /workspace/build --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES='{}' " + --build_dir /workspace/build --cmake_extra_defines {}={} " """.format( + cmake_defs, cuda_archs ) @@ -460,7 +484,7 @@ def dockerfile_for_windows(output_file): # ARG ONNXRUNTIME_VERSION ARG ONNXRUNTIME_REPO 
-RUN git clone -b rel-%ONNXRUNTIME_VERSION% --recursive %ONNXRUNTIME_REPO% onnxruntime && \ +RUN git clone -b %ONNXRUNTIME_VERSION% --recursive %ONNXRUNTIME_REPO% onnxruntime && \ (cd onnxruntime && git submodule update --init --recursive) """ @@ -473,7 +497,7 @@ def dockerfile_for_windows(output_file): ep_flags = "" if FLAGS.enable_gpu: - ep_flags = "--use_cuda" + ep_flags = "--use_cuda --cmake_extra_defines \"CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80;86;90\" " if FLAGS.cuda_version is not None: ep_flags += ' --cuda_version "{}"'.format(FLAGS.cuda_version) if FLAGS.cuda_home is not None: @@ -486,7 +510,13 @@ def dockerfile_for_windows(output_file): ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) if FLAGS.enable_rocm: - ep_flags = "--use_rocm" + df += """ +RUN sed -i 's/list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx906 --amdgpu-target=gfx908)/list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx906 --amdgpu-target=gfx908 --amdgpu-target=gfx1030)/g' onnxruntime/cmake/onnxruntime_providers.cmake && \ + sed -i 's/Version(torch.__version__) >= Version("1.11.0")/Version(torch.__version__).release >= Version("1.11.0").release/g' /workspace/onnxruntime/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py; \ +RUN export PATH="/opt/cmake/bin:$PATH" +RUN export CXXFLAGS="-D__HIP_PLATFORM_AMD__=1 -w" + """ + ep_flags = "--cmake_extra_defines CMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ --use_rocm --skip_tests" #if FLAGS.rocm_version is not None: # ep_flags += ' --rocm_version={}'.format(FLAGS.rocm_version) if FLAGS.rocm_home is not None: @@ -498,14 +528,17 @@ def dockerfile_for_windows(output_file): if FLAGS.migraphx_home is not None: ep_flags += ' --migraphx_home {}'.format(FLAGS.migraphx_home) + ep_flags += " --allow_running_as_root" + if FLAGS.ort_openvino is not None: ep_flags += " --use_openvino CPU_FP32" + df += """ WORKDIR /workspace/onnxruntime ARG VS_DEVCMD_BAT="\BuildTools\VC\Auxiliary\Build\vcvars64.bat" RUN powershell Set-Content 'build.bat' 
-value 'call %VS_DEVCMD_BAT%',(Get-Content 'build.bat') -RUN build.bat --cmake_generator "Visual Studio 17 2022" --config Release --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80;86;90" --skip_submodule_sync --parallel --build_shared_lib --update --build --build_dir /workspace/build {} +RUN build.bat --cmake_generator "Visual Studio 17 2022" --config Release --skip_submodule_sync --parallel --build_shared_lib --update --build --build_dir /workspace/build {} """.format( ep_flags ) From f4b66dd0a05622d480c0e223c3b7eb28a0d6715b Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Fri, 22 Dec 2023 20:41:05 +0000 Subject: [PATCH 07/32] Fix last step for MIGraphX specific headers --- tools/gen_ort_dockerfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index 87b4743..6587e46 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -601,7 +601,7 @@ def dockerfile_for_windows(output_file): df += """ # MIGraphX specific headers and libraries WORKDIR /opt/onnxruntime/include -RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\providers\\migraphx\\migraphx_provider_factory.h \\opt\\onnxruntime\\include +RUN copy \\workspace\\onnxruntime\\onnxruntime\\core\\providers\\migraphx\\migraphx_provider_factory.h \\opt\\onnxruntime\\include WORKDIR /opt/onnxruntime/lib RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_migraphx.dll \\opt\\onnxruntime\\bin From aa89843fa8ca38e8e1281c4e21aeaa930209ef5b Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 10 Jan 2024 01:54:14 +0000 Subject: [PATCH 08/32] Add CMake Hooks for enabling MIGraphX/ROCm in triton server build --- CMakeLists.txt | 81 +++ tools/gen_ort_dockerfile.py | 1007 +++++++++++++++++------------------ 2 files changed, 573 insertions(+), 515 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4130a3f..6231594 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt 
@@ -82,15 +82,36 @@ project(tritononnxruntimebackend LANGUAGES C CXX) # igpu. If not set, the current platform will be used. If building on # Jetpack, always set to igpu to avoid misdetection. # +# - If you want ROCm support set +# TRITON_ENABLE_ONNXRUNTIME_ROCM=ON and set +# TRITON_BUILD_ONNXRUNTIME_ROCM_VERSION to the ROCm stack +# version that is compatible with the specified version of ONNX +# Runtime. +# +# - If you want MIGraphX support set +# TRITON_ENABLE_ONNXRUNTIME_MIGRAPHX=ON and set +# TRITON_BUILD_ONNXRUNTIME_MIGRAPHX_VERSION to the MIGraphX +# version that is compatible with the specified version of ONNX +# Runtime. Requires that ROCm Support also be set +# # - If you want to disable GPU usage, set TRITON_ENABLE_GPU=OFF. # This will make builds with CUDA and TensorRT flags to fail. # option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON) +option(TRITON_ENABLE_ROCM "Enable AMD GPU support in backend" ON) option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON) option(TRITON_ENABLE_ONNXRUNTIME_TENSORRT "Enable TensorRT execution provider for ONNXRuntime backend in server" OFF) +option(TRITON_ENABLE_ONNXRUNTIME_ROCM + "Enable ROCm execution provider for ONNXRuntime backend in server" OFF) +option(TRITON_ENABLE_ONNXRUNTIME_MIGRAPHX + "Enable MIGraphX execution provider for ONNXRuntime backend in server" OFF) option(TRITON_ENABLE_ONNXRUNTIME_OPENVINO "Enable OpenVINO execution provider for ONNXRuntime backend in server" OFF) +set(TRITON_BUILD_ROCM_VERSION "" CACHE STRING "Version of ROCm install") +set(TRITON_BUILD_ROCM_HOME "" CACHE PATH "Path to ROCm install") +set(TRITON_BUILD_MIGRAPHX_VERSION "" CACHE STRING "Version of MIGraphX install") +set(TRITON_BUILD_MIGRAPHX_HOME "" CACHE PATH "Path to MIGraphX install") set(TRITON_BUILD_CONTAINER "" CACHE STRING "Triton container to use a base for build") set(TRITON_BUILD_CONTAINER_VERSION "" CACHE STRING "Triton container version to target") set(TRITON_BUILD_ONNXRUNTIME_VERSION "" CACHE 
STRING "ONNXRuntime version to build")
@@ -122,6 +143,12 @@ if (NOT TRITON_ENABLE_GPU)
   endif() # TRITON_ENABLE_ONNXRUNTIME_TENSORRT
 endif() # NOT TRITON_ENABLE_GPU
 
+if (NOT TRITON_ENABLE_ROCM)
+  if (TRITON_ENABLE_ONNXRUNTIME_MIGRAPHX)
+    message(FATAL_ERROR "TRITON_ENABLE_ONNXRUNTIME_MIGRAPHX=ON requires TRITON_ENABLE_ROCM=ON")
+  endif() # TRITON_ENABLE_ONNXRUNTIME_MIGRAPHX
+endif() # NOT TRITON_ENABLE_ROCM
+
 if(NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE Release)
 endif()
@@ -201,6 +228,13 @@ if(${TRITON_ENABLE_GPU})
   find_package(CUDAToolkit REQUIRED)
 endif() # TRITON_ENABLE_GPU
 
+#
+# ROCM
+#
+if(${TRITON_ENABLE_ROCM})
+  find_package(hip REQUIRED)
+endif() # TRITON_ENABLE_ROCM
+
 #
 # Shared library implementing the Triton Backend API
 #
@@ -234,6 +268,13 @@ target_compile_options(
   $<$:/Wall /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor>
 )
 
+if(${TRITON_ENABLE_ROCM})
+  target_compile_definitions(
+    triton-onnxruntime-backend
+    PRIVATE TRITON_ENABLE_ROCM=1
+  )
+endif() # TRITON_ENABLE_ROCM
+
 if(${TRITON_ENABLE_GPU})
   target_compile_definitions(
     triton-onnxruntime-backend
@@ -253,6 +294,20 @@ if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO})
   )
 endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO
 
+if(${TRITON_ENABLE_ONNXRUNTIME_ROCM})
+  target_compile_definitions(
+    triton-onnxruntime-backend
+    PRIVATE TRITON_ENABLE_ONNXRUNTIME_ROCM=1
+  )
+endif() # TRITON_ENABLE_ONNXRUNTIME_ROCM
+
+if(${TRITON_ENABLE_ONNXRUNTIME_MIGRAPHX})
+  target_compile_definitions(
+    triton-onnxruntime-backend
+    PRIVATE TRITON_ENABLE_ONNXRUNTIME_MIGRAPHX=1
+  )
+endif() # TRITON_ENABLE_ONNXRUNTIME_MIGRAPHX
+
 if (WIN32)
   set_target_properties(
     triton-onnxruntime-backend
@@ -305,6 +360,14 @@ if(${TRITON_ENABLE_GPU})
   )
 endif() # TRITON_ENABLE_GPU
 
+if(${TRITON_ENABLE_ROCM})
+  target_link_libraries(
+    triton-onnxruntime-backend
+    PRIVATE
+    hip::host
+  )
+endif() # TRITON_ENABLE_ROCM
+
 if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO})
   target_link_libraries(
     triton-onnxruntime-backend
     PRIVATE
@@ -339,6 +402,24 @@ 
if(TRITON_ONNXRUNTIME_DOCKER_BUILD)
   if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO})
     set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-openvino=${TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION}")
   endif() # TRITON_ENABLE_ONNXRUNTIME_OPENVINO
+  if(NOT ${TRITON_BUILD_ROCM_VERSION} STREQUAL "")
+    set(_GEN_FLAGS ${_GEN_FLAGS} "--rocm-version=${TRITON_BUILD_ROCM_VERSION}")
+  endif() # TRITON_BUILD_ROCM_VERSION
+  if(NOT ${TRITON_BUILD_ROCM_HOME} STREQUAL "")
+    set(_GEN_FLAGS ${_GEN_FLAGS} "--rocm-home=${TRITON_BUILD_ROCM_HOME}")
+  endif() # TRITON_BUILD_ROCM_HOME
+  if(${TRITON_ENABLE_ONNXRUNTIME_ROCM})
+    set(_GEN_FLAGS ${_GEN_FLAGS} "--enable-rocm")
+  endif() # TRITON_ENABLE_ONNXRUNTIME_ROCM
+  if(NOT ${TRITON_BUILD_MIGRAPHX_VERSION} STREQUAL "")
+    set(_GEN_FLAGS ${_GEN_FLAGS} "--migraphx-version=${TRITON_BUILD_MIGRAPHX_VERSION}")
+  endif() # TRITON_BUILD_MIGRAPHX_VERSION
+  if(NOT ${TRITON_BUILD_MIGRAPHX_HOME} STREQUAL "")
+    set(_GEN_FLAGS ${_GEN_FLAGS} "--migraphx-home=${TRITON_BUILD_MIGRAPHX_HOME}")
+  endif() # TRITON_BUILD_MIGRAPHX_HOME
+  if(${TRITON_ENABLE_ONNXRUNTIME_MIGRAPHX})
+    set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-migraphx")
+  endif() # TRITON_ENABLE_ONNXRUNTIME_MIGRAPHX
 
   set(ENABLE_GPU_EXTRA_ARGS "")
   if(${TRITON_ENABLE_GPU})
diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py
index 6aa7b9c..1018329 100755
--- a/tools/gen_ort_dockerfile.py
+++ b/tools/gen_ort_dockerfile.py
@@ -173,583 +173,560 @@ def dockerfile_for_linux(output_file):
 # Install MIGraphX from source
 ARG GPU_TARGETS='gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx940;gfx941;gfx942'
 RUN mkdir -p /migraphx
-RUN cd /migraphx && git clone --depth=1 --branch ${MIGRAPHX_VERSION} https://github.com/ROCm/AMDMIGraphX src
-RUN cd /migraphx && rbuild package --cxx /opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3 -DGPU_TARGETS=${GPU_TARGETS}
-RUN dpkg -i /migraphx/build/*.deb
-RUN rm -rf /migraphx
-"""
-
+    RUN cd /migraphx && git clone --depth=1 
--branch ${MIGRAPHX_VERSION} https://github.com/ROCm/AMDMIGraphX src + RUN cd /migraphx && rbuild package --cxx /opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3 -DGPU_TARGETS=${GPU_TARGETS} + RUN dpkg -i /migraphx/build/*.deb + RUN rm -rf /migraphx + """ - if FLAGS.ort_openvino is not None: - df += """ -# Install OpenVINO -ARG ONNXRUNTIME_OPENVINO_VERSION -ENV INTEL_OPENVINO_DIR /opt/intel/openvino_${ONNXRUNTIME_OPENVINO_VERSION} - -# Step 1: Download and install core components -# Ref: https://docs.openvino.ai/2023.0/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-1-download-and-install-the-openvino-core-components -RUN curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64.tgz --output openvino_${ONNXRUNTIME_OPENVINO_VERSION}.tgz && \ - tar -xf openvino_${ONNXRUNTIME_OPENVINO_VERSION}.tgz && \ - mkdir -p ${INTEL_OPENVINO_DIR} && \ - mv l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/* ${INTEL_OPENVINO_DIR} && \ - rm openvino_${ONNXRUNTIME_OPENVINO_VERSION}.tgz && \ - (cd ${INTEL_OPENVINO_DIR}/install_dependencies && \ - ./install_openvino_dependencies.sh -y) && \ - ln -s ${INTEL_OPENVINO_DIR} ${INTEL_OPENVINO_DIR}/../openvino_`echo ${ONNXRUNTIME_OPENVINO_VERSION} | awk '{print substr($0,0,4)}'` - -# Step 2: Configure the environment -# Ref: https://docs.openvino.ai/2023.0/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-2-configure-the-environment -ENV InferenceEngine_DIR=$INTEL_OPENVINO_DIR/runtime/cmake -ENV ngraph_DIR=$INTEL_OPENVINO_DIR/runtime/cmake -ENV OpenVINO_DIR=$INTEL_OPENVINO_DIR/runtime/cmake -ENV LD_LIBRARY_PATH $INTEL_OPENVINO_DIR/runtime/lib/intel64:$LD_LIBRARY_PATH -ENV PKG_CONFIG_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64/pkgconfig -ENV PYTHONPATH 
$INTEL_OPENVINO_DIR/python/python3.10:$INTEL_OPENVINO_DIR/python/python3:$PYTHONPATH -""" - ## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support. - # For ORT versions 1.8.0 and below the behavior will remain same. For ORT version 1.8.1 we will - # use tensorrt-8.0 branch instead of using rel-1.8.1 - # From ORT 1.9 onwards we will switch back to using rel-* branches - if FLAGS.ort_version == "1.8.1": - df += """ - # - # ONNX Runtime build - # - ARG ONNXRUNTIME_VERSION - ARG ONNXRUNTIME_REPO - ARG ONNXRUNTIME_BUILD_CONFIG - - RUN git clone -b tensorrt-8.0 --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ - (cd onnxruntime && git submodule update --init --recursive) - - """ - # Use the tensorrt-8.5ea branch to use Tensor RT 8.5a to use the built-in tensorrt parser - elif FLAGS.ort_version == "1.12.1": - df += """ - # - # ONNX Runtime build - # - ARG ONNXRUNTIME_VERSION - ARG ONNXRUNTIME_REPO - ARG ONNXRUNTIME_BUILD_CONFIG + if FLAGS.ort_openvino is not None: + df += """ + # Install OpenVINO + ARG ONNXRUNTIME_OPENVINO_VERSION + ENV INTEL_OPENVINO_DIR /opt/intel/openvino_${ONNXRUNTIME_OPENVINO_VERSION} + + # Step 1: Download and install core components + # Ref: https://docs.openvino.ai/2023.0/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-1-download-and-install-the-openvino-core-components + RUN curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64.tgz --output openvino_${ONNXRUNTIME_OPENVINO_VERSION}.tgz && \ + tar -xf openvino_${ONNXRUNTIME_OPENVINO_VERSION}.tgz && \ + mkdir -p ${INTEL_OPENVINO_DIR} && \ + mv l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/* ${INTEL_OPENVINO_DIR} && \ + rm openvino_${ONNXRUNTIME_OPENVINO_VERSION}.tgz && \ + (cd ${INTEL_OPENVINO_DIR}/install_dependencies && \ + ./install_openvino_dependencies.sh -y) && \ + ln -s ${INTEL_OPENVINO_DIR} 
${INTEL_OPENVINO_DIR}/../openvino_`echo ${ONNXRUNTIME_OPENVINO_VERSION} | awk '{print substr($0,0,4)}'` + + # Step 2: Configure the environment + # Ref: https://docs.openvino.ai/2023.0/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-2-configure-the-environment + ENV InferenceEngine_DIR=$INTEL_OPENVINO_DIR/runtime/cmake + ENV ngraph_DIR=$INTEL_OPENVINO_DIR/runtime/cmake + ENV OpenVINO_DIR=$INTEL_OPENVINO_DIR/runtime/cmake + ENV LD_LIBRARY_PATH $INTEL_OPENVINO_DIR/runtime/lib/intel64:$LD_LIBRARY_PATH + ENV PKG_CONFIG_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64/pkgconfig + ENV PYTHONPATH $INTEL_OPENVINO_DIR/python/python3.10:$INTEL_OPENVINO_DIR/python/python3:$PYTHONPATH + """ + + ## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support. + # For ORT versions 1.8.0 and below the behavior will remain same. For ORT version 1.8.1 we will + # use tensorrt-8.0 branch instead of using rel-1.8.1 + # From ORT 1.9 onwards we will switch back to using rel-* branches + if FLAGS.ort_version == "1.8.1": + df += """ + # + # ONNX Runtime build + # + ARG ONNXRUNTIME_VERSION + ARG ONNXRUNTIME_REPO + ARG ONNXRUNTIME_BUILD_CONFIG - RUN git clone -b tensorrt-8.5ea --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ - (cd onnxruntime && git submodule update --init --recursive) + RUN git clone -b tensorrt-8.0 --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ + (cd onnxruntime && git submodule update --init --recursive) - """ - elif FLAGS.enable_rocm is not None: - df += """ - # - # onnx runtime build - # - ARG ONNXRUNTIME_VERSION - ARG ONNXRUNTIME_REPO - ARG ONNXRUNTIME_BUILD_CONFIG + """ + # Use the tensorrt-8.5ea branch to use Tensor RT 8.5a to use the built-in tensorrt parser + elif FLAGS.ort_version == "1.12.1": + df += """ + # + # ONNX Runtime build + # + ARG ONNXRUNTIME_VERSION + ARG ONNXRUNTIME_REPO + ARG ONNXRUNTIME_BUILD_CONFIG - run git clone -b ${ONNXRUNTIME_VERSION} --recursive ${ONNXRUNTIME_REPO} 
onnxruntime && \ - (cd onnxruntime && git submodule update --init --recursive) + RUN git clone -b tensorrt-8.5ea --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ + (cd onnxruntime && git submodule update --init --recursive) """ + elif FLAGS.enable_rocm is not None: + df += """ + # + # onnx runtime build + # + ARG ONNXRUNTIME_VERSION + ARG ONNXRUNTIME_REPO + ARG ONNXRUNTIME_BUILD_CONFIG - else: - df += """ - # - # onnx runtime build - # - arg onnxruntime_version - arg onnxruntime_repo - arg onnxruntime_build_config + run git clone -b ${ONNXRUNTIME_VERSION} --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ + (cd onnxruntime && git submodule update --init --recursive) - run git clone -b rel-${onnxruntime_version} --recursive ${onnxruntime_repo} onnxruntime && \ - (cd onnxruntime && git submodule update --init --recursive) + """ - """ + else: + df += """ + # + # onnx runtime build + # + arg onnxruntime_version + arg onnxruntime_repo + arg onnxruntime_build_config - if FLAGS.onnx_tensorrt_tag != "": - df += """ - RUN (cd /workspace/onnxruntime/cmake/external/onnx-tensorrt && git fetch origin {}:ortrefbranch && git checkout ortrefbranch) - """.format( - FLAGS.onnx_tensorrt_tag - ) + run git clone -b rel-${onnxruntime_version} --recursive ${onnxruntime_repo} onnxruntime && \ + (cd onnxruntime && git submodule update --init --recursive) - ep_flags = "" - if FLAGS.enable_gpu: - ep_flags = "--use_cuda" - if FLAGS.cuda_version is not None: - ep_flags += ' --cuda_version "{}"'.format(FLAGS.cuda_version) - if FLAGS.cuda_home is not None: - ep_flags += ' --cuda_home "{}"'.format(FLAGS.cuda_home) - if FLAGS.cudnn_home is not None: - ep_flags += ' --cudnn_home "{}"'.format(FLAGS.cudnn_home) - elif target_platform() == "igpu": - ep_flags += ' --cudnn_home "/usr/lib/aarch64-linux-gnu"' - if FLAGS.ort_tensorrt: - ep_flags += " --use_tensorrt" - if FLAGS.ort_version >= "1.12.1": - ep_flags += " --use_tensorrt_builtin_parser" - if FLAGS.tensorrt_home is not None: - ep_flags += ' 
--tensorrt_home "{}"'.format(FLAGS.tensorrt_home) - cmake_defs = "CMAKE_CUDA_ARCHITECTURES" - cuda_archs = "\'60;61;70;75;80;86;90\'" - - if FLAGS.enable_rocm: - ep_flags = "--use_rocm" - ep_flags += " --allow_running_as_root" - df += """ -RUN export PATH="/opt/cmake/bin:$PATH" -RUN export CXXFLAGS="-D__HIP_PLATFORM_AMD__=1 -w" - """ - #if FLAGS.rocm_version is not None: - #ep_flags += ' --rocm_version "{}"'.format(FLAGS.rocm_version) - if FLAGS.rocm_home is not None: - ep_flags += ' --rocm_home "{}"'.format(FLAGS.rocm_home) - if FLAGS.ort_migraphx: - ep_flags += " --use_migraphx" - if FLAGS.migraphx_home is not None: - ep_flags += ' --migraphx_home "{}"'.format(FLAGS.migraphx_home) - cmake_defs = "CMAKE_HIP_COMPILER" - cuda_archs = "/opt/rocm/llvm/bin/clang++" - ep_flags += " --allow_running_as_root" - - if os.name == "posix": - if os.getuid() == 0: - ep_flags += " --allow_running_as_root" + """ - if FLAGS.ort_openvino is not None: - ep_flags += " --use_openvino CPU_FP32" + if FLAGS.onnx_tensorrt_tag != "": + df += """ + RUN (cd /workspace/onnxruntime/cmake/external/onnx-tensorrt && git fetch origin {}:ortrefbranch && git checkout ortrefbranch) + """.format( + FLAGS.onnx_tensorrt_tag + ) + + ep_flags = "" + if FLAGS.enable_gpu: + ep_flags = "--use_cuda" + if FLAGS.cuda_version is not None: + ep_flags += ' --cuda_version "{}"'.format(FLAGS.cuda_version) + if FLAGS.cuda_home is not None: + ep_flags += ' --cuda_home "{}"'.format(FLAGS.cuda_home) + if FLAGS.cudnn_home is not None: + ep_flags += ' --cudnn_home "{}"'.format(FLAGS.cudnn_home) + if FLAGS.ort_tensorrt: + ep_flags += " --use_tensorrt" + if FLAGS.ort_version >= "1.12.1": + ep_flags += " --use_tensorrt_builtin_parser" + if FLAGS.tensorrt_home is not None: + ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) + cmake_defs = "CMAKE_CUDA_ARCHITECTURES" + cuda_archs = "\'60;61;70;75;80;86;90\'" + + if FLAGS.enable_rocm: + ep_flags = "--use_rocm" + ep_flags += " --allow_running_as_root" + df += """ + 
RUN export PATH="/opt/cmake/bin:$PATH" + RUN export CXXFLAGS="-D__HIP_PLATFORM_AMD__=1 -w" + """ + #if FLAGS.rocm_version is not None: + #ep_flags += ' --rocm_version "{}"'.format(FLAGS.rocm_version) + if FLAGS.rocm_home is not None: + ep_flags += ' --rocm_home "{}"'.format(FLAGS.rocm_home) + if FLAGS.ort_migraphx: + ep_flags += " --use_migraphx" + if FLAGS.migraphx_home is not None: + ep_flags += ' --migraphx_home "{}"'.format(FLAGS.migraphx_home) + cmake_defs = "CMAKE_HIP_COMPILER" + cuda_archs = "/opt/rocm/llvm/bin/clang++" + ep_flags += " --allow_running_as_root" - if target_platform() == "igpu": - ep_flags += ( - " --skip_tests --cmake_extra_defines 'onnxruntime_BUILD_UNIT_TESTS=OFF'" - ) - cuda_archs = "53;62;72;87" - else: - cuda_archs = "60;61;70;75;80;86;90" + if os.name == "posix": + if os.getuid() == 0: + ep_flags += " --allow_running_as_root" - df += """ -WORKDIR /workspace/onnxruntime -ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} --skip_submodule_sync --parallel --build_shared_lib \ - --build_dir /workspace/build --cmake_extra_defines {}={} " -""".format( - cmake_defs, - cuda_archs - ) + if FLAGS.ort_openvino is not None: + ep_flags += " --use_openvino CPU_FP32" - df += """ -RUN ./build.sh ${{COMMON_BUILD_ARGS}} --update --build {} -""".format( - ep_flags - ) - df += """ -# -# Copy all artifacts needed by the backend to /opt/onnxruntime -# -WORKDIR /opt/onnxruntime - -RUN mkdir -p /opt/onnxruntime && \ - cp /workspace/onnxruntime/LICENSE /opt/onnxruntime && \ - cat /workspace/onnxruntime/cmake/external/onnx/VERSION_NUMBER > /opt/onnxruntime/ort_onnx_version.txt - -# ONNX Runtime headers, libraries and binaries -RUN mkdir -p /opt/onnxruntime/include && \ - cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_c_api.h \ - /opt/onnxruntime/include && \ - cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h \ - /opt/onnxruntime/include && \ - cp 
/workspace/onnxruntime/include/onnxruntime/core/providers/cpu/cpu_provider_factory.h \ - /opt/onnxruntime/include - -RUN mkdir -p /opt/onnxruntime/lib && \ - cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_shared.so \ - /opt/onnxruntime/lib && \ - cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime.so \ - /opt/onnxruntime/lib -""" - if target_platform() == "igpu": df += """ -RUN mkdir -p /opt/onnxruntime/bin -""" - else: - df += """ -RUN mkdir -p /opt/onnxruntime/bin && \ - cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/onnxruntime_perf_test \ - /opt/onnxruntime/bin && \ - cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/onnx_test_runner \ - /opt/onnxruntime/bin && \ - (cd /opt/onnxruntime/bin && chmod a+x *) -""" + WORKDIR /workspace/onnxruntime + ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} --skip_submodule_sync --parallel --build_shared_lib \ + --build_dir /workspace/build --cmake_extra_defines {}={} " + """.format( + cmake_defs, + cuda_archs + ) - if FLAGS.enable_gpu: df += """ -RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_cuda.so \ - /opt/onnxruntime/lib -""" + RUN ./build.sh ${{COMMON_BUILD_ARGS}} --update --build {} + """.format( + ep_flags + ) - if FLAGS.enable_rocm: df += """ -RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_rocm.so \ - /opt/onnxruntime/lib -""" + # + # Copy all artifacts needed by the backend to /opt/onnxruntime + # + WORKDIR /opt/onnxruntime + + RUN mkdir -p /opt/onnxruntime && \ + cp /workspace/onnxruntime/LICENSE /opt/onnxruntime && \ + cat /workspace/onnxruntime/cmake/external/onnx/VERSION_NUMBER > /opt/onnxruntime/ort_onnx_version.txt + + # ONNX Runtime headers, libraries and binaries + RUN mkdir -p /opt/onnxruntime/include && \ + cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_c_api.h \ + /opt/onnxruntime/include && \ + cp 
/workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h \ + /opt/onnxruntime/include && \ + cp /workspace/onnxruntime/include/onnxruntime/core/providers/cpu/cpu_provider_factory.h \ + /opt/onnxruntime/include + + RUN mkdir -p /opt/onnxruntime/lib && \ + cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_shared.so \ + /opt/onnxruntime/lib && \ + cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime.so \ + /opt/onnxruntime/lib + + RUN mkdir -p /opt/onnxruntime/bin && \ + cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/onnxruntime_perf_test \ + /opt/onnxruntime/bin && \ + cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/onnx_test_runner \ + /opt/onnxruntime/bin && \ + (cd /opt/onnxruntime/bin && chmod a+x *) + """ + if FLAGS.enable_gpu: + df += """ + RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_cuda.so \ + /opt/onnxruntime/lib + """ - if FLAGS.ort_tensorrt: - df += """ -# TensorRT specific headers and libraries -RUN cp /workspace/onnxruntime/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h \ - /opt/onnxruntime/include && \ - cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_tensorrt.so \ - /opt/onnxruntime/lib -""" + if FLAGS.enable_rocm: + df += """ + RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_rocm.so \ + /opt/onnxruntime/lib + """ - if FLAGS.ort_migraphx: - df += """ -# TensorRT specific headers and libraries -RUN cp /workspace/onnxruntime/include/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h \ - /opt/onnxruntime/include && \ - cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_migraphx.so \ - /opt/onnxruntime/lib -""" + if FLAGS.ort_tensorrt: + df += """ + # TensorRT specific headers and libraries + RUN cp /workspace/onnxruntime/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h \ + /opt/onnxruntime/include && \ + cp 
/workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_tensorrt.so \ + /opt/onnxruntime/lib + """ - if FLAGS.ort_openvino is not None: - df += """ -# OpenVino specific headers and libraries -RUN cp -r ${INTEL_OPENVINO_DIR}/docs/licensing /opt/onnxruntime/LICENSE.openvino - -RUN cp /workspace/onnxruntime/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h \ - /opt/onnxruntime/include - -RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_openvino.so \ - /opt/onnxruntime/lib && \ - cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino.so.${ONNXRUNTIME_OPENVINO_VERSION} \ - /opt/onnxruntime/lib && \ - cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_c.so.${ONNXRUNTIME_OPENVINO_VERSION} \ - /opt/onnxruntime/lib && \ - cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_intel_cpu_plugin.so \ - /opt/onnxruntime/lib && \ - cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_ir_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} \ - /opt/onnxruntime/lib && \ - cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_onnx_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} \ - /opt/onnxruntime/lib && \ - cp /usr/lib/x86_64-linux-gnu/libtbb.so.12 /opt/onnxruntime/lib && \ - cp /usr/lib/x86_64-linux-gnu/libpugixml.so.1 /opt/onnxruntime/lib - -RUN OV_SHORT_VERSION=`echo ${ONNXRUNTIME_OPENVINO_VERSION} | awk '{ split($0,a,"."); print substr(a[1],3) a[2] a[3] }'` && \ - (cd /opt/onnxruntime/lib && \ - chmod a-x * && \ - ln -s libopenvino.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino.so.${OV_SHORT_VERSION} && \ - ln -s libopenvino.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino.so && \ - ln -s libopenvino_c.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_c.so.${OV_SHORT_VERSION} && \ - ln -s libopenvino_c.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_c.so && \ - ln -s libopenvino_ir_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_ir_frontend.so.${OV_SHORT_VERSION} && \ - ln -s 
libopenvino_ir_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_ir_frontend.so && \ - ln -s libopenvino_onnx_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_onnx_frontend.so.${OV_SHORT_VERSION} && \ - ln -s libopenvino_onnx_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_onnx_frontend.so) -""" - # Linking compiled ONNX Runtime libraries to their corresponding versioned libraries - df += """ -RUN cd /opt/onnxruntime/lib \ - && ln -s libonnxruntime.so libonnxruntime.so.${ONNXRUNTIME_VERSION} -""" - df += """ -RUN cd /opt/onnxruntime/lib && \ - for i in `find . -mindepth 1 -maxdepth 1 -type f -name '*\.so*'`; do \ - patchelf --set-rpath '$ORIGIN' $i; \ - done + if FLAGS.ort_migraphx: + df += """ + # TensorRT specific headers and libraries + RUN cp /workspace/onnxruntime/include/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h \ + /opt/onnxruntime/include && \ + cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_migraphx.so \ + /opt/onnxruntime/lib + """ -# For testing copy ONNX custom op library and model -""" - if target_platform() == "igpu": + if FLAGS.ort_openvino is not None: + df += """ + # OpenVino specific headers and libraries + RUN cp -r ${INTEL_OPENVINO_DIR}/docs/licensing /opt/onnxruntime/LICENSE.openvino + + RUN cp /workspace/onnxruntime/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h \ + /opt/onnxruntime/include + + RUN cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_openvino.so \ + /opt/onnxruntime/lib && \ + cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino.so.${ONNXRUNTIME_OPENVINO_VERSION} \ + /opt/onnxruntime/lib && \ + cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_c.so.${ONNXRUNTIME_OPENVINO_VERSION} \ + /opt/onnxruntime/lib && \ + cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_intel_cpu_plugin.so \ + /opt/onnxruntime/lib && \ + cp 
${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_ir_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} \ + /opt/onnxruntime/lib && \ + cp ${INTEL_OPENVINO_DIR}/runtime/lib/intel64/libopenvino_onnx_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} \ + /opt/onnxruntime/lib && \ + cp /usr/lib/x86_64-linux-gnu/libtbb.so.12 /opt/onnxruntime/lib && \ + cp /usr/lib/x86_64-linux-gnu/libpugixml.so.1 /opt/onnxruntime/lib + + RUN OV_SHORT_VERSION=`echo ${ONNXRUNTIME_OPENVINO_VERSION} | awk '{ split($0,a,"."); print substr(a[1],3) a[2] a[3] }'` && \ + (cd /opt/onnxruntime/lib && \ + chmod a-x * && \ + ln -s libopenvino.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino.so.${OV_SHORT_VERSION} && \ + ln -s libopenvino.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino.so && \ + ln -s libopenvino_c.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_c.so.${OV_SHORT_VERSION} && \ + ln -s libopenvino_c.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_c.so && \ + ln -s libopenvino_ir_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_ir_frontend.so.${OV_SHORT_VERSION} && \ + ln -s libopenvino_ir_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_ir_frontend.so && \ + ln -s libopenvino_onnx_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_onnx_frontend.so.${OV_SHORT_VERSION} && \ + ln -s libopenvino_onnx_frontend.so.${ONNXRUNTIME_OPENVINO_VERSION} libopenvino_onnx_frontend.so) + """ + # Linking compiled ONNX Runtime libraries to their corresponding versioned libraries df += """ -RUN mkdir -p /opt/onnxruntime/test -""" - else: + RUN cd /opt/onnxruntime/lib \ + && ln -s libonnxruntime.so libonnxruntime.so.${ONNXRUNTIME_VERSION} + """ df += """ -RUN mkdir -p /opt/onnxruntime/test && \ - cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libcustom_op_library.so \ - /opt/onnxruntime/test && \ - cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/testdata/custom_op_library/custom_op_test.onnx \ - /opt/onnxruntime/test -""" + RUN cd /opt/onnxruntime/lib && \ + for i in `find . 
-mindepth 1 -maxdepth 1 -type f -name '*\.so*'`; do \ + patchelf --set-rpath '$ORIGIN' $i; \ + done + + # For testing copy ONNX custom op library and model + RUN mkdir -p /opt/onnxruntime/test && \ + cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libcustom_op_library.so \ + /opt/onnxruntime/test && \ + cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/testdata/custom_op_library/custom_op_test.onnx \ + /opt/onnxruntime/test + """ + + with open(output_file, "w") as dfile: + dfile.write(df) + + + def dockerfile_for_windows(output_file): + df = dockerfile_common() + + ## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support. + # For ORT versions 1.8.0 and below the behavior will remain same. For ORT version 1.8.1 we will + # use tensorrt-8.0 branch instead of using rel-1.8.1 + # From ORT 1.9 onwards we will switch back to using rel-* branches + if FLAGS.ort_version == "1.8.1": + df += """ + SHELL ["cmd", "/S", "/C"] - with open(output_file, "w") as dfile: - dfile.write(df) + # + # ONNX Runtime build + # + ARG ONNXRUNTIME_VERSION + ARG ONNXRUNTIME_REPO + RUN git clone -b tensorrt-8.0 --recursive %ONNXRUNTIME_REPO% onnxruntime && \ + (cd onnxruntime && git submodule update --init --recursive) + """ + else: + df += """ + SHELL ["cmd", "/S", "/C"] -def dockerfile_for_windows(output_file): - df = dockerfile_common() + # + # ONNX Runtime build + # + ARG ONNXRUNTIME_VERSION + ARG ONNXRUNTIME_REPO + RUN git clone -b %ONNXRUNTIME_VERSION% --recursive %ONNXRUNTIME_REPO% onnxruntime && \ + (cd onnxruntime && git submodule update --init --recursive) + """ - ## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support. - # For ORT versions 1.8.0 and below the behavior will remain same. 
For ORT version 1.8.1 we will - # use tensorrt-8.0 branch instead of using rel-1.8.1 - # From ORT 1.9 onwards we will switch back to using rel-* branches - if FLAGS.ort_version == "1.8.1": - df += """ -SHELL ["cmd", "/S", "/C"] + if FLAGS.onnx_tensorrt_tag != "": + df += """ + RUN (cd \\workspace\\onnxruntime\\cmake\\external\\onnx-tensorrt && git fetch origin {}:ortrefbranch && git checkout ortrefbranch) + """.format( + FLAGS.onnx_tensorrt_tag + ) + + ep_flags = "" + if FLAGS.enable_gpu: + ep_flags = "--use_cuda --cmake_extra_defines \"CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80;86;90\" " + if FLAGS.cuda_version is not None: + ep_flags += ' --cuda_version "{}"'.format(FLAGS.cuda_version) + if FLAGS.cuda_home is not None: + ep_flags += ' --cuda_home "{}"'.format(FLAGS.cuda_home) + if FLAGS.cudnn_home is not None: + ep_flags += ' --cudnn_home "{}"'.format(FLAGS.cudnn_home) + if FLAGS.ort_tensorrt: + ep_flags += " --use_tensorrt" + if FLAGS.tensorrt_home is not None: + ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) + + if FLAGS.enable_rocm: + df += """ + RUN sed -i 's/list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx906 --amdgpu-target=gfx908)/list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx906 --amdgpu-target=gfx908 --amdgpu-target=gfx1030)/g' onnxruntime/cmake/onnxruntime_providers.cmake && \ + sed -i 's/Version(torch.__version__) >= Version("1.11.0")/Version(torch.__version__).release >= Version("1.11.0").release/g' /workspace/onnxruntime/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py; \ + RUN export PATH="/opt/cmake/bin:$PATH" + RUN export CXXFLAGS="-D__HIP_PLATFORM_AMD__=1 -w" + """ + ep_flags = "--cmake_extra_defines CMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ --use_rocm --skip_tests" + #if FLAGS.rocm_version is not None: + # ep_flags += ' --rocm_version={}'.format(FLAGS.rocm_version) + if FLAGS.rocm_home is not None: + ep_flags += ' --rocm_home {}'.format(FLAGS.rocm_home) + if FLAGS.ort_migraphx: + ep_flags += " --use_migraphx" + 
if FLAGS.migraphx_version is not None: + ep_flags += ' --migraphx_version {}'.format(FLAGS.migraphx_version) + if FLAGS.migraphx_home is not None: + ep_flags += ' --migraphx_home {}'.format(FLAGS.migraphx_home) -# -# ONNX Runtime build -# -ARG ONNXRUNTIME_VERSION -ARG ONNXRUNTIME_REPO + ep_flags += " --allow_running_as_root" -RUN git clone -b tensorrt-8.0 --recursive %ONNXRUNTIME_REPO% onnxruntime && \ - (cd onnxruntime && git submodule update --init --recursive) -""" - else: - df += """ -SHELL ["cmd", "/S", "/C"] + if FLAGS.ort_openvino is not None: + ep_flags += " --use_openvino CPU_FP32" -# -# ONNX Runtime build -# -ARG ONNXRUNTIME_VERSION -ARG ONNXRUNTIME_REPO -RUN git clone -b %ONNXRUNTIME_VERSION% --recursive %ONNXRUNTIME_REPO% onnxruntime && \ - (cd onnxruntime && git submodule update --init --recursive) -""" - if FLAGS.onnx_tensorrt_tag != "": df += """ - RUN (cd \\workspace\\onnxruntime\\cmake\\external\\onnx-tensorrt && git fetch origin {}:ortrefbranch && git checkout ortrefbranch) + WORKDIR /workspace/onnxruntime + ARG VS_DEVCMD_BAT="\BuildTools\VC\Auxiliary\Build\vcvars64.bat" + RUN powershell Set-Content 'build.bat' -value 'call %VS_DEVCMD_BAT%',(Get-Content 'build.bat') + RUN build.bat --cmake_generator "Visual Studio 17 2022" --config Release --skip_submodule_sync --parallel --build_shared_lib --update --build --build_dir /workspace/build {} """.format( - FLAGS.onnx_tensorrt_tag + ep_flags ) - ep_flags = "" - if FLAGS.enable_gpu: - ep_flags = "--use_cuda --cmake_extra_defines \"CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80;86;90\" " - if FLAGS.cuda_version is not None: - ep_flags += ' --cuda_version "{}"'.format(FLAGS.cuda_version) - if FLAGS.cuda_home is not None: - ep_flags += ' --cuda_home "{}"'.format(FLAGS.cuda_home) - if FLAGS.cudnn_home is not None: - ep_flags += ' --cudnn_home "{}"'.format(FLAGS.cudnn_home) - if FLAGS.ort_tensorrt: - ep_flags += " --use_tensorrt" - if FLAGS.tensorrt_home is not None: - ep_flags += ' --tensorrt_home 
"{}"'.format(FLAGS.tensorrt_home) - - if FLAGS.enable_rocm: df += """ -RUN sed -i 's/list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx906 --amdgpu-target=gfx908)/list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx906 --amdgpu-target=gfx908 --amdgpu-target=gfx1030)/g' onnxruntime/cmake/onnxruntime_providers.cmake && \ - sed -i 's/Version(torch.__version__) >= Version("1.11.0")/Version(torch.__version__).release >= Version("1.11.0").release/g' /workspace/onnxruntime/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py; \ -RUN export PATH="/opt/cmake/bin:$PATH" -RUN export CXXFLAGS="-D__HIP_PLATFORM_AMD__=1 -w" - """ - ep_flags = "--cmake_extra_defines CMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ --use_rocm --skip_tests" - #if FLAGS.rocm_version is not None: - # ep_flags += ' --rocm_version={}'.format(FLAGS.rocm_version) - if FLAGS.rocm_home is not None: - ep_flags += ' --rocm_home {}'.format(FLAGS.rocm_home) - if FLAGS.ort_migraphx: - ep_flags += " --use_migraphx" - if FLAGS.migraphx_version is not None: - ep_flags += ' --migraphx_version {}'.format(FLAGS.migraphx_version) - if FLAGS.migraphx_home is not None: - ep_flags += ' --migraphx_home {}'.format(FLAGS.migraphx_home) - - ep_flags += " --allow_running_as_root" - - if FLAGS.ort_openvino is not None: - ep_flags += " --use_openvino CPU_FP32" - - - df += """ -WORKDIR /workspace/onnxruntime -ARG VS_DEVCMD_BAT="\BuildTools\VC\Auxiliary\Build\vcvars64.bat" -RUN powershell Set-Content 'build.bat' -value 'call %VS_DEVCMD_BAT%',(Get-Content 'build.bat') -RUN build.bat --cmake_generator "Visual Studio 17 2022" --config Release --skip_submodule_sync --parallel --build_shared_lib --update --build --build_dir /workspace/build {} -""".format( - ep_flags - ) + # + # Copy all artifacts needed by the backend to /opt/onnxruntime + # + WORKDIR /opt/onnxruntime + RUN copy \\workspace\\onnxruntime\\LICENSE \\opt\\onnxruntime + RUN copy \\workspace\\onnxruntime\\cmake\\external\\onnx\\VERSION_NUMBER 
\\opt\\onnxruntime\\ort_onnx_version.txt + + # ONNX Runtime headers, libraries and binaries + WORKDIR /opt/onnxruntime/include + RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\session\\onnxruntime_c_api.h \\opt\\onnxruntime\\include + RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\session\\onnxruntime_session_options_config_keys.h \\opt\\onnxruntime\\include + RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\providers\\cpu\\cpu_provider_factory.h \\opt\\onnxruntime\\include + + WORKDIR /opt/onnxruntime/bin + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime.dll \\opt\\onnxruntime\\bin + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_shared.dll \\opt\\onnxruntime\\bin + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_perf_test.exe \\opt\\onnxruntime\\bin + RUN copy \\workspace\\build\\Release\\Release\\onnx_test_runner.exe \\opt\\onnxruntime\\bin + + WORKDIR /opt/onnxruntime/lib + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime.lib \\opt\\onnxruntime\\lib + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_shared.lib \\opt\\onnxruntime\\lib + """ + + if FLAGS.enable_gpu: + df += """ + WORKDIR /opt/onnxruntime/lib + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_cuda.lib \\opt\\onnxruntime\\lib + WORKDIR /opt/onnxruntime/bin + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_cuda.dll \\opt\\onnxruntime\\bin + """ - df += """ -# -# Copy all artifacts needed by the backend to /opt/onnxruntime -# -WORKDIR /opt/onnxruntime -RUN copy \\workspace\\onnxruntime\\LICENSE \\opt\\onnxruntime -RUN copy \\workspace\\onnxruntime\\cmake\\external\\onnx\\VERSION_NUMBER \\opt\\onnxruntime\\ort_onnx_version.txt - -# ONNX Runtime headers, libraries and binaries -WORKDIR /opt/onnxruntime/include -RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\session\\onnxruntime_c_api.h \\opt\\onnxruntime\\include -RUN copy 
\\workspace\\onnxruntime\\include\\onnxruntime\\core\\session\\onnxruntime_session_options_config_keys.h \\opt\\onnxruntime\\include -RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\providers\\cpu\\cpu_provider_factory.h \\opt\\onnxruntime\\include - -WORKDIR /opt/onnxruntime/bin -RUN copy \\workspace\\build\\Release\\Release\\onnxruntime.dll \\opt\\onnxruntime\\bin -RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_shared.dll \\opt\\onnxruntime\\bin -RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_perf_test.exe \\opt\\onnxruntime\\bin -RUN copy \\workspace\\build\\Release\\Release\\onnx_test_runner.exe \\opt\\onnxruntime\\bin - -WORKDIR /opt/onnxruntime/lib -RUN copy \\workspace\\build\\Release\\Release\\onnxruntime.lib \\opt\\onnxruntime\\lib -RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_shared.lib \\opt\\onnxruntime\\lib -""" + if FLAGS.enable_rocm: + df += """ + WORKDIR /opt/onnxruntime/lib + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_rocm.lib \\opt\\onnxruntime\\lib + WORKDIR /opt/onnxruntime/bin + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_rocm.dll \\opt\\onnxruntime\\bin + """ - if FLAGS.enable_gpu: - df += """ -WORKDIR /opt/onnxruntime/lib -RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_cuda.lib \\opt\\onnxruntime\\lib -WORKDIR /opt/onnxruntime/bin -RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_cuda.dll \\opt\\onnxruntime\\bin -""" + if FLAGS.ort_tensorrt: + df += """ + # TensorRT specific headers and libraries + WORKDIR /opt/onnxruntime/include + RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\providers\\tensorrt\\tensorrt_provider_factory.h \\opt\\onnxruntime\\include - if FLAGS.enable_rocm: - df += """ -WORKDIR /opt/onnxruntime/lib -RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_rocm.lib \\opt\\onnxruntime\\lib -WORKDIR /opt/onnxruntime/bin -RUN copy 
\\workspace\\build\\Release\\Release\\onnxruntime_providers_rocm.dll \\opt\\onnxruntime\\bin -""" + WORKDIR /opt/onnxruntime/lib + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_tensorrt.dll \\opt\\onnxruntime\\bin - if FLAGS.ort_tensorrt: - df += """ -# TensorRT specific headers and libraries -WORKDIR /opt/onnxruntime/include -RUN copy \\workspace\\onnxruntime\\include\\onnxruntime\\core\\providers\\tensorrt\\tensorrt_provider_factory.h \\opt\\onnxruntime\\include + WORKDIR /opt/onnxruntime/lib + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_tensorrt.lib \\opt\\onnxruntime\\lib + """ -WORKDIR /opt/onnxruntime/lib -RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_tensorrt.dll \\opt\\onnxruntime\\bin + if FLAGS.ort_migraphx: + df += """ + # MIGraphX specific headers and libraries + WORKDIR /opt/onnxruntime/include + RUN copy \\workspace\\onnxruntime\\onnxruntime\\core\\providers\\migraphx\\migraphx_provider_factory.h \\opt\\onnxruntime\\include + + WORKDIR /opt/onnxruntime/lib + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_migraphx.dll \\opt\\onnxruntime\\bin + + WORKDIR /opt/onnxruntime/lib + RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_migraphx.lib \\opt\\onnxruntime\\lib + """ + with open(output_file, "w") as dfile: + dfile.write(df) + + + def preprocess_gpu_flags(): + if target_platform() == "windows": + # Default to CUDA based on CUDA_PATH envvar and TensorRT in + # C:/tensorrt + if "CUDA_PATH" in os.environ: + if FLAGS.cuda_home is None: + FLAGS.cuda_home = os.environ["CUDA_PATH"] + elif FLAGS.cuda_home != os.environ["CUDA_PATH"]: + print("warning: --cuda-home does not match CUDA_PATH envvar") -WORKDIR /opt/onnxruntime/lib -RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_tensorrt.lib \\opt\\onnxruntime\\lib -""" + if FLAGS.cudnn_home is None: + FLAGS.cudnn_home = FLAGS.cuda_home - if FLAGS.ort_migraphx: - df += """ -# MIGraphX 
specific headers and libraries -WORKDIR /opt/onnxruntime/include -RUN copy \\workspace\\onnxruntime\\onnxruntime\\core\\providers\\migraphx\\migraphx_provider_factory.h \\opt\\onnxruntime\\include + version = None + m = re.match(r".*v([1-9]?[0-9]+\.[0-9]+)$", FLAGS.cuda_home) + if m: + version = m.group(1) -WORKDIR /opt/onnxruntime/lib -RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_migraphx.dll \\opt\\onnxruntime\\bin + if FLAGS.cuda_version is None: + FLAGS.cuda_version = version + elif FLAGS.cuda_version != version: + print("warning: --cuda-version does not match CUDA_PATH envvar") -WORKDIR /opt/onnxruntime/lib -RUN copy \\workspace\\build\\Release\\Release\\onnxruntime_providers_migraphx.lib \\opt\\onnxruntime\\lib -""" - with open(output_file, "w") as dfile: - dfile.write(df) + if (FLAGS.cuda_home is None) or (FLAGS.cuda_version is None): + print("error: windows build requires --cuda-version and --cuda-home") + if FLAGS.tensorrt_home is None: + FLAGS.tensorrt_home = "/tensorrt" + else: + if "CUDNN_VERSION" in os.environ: + version = None + m = re.match(r"([0-9]\.[0-9])\.[0-9]\.[0-9]", os.environ["CUDNN_VERSION"]) + if m: + version = m.group(1) + if FLAGS.cudnn_home is None: + FLAGS.cudnn_home = "/usr/local/cudnn-{}/cuda".format(version) -def preprocess_gpu_flags(): - if target_platform() == "windows": - # Default to CUDA based on CUDA_PATH envvar and TensorRT in - # C:/tensorrt - if "CUDA_PATH" in os.environ: if FLAGS.cuda_home is None: - FLAGS.cuda_home = os.environ["CUDA_PATH"] - elif FLAGS.cuda_home != os.environ["CUDA_PATH"]: - print("warning: --cuda-home does not match CUDA_PATH envvar") + FLAGS.cuda_home = "/usr/local/cuda" - if FLAGS.cudnn_home is None: - FLAGS.cudnn_home = FLAGS.cuda_home + if (FLAGS.cuda_home is None) or (FLAGS.cudnn_home is None): + print("error: linux build requires --cudnn-home and --cuda-home") - version = None - m = re.match(r".*v([1-9]?[0-9]+\.[0-9]+)$", FLAGS.cuda_home) - if m: - version = m.group(1) + if 
FLAGS.tensorrt_home is None: + FLAGS.tensorrt_home = "/usr/src/tensorrt" + + if FLAGS.rocm_home is None: + FLAGS.rocm_home = "/opt/rocm/" - if FLAGS.cuda_version is None: - FLAGS.cuda_version = version - elif FLAGS.cuda_version != version: - print("warning: --cuda-version does not match CUDA_PATH envvar") + if FLAGS.migraphx_home is None: + FLAGS.migraphx_home = "/opt/rocm/" - if (FLAGS.cuda_home is None) or (FLAGS.cuda_version is None): - print("error: windows build requires --cuda-version and --cuda-home") - - if FLAGS.tensorrt_home is None: - FLAGS.tensorrt_home = "/tensorrt" - else: - if "CUDNN_VERSION" in os.environ: - version = None - m = re.match(r"([0-9]\.[0-9])\.[0-9]\.[0-9]", os.environ["CUDNN_VERSION"]) - if m: - version = m.group(1) - if FLAGS.cudnn_home is None: - FLAGS.cudnn_home = "/usr/local/cudnn-{}/cuda".format(version) - if FLAGS.cuda_home is None: - FLAGS.cuda_home = "/usr/local/cuda" - if (FLAGS.cuda_home is None) or (FLAGS.cudnn_home is None): - print("error: linux build requires --cudnn-home and --cuda-home") + if __name__ == "__main__": + parser = argparse.ArgumentParser() - if FLAGS.tensorrt_home is None: - FLAGS.tensorrt_home = "/usr/src/tensorrt" - - if FLAGS.rocm_home is None: - FLAGS.rocm_home = "/opt/rocm/" - - if FLAGS.migraphx_home is None: - FLAGS.migraphx_home = "/opt/rocm/" - - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - - parser.add_argument( - "--triton-container", - type=str, - required=True, - help="Triton base container to use for ORT build.", - ) - parser.add_argument("--ort-version", type=str, required=True, help="ORT version.") - parser.add_argument( - "--output", type=str, required=True, help="File to write Dockerfile to." 
- ) - parser.add_argument( - "--enable-gpu", action="store_true", required=False, help="Enable GPU support" - ) - parser.add_argument( - "--enable-rocm", action="store_true", required=False, help="Enable GPU support" - ) - parser.add_argument( - "--ort-build-config", - type=str, - default="Release", - choices=["Debug", "Release", "RelWithDebInfo"], - help="ORT build configuration.", - ) - parser.add_argument( - "--target-platform", - required=False, - default=None, - help='Target for build, can be "linux", "windows" or "igpu". If not specified, build targets the current platform.', - ) + parser.add_argument( + "--triton-container", + type=str, + required=True, + help="Triton base container to use for ORT build.", + ) + parser.add_argument("--ort-version", type=str, required=True, help="ORT version.") + parser.add_argument( + "--output", type=str, required=True, help="File to write Dockerfile to." + ) + parser.add_argument( + "--enable-gpu", action="store_true", required=False, help="Enable GPU support" + ) + parser.add_argument( + "--enable-rocm", action="store_true", required=False, help="Enable GPU support" + ) + parser.add_argument( + "--ort-build-config", + type=str, + default="Release", + choices=["Debug", "Release", "RelWithDebInfo"], + help="ORT build configuration.", + ) + parser.add_argument( + "--target-platform", + required=False, + default=None, + help='Target for build, can be "ubuntu", "windows" or "jetpack". If not specified, build targets the current platform.', + ) - parser.add_argument( - "--cuda-version", type=str, required=False, help="Version for CUDA." - ) - parser.add_argument( - "--cuda-home", type=str, required=False, help="Home directory for CUDA." - ) - parser.add_argument( - "--rocm-version", type=str, required=False, help="Version for ROCM." - ) - parser.add_argument( - "--rocm-home", type=str, required=False, help="Home directory for ROCM." 
- ) + parser.add_argument( + "--cuda-version", type=str, required=False, help="Version for CUDA." + ) + parser.add_argument( + "--cuda-home", type=str, required=False, help="Home directory for CUDA." + ) + parser.add_argument( + "--rocm-version", type=str, required=False, help="Version for ROCM." + ) + parser.add_argument( + "--rocm-home", type=str, required=False, help="Home directory for ROCM." + ) - parser.add_argument( - "--cudnn-home", type=str, required=False, help="Home directory for CUDNN." - ) - parser.add_argument( - "--ort-openvino", - type=str, - required=False, - help="Enable OpenVino execution provider using specified OpenVINO version.", - ) - parser.add_argument( - "--ort-tensorrt", - action="store_true", - required=False, - help="Enable TensorRT execution provider.", - ) - parser.add_argument( + parser.add_argument( + "--cudnn-home", type=str, required=False, help="Home directory for CUDNN." + ) + parser.add_argument( + "--ort-openvino", + type=str, + required=False, + help="Enable OpenVino execution provider using specified OpenVINO version.", + ) + parser.add_argument( + "--ort-tensorrt", + action="store_true", + required=False, + help="Enable TensorRT execution provider.", + ) + parser.add_argument( "--tensorrt-home", type=str, required=False, help="Home directory for TensorRT." 
) parser.add_argument( From 3df7f34124c4f355b9380b000dca40a8607f49ab Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 10 Jan 2024 18:10:10 +0000 Subject: [PATCH 09/32] Update cmake for generator script --- CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6231594..885021c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -395,6 +395,7 @@ if(TRITON_ONNXRUNTIME_DOCKER_BUILD) endif() # TRITON_BUILD_CUDNN_HOME if(NOT ${TRITON_BUILD_TENSORRT_HOME} STREQUAL "") set(_GEN_FLAGS ${_GEN_FLAGS} "--tensorrt-home=${TRITON_BUILD_TENSORRT_HOME}") + set(_GEN_FLAGS ${_GEN_FLAGS} "--trt-version="${TRT_VERSION}" --onnx-tensorrt-tag="${TRITON_ONNX_TENSORRT_REPO_TAG}") endif() # TRITON_BUILD_TENSORRT_HOME if(${TRITON_ENABLE_ONNXRUNTIME_TENSORRT}) set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-tensorrt") @@ -430,7 +431,7 @@ if(TRITON_ONNXRUNTIME_DOCKER_BUILD) add_custom_command( OUTPUT onnxruntime/lib/${ONNXRUNTIME_LIBRARY} - COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/tools/gen_ort_dockerfile.py --triton-container="${TRITON_BUILD_CONTAINER}" --ort-version="${TRITON_BUILD_ONNXRUNTIME_VERSION}" --trt-version="${TRT_VERSION}" --onnx-tensorrt-tag="${TRITON_ONNX_TENSORRT_REPO_TAG}" ${_GEN_FLAGS} --output=Dockerfile.ort ${ENABLE_GPU_EXTRA_ARGS} + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/tools/gen_ort_dockerfile.py --triton-container="${TRITON_BUILD_CONTAINER}" --ort-version="${TRITON_BUILD_ONNXRUNTIME_VERSION}" ${_GEN_FLAGS} --output=Dockerfile.ort ${ENABLE_GPU_EXTRA_ARGS} COMMAND docker build --memory ${TRITON_ONNXRUNTIME_DOCKER_MEMORY} --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE} --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache0 --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache1 -t ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} -f ./Dockerfile.ort ${CMAKE_CURRENT_SOURCE_DIR} COMMAND powershell.exe -noprofile -c "docker rm onnxruntime_backend_ort > $null 2>&1; if ($LASTEXITCODE) { 'error ignored...' 
}; exit 0" COMMAND docker create --name onnxruntime_backend_ort ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} @@ -443,7 +444,7 @@ if(TRITON_ONNXRUNTIME_DOCKER_BUILD) add_custom_command( OUTPUT onnxruntime/lib/${ONNXRUNTIME_LIBRARY} - COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tools/gen_ort_dockerfile.py --ort-build-config="${CMAKE_BUILD_TYPE}" --triton-container="${TRITON_BUILD_CONTAINER}" --ort-version="${TRITON_BUILD_ONNXRUNTIME_VERSION}" --trt-version="${TRT_VERSION}" --onnx-tensorrt-tag="${TRITON_ONNX_TENSORRT_REPO_TAG}" ${_GEN_FLAGS} --output=Dockerfile.ort ${ENABLE_GPU_EXTRA_ARGS} + COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tools/gen_ort_dockerfile.py --ort-build-config="${CMAKE_BUILD_TYPE}" --triton-container="${TRITON_BUILD_CONTAINER}" --ort-version="${TRITON_BUILD_ONNXRUNTIME_VERSION}" ${_GEN_FLAGS} --output=Dockerfile.ort ${ENABLE_GPU_EXTRA_ARGS} COMMAND docker build --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE} --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache0 --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache1 -t ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} -f ./Dockerfile.ort ${CMAKE_CURRENT_SOURCE_DIR} COMMAND docker rm onnxruntime_backend_ort || echo 'error ignored...' 
|| true COMMAND docker create --name onnxruntime_backend_ort ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} From dad50417832c2b2b773652b8a948f11f8ec7cf61 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Fri, 12 Jan 2024 01:30:40 +0000 Subject: [PATCH 10/32] Fix link to hiprtc --- CMakeLists.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 885021c..a5a4b07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -364,7 +364,7 @@ if(${TRITON_ENABLE_ROCM}) target_link_libraries( triton-onnxruntime-backend PRIVATE - CUDA::hiprt + hiprtc::hiprtc ) endif() TRITON_ENABLE_ROCM @@ -395,10 +395,9 @@ if(TRITON_ONNXRUNTIME_DOCKER_BUILD) endif() # TRITON_BUILD_CUDNN_HOME if(NOT ${TRITON_BUILD_TENSORRT_HOME} STREQUAL "") set(_GEN_FLAGS ${_GEN_FLAGS} "--tensorrt-home=${TRITON_BUILD_TENSORRT_HOME}") - set(_GEN_FLAGS ${_GEN_FLAGS} "--trt-version="${TRT_VERSION}" --onnx-tensorrt-tag="${TRITON_ONNX_TENSORRT_REPO_TAG}") endif() # TRITON_BUILD_TENSORRT_HOME if(${TRITON_ENABLE_ONNXRUNTIME_TENSORRT}) - set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-tensorrt") + set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-tensorrt --trt-version="${TRT_VERSION}" --onnx-tensorrt-tag="${TRITON_ONNX_TENSORRT_REPO_TAG}") endif() # TRITON_ENABLE_ONNXRUNTIME_TENSORRT if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-openvino=${TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION}") From 61b6650fee54d381b9dbbf33f78a7aa612b950eb Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 21 Feb 2024 18:05:09 +0000 Subject: [PATCH 11/32] Fixes to scripts --- CMakeLists.txt | 2 +- tools/gen_ort_dockerfile.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a5a4b07..f0d881e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -366,7 +366,7 @@ if(${TRITON_ENABLE_ROCM}) PRIVATE hiprtc::hiprtc ) -endif() TRITON_ENABLE_ROCM +endif() #TRITON_ENABLE_ROCM if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) 
target_link_libraries( diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index 1018329..d678494 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -238,7 +238,7 @@ def dockerfile_for_linux(output_file): (cd onnxruntime && git submodule update --init --recursive) """ - elif FLAGS.enable_rocm is not None: + elif FLAGS.enable_rocm: df += """ # # onnx runtime build From 062d0a2a9969a4cb31c3a31969a9f39c2ec4cca9 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 21 Feb 2024 19:19:44 +0000 Subject: [PATCH 12/32] Fix warning in CMakeList --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f0d881e..d7ea3fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -397,7 +397,7 @@ if(TRITON_ONNXRUNTIME_DOCKER_BUILD) set(_GEN_FLAGS ${_GEN_FLAGS} "--tensorrt-home=${TRITON_BUILD_TENSORRT_HOME}") endif() # TRITON_BUILD_TENSORRT_HOME if(${TRITON_ENABLE_ONNXRUNTIME_TENSORRT}) - set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-tensorrt --trt-version="${TRT_VERSION}" --onnx-tensorrt-tag="${TRITON_ONNX_TENSORRT_REPO_TAG}") + set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-tensorrt --trt-version=${TRT_VERSION} --onnx-tensorrt-tag=${TRITON_ONNX_TENSORRT_REPO_TAG}") endif() # TRITON_ENABLE_ONNXRUNTIME_TENSORRT if(${TRITON_ENABLE_ONNXRUNTIME_OPENVINO}) set(_GEN_FLAGS ${_GEN_FLAGS} "--ort-openvino=${TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION}") From 1fd0f37ae6be436c30991c8af0b3f40db37cf0f5 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 21 Feb 2024 20:28:26 +0000 Subject: [PATCH 13/32] Remove hiptoolkit package for now --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d7ea3fe..b450b9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -231,9 +231,9 @@ endif() # TRITON_ENABLE_GPU # # ROCM # -if(${TRITON_ENABLE_ROCM}) - find_package(HIPToolkit REQUIRED) -endif() # TRITON_ENABLE_ROCM 
+#if(${TRITON_ENABLE_ROCM}) +# find_package(HIPToolkit REQUIRED) +#endif() # TRITON_ENABLE_ROCM # # Shared library implementing the Triton Backend API From c0877416c1e7d9e02781a5eda220f97ce5ed12bf Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 21 Feb 2024 20:45:34 +0000 Subject: [PATCH 14/32] Use hip::host instead of hiprtc::hiprtc --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b450b9c..2314054 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -364,7 +364,7 @@ if(${TRITON_ENABLE_ROCM}) target_link_libraries( triton-onnxruntime-backend PRIVATE - hiprtc::hiprtc + hip::host ) endif() #TRITON_ENABLE_ROCM From 7e7ec5561867d5b1c3d35ed6a2428fab0d7865da Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 21 Feb 2024 20:53:22 +0000 Subject: [PATCH 15/32] fixup! Use hip::host instead of hiprtc::hiprtc --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2314054..29a6a6e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -231,9 +231,9 @@ endif() # TRITON_ENABLE_GPU # # ROCM # -#if(${TRITON_ENABLE_ROCM}) -# find_package(HIPToolkit REQUIRED) -#endif() # TRITON_ENABLE_ROCM +if(${TRITON_ENABLE_ROCM}) + find_package(hip REQUIRED) +endif() # TRITON_ENABLE_ROCM # # Shared library implementing the Triton Backend API From ef5de154af10d47401d3537b933d4bc034d07a70 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Wed, 21 Feb 2024 23:40:33 +0000 Subject: [PATCH 16/32] Allow flowthrough of base container for ROCm builds --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 29a6a6e..8386975 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -176,7 +176,9 @@ else() endif() if(NOT TRITON_BUILD_CONTAINER) - set(TRITON_BUILD_CONTAINER "nvcr.io/nvidia/tritonserver:${TRITON_BUILD_CONTAINER_VERSION}-py3-min") + if (NOT TRITON_ENABLE_ROCM) + 
set(TRITON_BUILD_CONTAINER "nvcr.io/nvidia/tritonserver:${TRITON_BUILD_CONTAINER_VERSION}-py3-min") + endif() endif() set(TRITON_ONNXRUNTIME_DOCKER_IMAGE "tritonserver_onnxruntime") From 16565e2372fa243cff6f10ea0cf86e4ceda4f28b Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 22 Feb 2024 00:50:14 +0000 Subject: [PATCH 17/32] Hard code ROCm container for now in build. Parameterize later --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8386975..3914a92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -176,7 +176,9 @@ else() endif() if(NOT TRITON_BUILD_CONTAINER) - if (NOT TRITON_ENABLE_ROCM) + if (TRITON_ENABLE_ROCM) + set(TRITON_BUILD_CONTAINER "rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2") + else()_ set(TRITON_BUILD_CONTAINER "nvcr.io/nvidia/tritonserver:${TRITON_BUILD_CONTAINER_VERSION}-py3-min") endif() endif() From 4bb23b2b173c94f6ad64ecc6d9035519cd53c591 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 22 Feb 2024 04:35:27 +0000 Subject: [PATCH 18/32] fixup! Hard code ROCm container for now in build. 
Parameterize later --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3914a92..4708312 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -178,7 +178,7 @@ else() if(NOT TRITON_BUILD_CONTAINER) if (TRITON_ENABLE_ROCM) set(TRITON_BUILD_CONTAINER "rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2") - else()_ + else() set(TRITON_BUILD_CONTAINER "nvcr.io/nvidia/tritonserver:${TRITON_BUILD_CONTAINER_VERSION}-py3-min") endif() endif() From 06a952e6fdc088a1c436f6564bd76910fd192a63 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 22 Feb 2024 23:02:36 +0000 Subject: [PATCH 19/32] Add docker commands to build to view process --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4708312..af823e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -452,6 +452,8 @@ if(TRITON_ONNXRUNTIME_DOCKER_BUILD) COMMAND docker rm onnxruntime_backend_ort || echo 'error ignored...' || true COMMAND docker create --name onnxruntime_backend_ort ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} COMMAND rm -fr onnxruntime + COMMAND docker list + COMMAND docker ps COMMAND docker cp onnxruntime_backend_ort:/opt/onnxruntime onnxruntime COMMAND docker rm onnxruntime_backend_ort COMMENT "Building ONNX Runtime" From e665bbd8dad11fd939b87a069ffd8c542b161c9d Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 22 Feb 2024 23:02:54 +0000 Subject: [PATCH 20/32] Add rocm hip include for onnxruntime.cc --- src/onnxruntime.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/onnxruntime.cc b/src/onnxruntime.cc index decf297..790779b 100644 --- a/src/onnxruntime.cc +++ b/src/onnxruntime.cc @@ -43,6 +43,11 @@ #include #endif // TRITON_ENABLE_GPU +#ifdef TRITON_ENABLE_ROCM +#include +#endif // TRITON_ENABLE_ROCM + + // // ONNX Runtime Backend that implements the TRITONBACKEND API. 
// From 57cdc67ac4e96434de58ed169652654fc121b4ff Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 22 Feb 2024 23:03:45 +0000 Subject: [PATCH 21/32] fixup! Add docker commands to build to view process --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index af823e8..00dcc93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -452,7 +452,7 @@ if(TRITON_ONNXRUNTIME_DOCKER_BUILD) COMMAND docker rm onnxruntime_backend_ort || echo 'error ignored...' || true COMMAND docker create --name onnxruntime_backend_ort ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} COMMAND rm -fr onnxruntime - COMMAND docker list + COMMAND docker image list COMMAND docker ps COMMAND docker cp onnxruntime_backend_ort:/opt/onnxruntime onnxruntime COMMAND docker rm onnxruntime_backend_ort From 3d83ddac9ec5f02c6aa6bd6c21a465581ac741b7 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 22 Feb 2024 23:34:20 +0000 Subject: [PATCH 22/32] Show dockerfile during ORT build --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 00dcc93..348512c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -448,6 +448,7 @@ if(TRITON_ONNXRUNTIME_DOCKER_BUILD) OUTPUT onnxruntime/lib/${ONNXRUNTIME_LIBRARY} COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tools/gen_ort_dockerfile.py --ort-build-config="${CMAKE_BUILD_TYPE}" --triton-container="${TRITON_BUILD_CONTAINER}" --ort-version="${TRITON_BUILD_ONNXRUNTIME_VERSION}" ${_GEN_FLAGS} --output=Dockerfile.ort ${ENABLE_GPU_EXTRA_ARGS} + COMMAND cat Docker.ort COMMAND docker build --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE} --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache0 --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache1 -t ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} -f ./Dockerfile.ort ${CMAKE_CURRENT_SOURCE_DIR} COMMAND docker rm onnxruntime_backend_ort || echo 'error ignored...' 
|| true COMMAND docker create --name onnxruntime_backend_ort ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} From f8302e9e7834509b91ea5c8f52e6b9a015887ce4 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Thu, 22 Feb 2024 23:35:26 +0000 Subject: [PATCH 23/32] Dockerfile.ort --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 348512c..95f0dbb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -448,7 +448,7 @@ if(TRITON_ONNXRUNTIME_DOCKER_BUILD) OUTPUT onnxruntime/lib/${ONNXRUNTIME_LIBRARY} COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tools/gen_ort_dockerfile.py --ort-build-config="${CMAKE_BUILD_TYPE}" --triton-container="${TRITON_BUILD_CONTAINER}" --ort-version="${TRITON_BUILD_ONNXRUNTIME_VERSION}" ${_GEN_FLAGS} --output=Dockerfile.ort ${ENABLE_GPU_EXTRA_ARGS} - COMMAND cat Docker.ort + COMMAND cat Dockerfile.ort COMMAND docker build --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE} --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache0 --cache-from=${TRITON_ONNXRUNTIME_DOCKER_IMAGE}_cache1 -t ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} -f ./Dockerfile.ort ${CMAKE_CURRENT_SOURCE_DIR} COMMAND docker rm onnxruntime_backend_ort || echo 'error ignored...' 
|| true COMMAND docker create --name onnxruntime_backend_ort ${TRITON_ONNXRUNTIME_DOCKER_IMAGE} From cc9320b87a55f3fcbb801002dba115951971c6aa Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Fri, 23 Feb 2024 15:44:12 +0000 Subject: [PATCH 24/32] bit more cleanup on gen_ort_dockerfile.py script --- tools/gen_ort_dockerfile.py | 45 +++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index d678494..134aa7b 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -640,28 +640,30 @@ def preprocess_gpu_flags(): if FLAGS.tensorrt_home is None: FLAGS.tensorrt_home = "/tensorrt" else: - if "CUDNN_VERSION" in os.environ: - version = None - m = re.match(r"([0-9]\.[0-9])\.[0-9]\.[0-9]", os.environ["CUDNN_VERSION"]) - if m: - version = m.group(1) - if FLAGS.cudnn_home is None: - FLAGS.cudnn_home = "/usr/local/cudnn-{}/cuda".format(version) + if FLAGS.enable_gpu: + if "CUDNN_VERSION" in os.environ: + version = None + m = re.match(r"([0-9]\.[0-9])\.[0-9]\.[0-9]", os.environ["CUDNN_VERSION"]) + if m: + version = m.group(1) + if FLAGS.cudnn_home is None: + FLAGS.cudnn_home = "/usr/local/cudnn-{}/cuda".format(version) - if FLAGS.cuda_home is None: - FLAGS.cuda_home = "/usr/local/cuda" + if FLAGS.cuda_home is None: + FLAGS.cuda_home = "/usr/local/cuda" - if (FLAGS.cuda_home is None) or (FLAGS.cudnn_home is None): - print("error: linux build requires --cudnn-home and --cuda-home") + if (FLAGS.cuda_home is None) or (FLAGS.cudnn_home is None): + print("error: linux build requires --cudnn-home and --cuda-home") - if FLAGS.tensorrt_home is None: - FLAGS.tensorrt_home = "/usr/src/tensorrt" - - if FLAGS.rocm_home is None: - FLAGS.rocm_home = "/opt/rocm/" + if FLAGS.tensorrt_home is None: + FLAGS.tensorrt_home = "/usr/src/tensorrt" + + if FLAGS.enable_rocm: + if FLAGS.rocm_home is None: + FLAGS.rocm_home = "/opt/rocm/" - if FLAGS.migraphx_home is None: - 
FLAGS.migraphx_home = "/opt/rocm/" + if FLAGS.migraphx_home is None: + FLAGS.migraphx_home = "/opt/rocm/" @@ -746,7 +748,7 @@ def preprocess_gpu_flags(): parser.add_argument("--migraphx-version", type=str, default="", help="MIGraphX version.") FLAGS = parser.parse_args() - if FLAGS.enable_gpu: + if FLAGS.enable_gpu or FLAGS.enable_rocm: preprocess_gpu_flags() # if a tag is provided by the user, then simply use it @@ -770,6 +772,11 @@ def preprocess_gpu_flags(): if FLAGS.ort_openvino is not None: print("warning: OpenVINO not supported for windows, ignoring") FLAGS.ort_openvino = None + + print("Writing to output for Windows") dockerfile_for_windows(FLAGS.output) + print("Done") else: + print("Writing to output for Linux") dockerfile_for_linux(FLAGS.output) + print("Done") From 80ae1dbfdc218dcfd4961b33626b6b11bfab83ec Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Fri, 23 Feb 2024 17:53:49 +0000 Subject: [PATCH 25/32] Fix issue where we weren't output Dockerfile correctly --- tools/gen_ort_dockerfile.py | 474 +++++++++++++++++++----------------- 1 file changed, 248 insertions(+), 226 deletions(-) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index 134aa7b..b3867c8 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -163,13 +163,13 @@ def dockerfile_for_linux(output_file): apt-get install -y rocrand rccl hipsparse hipfft hipcub hipblas rocthrust hip-base rocm-device-libs hipify-clang miopen-hip-dev rocm-cmake """ - if FLAGS.ort_migraphx: - if FLAGS.migraphx_version is not None: - df+= """ARG MIGRAPHX_VERSION={}""".format(FLAGS.migraphx_version) - else: - df+= """ARG MIGRAPHX_VERSION=develop""" + if FLAGS.ort_migraphx: + if FLAGS.migraphx_version is not None: + df+= """ARG MIGRAPHX_VERSION={}""".format(FLAGS.migraphx_version) + else: + df+= """ARG MIGRAPHX_VERSION=develop""" - df += """ + df += """ # Install MIGraphX from source ARG 
GPU_TARGETS='gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx940;gfx941;gfx942' RUN mkdir -p /migraphx @@ -180,135 +180,137 @@ def dockerfile_for_linux(output_file): """ - if FLAGS.ort_openvino is not None: - df += """ - # Install OpenVINO - ARG ONNXRUNTIME_OPENVINO_VERSION - ENV INTEL_OPENVINO_DIR /opt/intel/openvino_${ONNXRUNTIME_OPENVINO_VERSION} - - # Step 1: Download and install core components - # Ref: https://docs.openvino.ai/2023.0/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-1-download-and-install-the-openvino-core-components - RUN curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64.tgz --output openvino_${ONNXRUNTIME_OPENVINO_VERSION}.tgz && \ - tar -xf openvino_${ONNXRUNTIME_OPENVINO_VERSION}.tgz && \ - mkdir -p ${INTEL_OPENVINO_DIR} && \ - mv l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/* ${INTEL_OPENVINO_DIR} && \ - rm openvino_${ONNXRUNTIME_OPENVINO_VERSION}.tgz && \ - (cd ${INTEL_OPENVINO_DIR}/install_dependencies && \ - ./install_openvino_dependencies.sh -y) && \ - ln -s ${INTEL_OPENVINO_DIR} ${INTEL_OPENVINO_DIR}/../openvino_`echo ${ONNXRUNTIME_OPENVINO_VERSION} | awk '{print substr($0,0,4)}'` - - # Step 2: Configure the environment - # Ref: https://docs.openvino.ai/2023.0/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-2-configure-the-environment - ENV InferenceEngine_DIR=$INTEL_OPENVINO_DIR/runtime/cmake - ENV ngraph_DIR=$INTEL_OPENVINO_DIR/runtime/cmake - ENV OpenVINO_DIR=$INTEL_OPENVINO_DIR/runtime/cmake - ENV LD_LIBRARY_PATH $INTEL_OPENVINO_DIR/runtime/lib/intel64:$LD_LIBRARY_PATH - ENV PKG_CONFIG_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64/pkgconfig - ENV PYTHONPATH $INTEL_OPENVINO_DIR/python/python3.10:$INTEL_OPENVINO_DIR/python/python3:$PYTHONPATH - """ + if FLAGS.ort_openvino is not None: + df += """ +# Install OpenVINO +ARG ONNXRUNTIME_OPENVINO_VERSION +ENV 
INTEL_OPENVINO_DIR /opt/intel/openvino_${ONNXRUNTIME_OPENVINO_VERSION} + +# Step 1: Download and install core components +# Ref: https://docs.openvino.ai/2023.0/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-1-download-and-install-the-openvino-core-components +RUN curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.0/linux/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64.tgz --output openvino_${ONNXRUNTIME_OPENVINO_VERSION}.tgz && \ + tar -xf openvino_${ONNXRUNTIME_OPENVINO_VERSION}.tgz && \ + mkdir -p ${INTEL_OPENVINO_DIR} && \ + mv l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/* ${INTEL_OPENVINO_DIR} && \ + rm openvino_${ONNXRUNTIME_OPENVINO_VERSION}.tgz && \ + (cd ${INTEL_OPENVINO_DIR}/install_dependencies && \ + ./install_openvino_dependencies.sh -y) && \ + ln -s ${INTEL_OPENVINO_DIR} ${INTEL_OPENVINO_DIR}/../openvino_`echo ${ONNXRUNTIME_OPENVINO_VERSION} | awk '{print substr($0,0,4)}'` + +# Step 2: Configure the environment +# Ref: https://docs.openvino.ai/2023.0/openvino_docs_install_guides_installing_openvino_from_archive_linux.html#step-2-configure-the-environment +ENV InferenceEngine_DIR=$INTEL_OPENVINO_DIR/runtime/cmake +ENV ngraph_DIR=$INTEL_OPENVINO_DIR/runtime/cmake +ENV OpenVINO_DIR=$INTEL_OPENVINO_DIR/runtime/cmake +ENV LD_LIBRARY_PATH $INTEL_OPENVINO_DIR/runtime/lib/intel64:$LD_LIBRARY_PATH +ENV PKG_CONFIG_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64/pkgconfig +ENV PYTHONPATH $INTEL_OPENVINO_DIR/python/python3.10:$INTEL_OPENVINO_DIR/python/python3:$PYTHONPATH +""" - ## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support. - # For ORT versions 1.8.0 and below the behavior will remain same. 
For ORT version 1.8.1 we will - # use tensorrt-8.0 branch instead of using rel-1.8.1 - # From ORT 1.9 onwards we will switch back to using rel-* branches - if FLAGS.ort_version == "1.8.1": - df += """ - # - # ONNX Runtime build - # - ARG ONNXRUNTIME_VERSION - ARG ONNXRUNTIME_REPO - ARG ONNXRUNTIME_BUILD_CONFIG - - RUN git clone -b tensorrt-8.0 --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ - (cd onnxruntime && git submodule update --init --recursive) + ## TEMPORARY: Using the tensorrt-8.0 branch until ORT 1.9 release to enable ORT backend with TRT 8.0 support. + # For ORT versions 1.8.0 and below the behavior will remain same. For ORT version 1.8.1 we will + # use tensorrt-8.0 branch instead of using rel-1.8.1 + # From ORT 1.9 onwards we will switch back to using rel-* branches + if FLAGS.ort_version == "1.8.1": + df += """ + # + # ONNX Runtime build + # + ARG ONNXRUNTIME_VERSION + ARG ONNXRUNTIME_REPO + ARG ONNXRUNTIME_BUILD_CONFIG - """ - # Use the tensorrt-8.5ea branch to use Tensor RT 8.5a to use the built-in tensorrt parser - elif FLAGS.ort_version == "1.12.1": - df += """ - # - # ONNX Runtime build - # - ARG ONNXRUNTIME_VERSION - ARG ONNXRUNTIME_REPO - ARG ONNXRUNTIME_BUILD_CONFIG + RUN git clone -b tensorrt-8.0 --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ + (cd onnxruntime && git submodule update --init --recursive) + + """ + # Use the tensorrt-8.5ea branch to use Tensor RT 8.5a to use the built-in tensorrt parser + elif FLAGS.ort_version == "1.12.1": + df += """ + # + # ONNX Runtime build + # + ARG ONNXRUNTIME_VERSION + ARG ONNXRUNTIME_REPO + ARG ONNXRUNTIME_BUILD_CONFIG RUN git clone -b tensorrt-8.5ea --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ (cd onnxruntime && git submodule update --init --recursive) """ - elif FLAGS.enable_rocm: + elif FLAGS.enable_rocm: df += """ - # - # onnx runtime build - # - ARG ONNXRUNTIME_VERSION - ARG ONNXRUNTIME_REPO - ARG ONNXRUNTIME_BUILD_CONFIG - - run git clone -b ${ONNXRUNTIME_VERSION} --recursive 
${ONNXRUNTIME_REPO} onnxruntime && \ - (cd onnxruntime && git submodule update --init --recursive) + # + # onnx runtime build + # + ARG ONNXRUNTIME_VERSION + ARG ONNXRUNTIME_REPO + ARG ONNXRUNTIME_BUILD_CONFIG - """ + run git clone -b ${ONNXRUNTIME_VERSION} --recursive ${ONNXRUNTIME_REPO} onnxruntime && \ + (cd onnxruntime && git submodule update --init --recursive) - else: - df += """ - # - # onnx runtime build - # - arg onnxruntime_version - arg onnxruntime_repo - arg onnxruntime_build_config - - run git clone -b rel-${onnxruntime_version} --recursive ${onnxruntime_repo} onnxruntime && \ - (cd onnxruntime && git submodule update --init --recursive) + """ - """ + else: + df += """ + # + # onnx runtime build + # + arg onnxruntime_version + arg onnxruntime_repo + arg onnxruntime_build_config - if FLAGS.onnx_tensorrt_tag != "": - df += """ - RUN (cd /workspace/onnxruntime/cmake/external/onnx-tensorrt && git fetch origin {}:ortrefbranch && git checkout ortrefbranch) - """.format( - FLAGS.onnx_tensorrt_tag - ) + run git clone -b rel-${onnxruntime_version} --recursive ${onnxruntime_repo} onnxruntime && \ + (cd onnxruntime && git submodule update --init --recursive) - ep_flags = "" - if FLAGS.enable_gpu: - ep_flags = "--use_cuda" - if FLAGS.cuda_version is not None: - ep_flags += ' --cuda_version "{}"'.format(FLAGS.cuda_version) - if FLAGS.cuda_home is not None: - ep_flags += ' --cuda_home "{}"'.format(FLAGS.cuda_home) - if FLAGS.cudnn_home is not None: - ep_flags += ' --cudnn_home "{}"'.format(FLAGS.cudnn_home) - if FLAGS.ort_tensorrt: - ep_flags += " --use_tensorrt" - if FLAGS.ort_version >= "1.12.1": - ep_flags += " --use_tensorrt_builtin_parser" - if FLAGS.tensorrt_home is not None: - ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) - cmake_defs = "CMAKE_CUDA_ARCHITECTURES" - cuda_archs = "\'60;61;70;75;80;86;90\'" + """ - if FLAGS.enable_rocm: - ep_flags = "--use_rocm" - ep_flags += " --allow_running_as_root" - df += """ + if FLAGS.onnx_tensorrt_tag 
!= "": + df += """ + RUN (cd /workspace/onnxruntime/cmake/external/onnx-tensorrt && git fetch origin {}:ortrefbranch && git checkout ortrefbranch) + """.format( + FLAGS.onnx_tensorrt_tag + ) + + ep_flags = "" + if FLAGS.enable_gpu: + ep_flags = "--use_cuda" + if FLAGS.cuda_version is not None: + ep_flags += ' --cuda_version "{}"'.format(FLAGS.cuda_version) + if FLAGS.cuda_home is not None: + ep_flags += ' --cuda_home "{}"'.format(FLAGS.cuda_home) + if FLAGS.cudnn_home is not None: + ep_flags += ' --cudnn_home "{}"'.format(FLAGS.cudnn_home) + elif target_platform() == "igpu": + ep_flags += ' --cudnn_home "/usr/lib/aarch64-linux-gnu"' + if FLAGS.ort_tensorrt: + ep_flags += " --use_tensorrt" + if FLAGS.ort_version >= "1.12.1": + ep_flags += " --use_tensorrt_builtin_parser" + if FLAGS.tensorrt_home is not None: + ep_flags += ' --tensorrt_home "{}"'.format(FLAGS.tensorrt_home) + cmake_defs = "CMAKE_CUDA_ARCHITECTURES" + cuda_archs = "\'60;61;70;75;80;86;90\'" + + if FLAGS.enable_rocm: + ep_flags = "--use_rocm" + ep_flags += " --allow_running_as_root" + df += """ RUN export PATH="/opt/cmake/bin:$PATH" RUN export CXXFLAGS="-D__HIP_PLATFORM_AMD__=1 -w" """ - #if FLAGS.rocm_version is not None: - #ep_flags += ' --rocm_version "{}"'.format(FLAGS.rocm_version) - if FLAGS.rocm_home is not None: - ep_flags += ' --rocm_home "{}"'.format(FLAGS.rocm_home) - if FLAGS.ort_migraphx: - ep_flags += " --use_migraphx" - if FLAGS.migraphx_home is not None: - ep_flags += ' --migraphx_home "{}"'.format(FLAGS.migraphx_home) - cmake_defs = "CMAKE_HIP_COMPILER" - cuda_archs = "/opt/rocm/llvm/bin/clang++" - ep_flags += " --allow_running_as_root" + if FLAGS.rocm_version is not None: + ep_flags += ' --rocm_version "{}"'.format(FLAGS.rocm_version) + if FLAGS.rocm_home is not None: + ep_flags += ' --rocm_home "{}"'.format(FLAGS.rocm_home) + if FLAGS.ort_migraphx: + ep_flags += " --use_migraphx" + if FLAGS.migraphx_home is not None: + ep_flags += ' --migraphx_home "{}"'.format(FLAGS.migraphx_home) + 
cmake_defs = "CMAKE_HIP_COMPILER" + cuda_archs = "/opt/rocm/llvm/bin/clang++" + ep_flags += " --allow_running_as_root" if os.name == "posix": if os.getuid() == 0: @@ -317,6 +319,13 @@ def dockerfile_for_linux(output_file): if FLAGS.ort_openvino is not None: ep_flags += " --use_openvino CPU_FP32" + if target_platform() == "igpu": + ep_flags += ( + " --skip_tests --cmake_extra_defines 'onnxruntime_BUILD_UNIT_TESTS=OFF'" + ) + cuda_archs = "53;62;72;87" + elif not FLAGS.enable_rocm: + cuda_archs = "60;61;70;75;80;86;90" df += """ WORKDIR /workspace/onnxruntime @@ -357,7 +366,13 @@ def dockerfile_for_linux(output_file): /opt/onnxruntime/lib && \ cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime.so \ /opt/onnxruntime/lib - +""" + if target_platform() == "igpu": + df += """ +RUN mkdir -p /opt/onnxruntime/bin +""" + else: + df += """ RUN mkdir -p /opt/onnxruntime/bin && \ cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/onnxruntime_perf_test \ /opt/onnxruntime/bin && \ @@ -442,6 +457,13 @@ def dockerfile_for_linux(output_file): done # For testing copy ONNX custom op library and model +""" + if target_platform() == "igpu": + df += """ +RUN mkdir -p /opt/onnxruntime/test +""" + else: + df += """ RUN mkdir -p /opt/onnxruntime/test && \ cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libcustom_op_library.so \ /opt/onnxruntime/test && \ @@ -611,125 +633,125 @@ def dockerfile_for_windows(output_file): dfile.write(df) - def preprocess_gpu_flags(): - if target_platform() == "windows": - # Default to CUDA based on CUDA_PATH envvar and TensorRT in - # C:/tensorrt - if "CUDA_PATH" in os.environ: - if FLAGS.cuda_home is None: - FLAGS.cuda_home = os.environ["CUDA_PATH"] - elif FLAGS.cuda_home != os.environ["CUDA_PATH"]: - print("warning: --cuda-home does not match CUDA_PATH envvar") - - if FLAGS.cudnn_home is None: - FLAGS.cudnn_home = FLAGS.cuda_home - - version = None - m = re.match(r".*v([1-9]?[0-9]+\.[0-9]+)$", FLAGS.cuda_home) - if m: - version = m.group(1) +def 
preprocess_gpu_flags(): + if target_platform() == "windows": + # Default to CUDA based on CUDA_PATH envvar and TensorRT in + # C:/tensorrt + if "CUDA_PATH" in os.environ: + if FLAGS.cuda_home is None: + FLAGS.cuda_home = os.environ["CUDA_PATH"] + elif FLAGS.cuda_home != os.environ["CUDA_PATH"]: + print("warning: --cuda-home does not match CUDA_PATH envvar") + + if FLAGS.cudnn_home is None: + FLAGS.cudnn_home = FLAGS.cuda_home + + version = None + m = re.match(r".*v([1-9]?[0-9]+\.[0-9]+)$", FLAGS.cuda_home) + if m: + version = m.group(1) + + if FLAGS.cuda_version is None: + FLAGS.cuda_version = version + elif FLAGS.cuda_version != version: + print("warning: --cuda-version does not match CUDA_PATH envvar") + + if (FLAGS.cuda_home is None) or (FLAGS.cuda_version is None): + print("error: windows build requires --cuda-version and --cuda-home") + + if FLAGS.tensorrt_home is None: + FLAGS.tensorrt_home = "/tensorrt" + else: + if FLAGS.enable_gpu: + if "CUDNN_VERSION" in os.environ: + version = None + m = re.match(r"([0-9]\.[0-9])\.[0-9]\.[0-9]", os.environ["CUDNN_VERSION"]) + if m: + version = m.group(1) + if FLAGS.cudnn_home is None: + FLAGS.cudnn_home = "/usr/local/cudnn-{}/cuda".format(version) - if FLAGS.cuda_version is None: - FLAGS.cuda_version = version - elif FLAGS.cuda_version != version: - print("warning: --cuda-version does not match CUDA_PATH envvar") + if FLAGS.cuda_home is None: + FLAGS.cuda_home = "/usr/local/cuda" - if (FLAGS.cuda_home is None) or (FLAGS.cuda_version is None): - print("error: windows build requires --cuda-version and --cuda-home") + if (FLAGS.cuda_home is None) or (FLAGS.cudnn_home is None): + print("error: linux build requires --cudnn-home and --cuda-home") if FLAGS.tensorrt_home is None: - FLAGS.tensorrt_home = "/tensorrt" - else: - if FLAGS.enable_gpu: - if "CUDNN_VERSION" in os.environ: - version = None - m = re.match(r"([0-9]\.[0-9])\.[0-9]\.[0-9]", os.environ["CUDNN_VERSION"]) - if m: - version = m.group(1) - if FLAGS.cudnn_home is 
None: - FLAGS.cudnn_home = "/usr/local/cudnn-{}/cuda".format(version) - - if FLAGS.cuda_home is None: - FLAGS.cuda_home = "/usr/local/cuda" - - if (FLAGS.cuda_home is None) or (FLAGS.cudnn_home is None): - print("error: linux build requires --cudnn-home and --cuda-home") + FLAGS.tensorrt_home = "/usr/src/tensorrt" - if FLAGS.tensorrt_home is None: - FLAGS.tensorrt_home = "/usr/src/tensorrt" - - if FLAGS.enable_rocm: - if FLAGS.rocm_home is None: - FLAGS.rocm_home = "/opt/rocm/" + if FLAGS.enable_rocm: + if FLAGS.rocm_home is None: + FLAGS.rocm_home = "/opt/rocm/" - if FLAGS.migraphx_home is None: - FLAGS.migraphx_home = "/opt/rocm/" + if FLAGS.migraphx_home is None: + FLAGS.migraphx_home = "/opt/rocm/" - if __name__ == "__main__": - parser = argparse.ArgumentParser() +if __name__ == "__main__": + parser = argparse.ArgumentParser() - parser.add_argument( - "--triton-container", - type=str, - required=True, - help="Triton base container to use for ORT build.", - ) - parser.add_argument("--ort-version", type=str, required=True, help="ORT version.") - parser.add_argument( - "--output", type=str, required=True, help="File to write Dockerfile to." - ) - parser.add_argument( - "--enable-gpu", action="store_true", required=False, help="Enable GPU support" - ) - parser.add_argument( - "--enable-rocm", action="store_true", required=False, help="Enable GPU support" - ) - parser.add_argument( - "--ort-build-config", - type=str, - default="Release", - choices=["Debug", "Release", "RelWithDebInfo"], - help="ORT build configuration.", - ) - parser.add_argument( - "--target-platform", - required=False, - default=None, - help='Target for build, can be "ubuntu", "windows" or "jetpack". 
If not specified, build targets the current platform.', - ) + parser.add_argument( + "--triton-container", + type=str, + required=True, + help="Triton base container to use for ORT build.", + ) + parser.add_argument("--ort-version", type=str, required=True, help="ORT version.") + parser.add_argument( + "--output", type=str, required=True, help="File to write Dockerfile to." + ) + parser.add_argument( + "--enable-gpu", action="store_true", required=False, help="Enable GPU support" + ) + parser.add_argument( + "--enable-rocm", action="store_true", required=False, help="Enable GPU support" + ) + parser.add_argument( + "--ort-build-config", + type=str, + default="Release", + choices=["Debug", "Release", "RelWithDebInfo"], + help="ORT build configuration.", + ) + parser.add_argument( + "--target-platform", + required=False, + default=None, + help='Target for build, can be "ubuntu", "windows" or "jetpack". If not specified, build targets the current platform.', + ) - parser.add_argument( - "--cuda-version", type=str, required=False, help="Version for CUDA." - ) - parser.add_argument( - "--cuda-home", type=str, required=False, help="Home directory for CUDA." - ) - parser.add_argument( - "--rocm-version", type=str, required=False, help="Version for ROCM." - ) - parser.add_argument( - "--rocm-home", type=str, required=False, help="Home directory for ROCM." - ) + parser.add_argument( + "--cuda-version", type=str, required=False, help="Version for CUDA." + ) + parser.add_argument( + "--cuda-home", type=str, required=False, help="Home directory for CUDA." + ) + parser.add_argument( + "--rocm-version", type=str, required=False, help="Version for ROCM." + ) + parser.add_argument( + "--rocm-home", type=str, required=False, help="Home directory for ROCM." + ) - parser.add_argument( - "--cudnn-home", type=str, required=False, help="Home directory for CUDNN." 
- ) - parser.add_argument( - "--ort-openvino", - type=str, - required=False, - help="Enable OpenVino execution provider using specified OpenVINO version.", - ) - parser.add_argument( - "--ort-tensorrt", - action="store_true", - required=False, - help="Enable TensorRT execution provider.", - ) - parser.add_argument( - "--tensorrt-home", type=str, required=False, help="Home directory for TensorRT." + parser.add_argument( + "--cudnn-home", type=str, required=False, help="Home directory for CUDNN." + ) + parser.add_argument( + "--ort-openvino", + type=str, + required=False, + help="Enable OpenVino execution provider using specified OpenVINO version.", + ) + parser.add_argument( + "--ort-tensorrt", + action="store_true", + required=False, + help="Enable TensorRT execution provider.", + ) + parser.add_argument( + "--tensorrt-home", type=str, required=False, help="Home directory for TensorRT." ) parser.add_argument( "--onnx-tensorrt-tag", type=str, default="", help="onnx-tensorrt repo tag." From 5160558ac349920b82bc102d2d1a13da0a73302b Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Fri, 23 Feb 2024 21:51:17 +0000 Subject: [PATCH 26/32] fix dir for MIGraphX dockerfile --- tools/gen_ort_dockerfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index b3867c8..807fa8b 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -174,7 +174,7 @@ def dockerfile_for_linux(output_file): ARG GPU_TARGETS='gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx940;gfx941;gfx942' RUN mkdir -p /migraphx RUN cd /migraphx && git clone --depth=1 --branch ${MIGRAPHX_VERSION} https://github.com/ROCm/AMDMIGraphX src - RUN cd /migraphx && rbuild package --cxx /opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3 -DGPU_TARGETS=${GPU_TARGETS} + RUN rbuild package --cxx /opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build 
-S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3 -DGPU_TARGETS=${GPU_TARGETS} RUN dpkg -i /migraphx/build/*.deb RUN rm -rf /migraphx """ From 779f0174d3a6dbe5921415c12032f1f3a62d3b49 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Sat, 24 Feb 2024 00:03:29 +0000 Subject: [PATCH 27/32] Update migraphx build arg --- tools/gen_ort_dockerfile.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index 807fa8b..f63b65e 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -140,27 +140,27 @@ def dockerfile_for_linux(output_file): # Install rocm RUN apt-get update && apt-get install -y gnupg2 --no-install-recommends curl && \ -curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \ -sh -c 'echo deb [arch=amd64] http://repo.radeon.com/rocm/apt/${ROCM_VERSION}/ ubuntu main > /etc/apt/sources.list.d/rocm.list' +curl -fsSL http://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg && \ +sh -c 'echo deb [arch=amd64] http://repo.radeon.com/rocm/apt/${ROCM_VERSION}/ jammy main > /etc/apt/sources.list.d/rocm.list' # From docs.amd.com for installing rocm. 
Needed to install properly RUN sh -c \"echo 'Package: *\\nPin: release o=repo.radeon.com\\nPin-priority: 600' > /etc/apt/preferences.d/rocm-pin-600\" RUN apt-get update &&\ - apt-get install -y sudo git bash build-essential rocm-dev python3-dev python3-pip miopen-hip \ - rocblas half aria2 libnuma-dev pkg-config + apt-get install -y sudo git apt-utils bash build-essential curl doxygen gdb rocm-dev python3-dev python3-pip miopen-hip \ + rocblas half aria2 libnuma-dev pkg-config ccache software-properties-common wget libnuma-dev libssl-dev zlib1g-dev RUN aria2c -q -d /tmp -o cmake-3.27.3-linux-x86_64.tar.gz \ https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.27.3-linux-x86_64.tar.gz &&\ tar -zxf /tmp/cmake-3.27.3-linux-x86_64.tar.gz --strip=1 -C /usr # Install rbuild -RUN pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz numpy yapf==0.28.0 +RUN pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz numpy yapf==0.28.0 asciidoc CppHeaderParser setuptools wheel ENV PATH /opt/miniconda/bin:/code/cmake-3.27.3-linux-x86_64/bin:${PATH} # Install rocm ep dependencies RUN apt-get update &&\ - apt-get install -y rocrand rccl hipsparse hipfft hipcub hipblas rocthrust hip-base rocm-device-libs hipify-clang miopen-hip-dev rocm-cmake + apt-get install -y rocrand rccl rccl-dev hipsparse hipfft hipcub hipblas rocthrust hip-base rocm-device-libs hipify-clang miopen-hip-dev rocm-cmake """ if FLAGS.ort_migraphx: @@ -172,11 +172,16 @@ def dockerfile_for_linux(output_file): df += """ # Install MIGraphX from source ARG GPU_TARGETS='gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx940;gfx941;gfx942' -RUN mkdir -p /migraphx - RUN cd /migraphx && git clone --depth=1 --branch ${MIGRAPHX_VERSION} https://github.com/ROCm/AMDMIGraphX src - RUN rbuild package --cxx /opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3 -DGPU_TARGETS=${GPU_TARGETS} - RUN dpkg -i 
/migraphx/build/*.deb - RUN rm -rf /migraphx + +# Workaround broken rocm packages +RUN ln -s /opt/rocm-* /opt/rocm +RUN echo "/opt/rocm/lib" > /etc/ld.so.conf.d/rocm.conf +RUN echo "/opt/rocm/llvm/lib" > /etc/ld.so.conf.d/rocm-llvm.conf +RUN ldconfig + +RUN mkdir /migraphx +RUN cd /migraphx && git clone --depth=1 --branch ${MIGRAPHX_VERSION} https://github.com/ROCm/AMDMIGraphX src && cd src && rbuild package --cxx /opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -DPYTHON_EXECUTABLE=/usr/bin/python3 -DBUILD_DEV=On -DGPU_TARGETS=${GPU_TARGETS} && dpkg -i /migraphx/build/*.deb +RUN cd / && rm -rf /migraphx """ From 3386bf21d12d1ebb1ab7bce860346d3ea3fa3d3f Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Sat, 24 Feb 2024 18:35:57 +0000 Subject: [PATCH 28/32] Remove workspace dir and work off root for Onnxruntime --- tools/gen_ort_dockerfile.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index f63b65e..26c35b7 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -333,9 +333,9 @@ def dockerfile_for_linux(output_file): cuda_archs = "60;61;70;75;80;86;90" df += """ - WORKDIR /workspace/onnxruntime + WORKDIR /onnxruntime ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} --skip_submodule_sync --parallel --build_shared_lib \ - --build_dir /workspace/build --cmake_extra_defines {}={} " + --build_dir /build --cmake_extra_defines {}={} " """.format( cmake_defs, cuda_archs @@ -354,22 +354,22 @@ def dockerfile_for_linux(output_file): WORKDIR /opt/onnxruntime RUN mkdir -p /opt/onnxruntime && \ - cp /workspace/onnxruntime/LICENSE /opt/onnxruntime && \ - cat /workspace/onnxruntime/cmake/external/onnx/VERSION_NUMBER > /opt/onnxruntime/ort_onnx_version.txt + cp /onnxruntime/LICENSE /opt/onnxruntime && \ + cat /onnxruntime/cmake/external/onnx/VERSION_NUMBER > /opt/onnxruntime/ort_onnx_version.txt # ONNX Runtime headers, libraries 
and binaries RUN mkdir -p /opt/onnxruntime/include && \ - cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_c_api.h \ + cp /onnxruntime/include/onnxruntime/core/session/onnxruntime_c_api.h \ /opt/onnxruntime/include && \ - cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h \ + cp /onnxruntime/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h \ /opt/onnxruntime/include && \ - cp /workspace/onnxruntime/include/onnxruntime/core/providers/cpu/cpu_provider_factory.h \ + cp /onnxruntime/include/onnxruntime/core/providers/cpu/cpu_provider_factory.h \ /opt/onnxruntime/include RUN mkdir -p /opt/onnxruntime/lib && \ - cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_shared.so \ + cp /build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_shared.so \ /opt/onnxruntime/lib && \ - cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime.so \ + cp /build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime.so \ /opt/onnxruntime/lib """ if target_platform() == "igpu": From 6dfd21964927e4afe10cf795062688a7334c8681 Mon Sep 17 00:00:00 2001 From: Ted Themistokleous Date: Mon, 26 Feb 2024 23:36:40 +0000 Subject: [PATCH 29/32] Revert "Remove workspace dir and work off root for Onnxruntime" This reverts commit 3386bf21d12d1ebb1ab7bce860346d3ea3fa3d3f. 
--- tools/gen_ort_dockerfile.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py index 26c35b7..f63b65e 100755 --- a/tools/gen_ort_dockerfile.py +++ b/tools/gen_ort_dockerfile.py @@ -333,9 +333,9 @@ def dockerfile_for_linux(output_file): cuda_archs = "60;61;70;75;80;86;90" df += """ - WORKDIR /onnxruntime + WORKDIR /workspace/onnxruntime ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} --skip_submodule_sync --parallel --build_shared_lib \ - --build_dir /build --cmake_extra_defines {}={} " + --build_dir /workspace/build --cmake_extra_defines {}={} " """.format( cmake_defs, cuda_archs @@ -354,22 +354,22 @@ def dockerfile_for_linux(output_file): WORKDIR /opt/onnxruntime RUN mkdir -p /opt/onnxruntime && \ - cp /onnxruntime/LICENSE /opt/onnxruntime && \ - cat /onnxruntime/cmake/external/onnx/VERSION_NUMBER > /opt/onnxruntime/ort_onnx_version.txt + cp /workspace/onnxruntime/LICENSE /opt/onnxruntime && \ + cat /workspace/onnxruntime/cmake/external/onnx/VERSION_NUMBER > /opt/onnxruntime/ort_onnx_version.txt # ONNX Runtime headers, libraries and binaries RUN mkdir -p /opt/onnxruntime/include && \ - cp /onnxruntime/include/onnxruntime/core/session/onnxruntime_c_api.h \ + cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_c_api.h \ /opt/onnxruntime/include && \ - cp /onnxruntime/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h \ + cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h \ /opt/onnxruntime/include && \ - cp /onnxruntime/include/onnxruntime/core/providers/cpu/cpu_provider_factory.h \ + cp /workspace/onnxruntime/include/onnxruntime/core/providers/cpu/cpu_provider_factory.h \ /opt/onnxruntime/include RUN mkdir -p /opt/onnxruntime/lib && \ - cp /build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_shared.so \ + cp 
/workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_shared.so \
     /opt/onnxruntime/lib && \
-    cp /build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime.so \
+    cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime.so \
     /opt/onnxruntime/lib
 """
     if target_platform() == "igpu":

From 4e3bf747e2746e1bd7162c9a1a55cf3f657046c6 Mon Sep 17 00:00:00 2001
From: Ted Themistokleous
Date: Mon, 26 Feb 2024 23:38:21 +0000
Subject: [PATCH 30/32] Fix WORKDIR for onnxruntime. Erroneously removed when
 adding back older pieces

---
 tools/gen_ort_dockerfile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py
index f63b65e..4c0c9e5 100755
--- a/tools/gen_ort_dockerfile.py
+++ b/tools/gen_ort_dockerfile.py
@@ -332,7 +332,7 @@ def dockerfile_for_linux(output_file):
     elif not FLAGS.enable_rocm:
         cuda_archs = "60;61;70;75;80;86;90"

-    df += """
+    df += """
 WORKDIR /workspace/onnxruntime
 ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} --skip_submodule_sync --parallel --build_shared_lib \
     --build_dir /workspace/build --cmake_extra_defines {}={} "

From e24b3207a61f8f0fb0c5b7688d49ec3d1092a3f9 Mon Sep 17 00:00:00 2001
From: Ted Themistokleous
Date: Tue, 27 Feb 2024 00:41:21 +0000
Subject: [PATCH 31/32] fixup! Fix WORKDIR for onnxruntime.
 Erroneously removed when adding back older pieces

---
 tools/gen_ort_dockerfile.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py
index 4c0c9e5..f8d123d 100755
--- a/tools/gen_ort_dockerfile.py
+++ b/tools/gen_ort_dockerfile.py
@@ -341,13 +341,13 @@ def dockerfile_for_linux(output_file):
         cuda_archs
     )

-    df += """
+    df += """
 RUN ./build.sh ${{COMMON_BUILD_ARGS}} --update --build {}
 """.format(
         ep_flags
     )

-    df += """
+    df += """
 #
 # Copy all artifacts needed by the backend to /opt/onnxruntime
 #

From e769b8280d659e6ecc82c480cb235fd947dd5751 Mon Sep 17 00:00:00 2001
From: Ted Themistokleous
Date: Tue, 27 Feb 2024 03:29:15 +0000
Subject: [PATCH 32/32] Fix dir for migraphx_provider_factory.h

---
 tools/gen_ort_dockerfile.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/gen_ort_dockerfile.py b/tools/gen_ort_dockerfile.py
index f8d123d..d3f08e8 100755
--- a/tools/gen_ort_dockerfile.py
+++ b/tools/gen_ort_dockerfile.py
@@ -335,7 +335,7 @@ def dockerfile_for_linux(output_file):
     df += """
 WORKDIR /workspace/onnxruntime
 ARG COMMON_BUILD_ARGS="--config ${{ONNXRUNTIME_BUILD_CONFIG}} --skip_submodule_sync --parallel --build_shared_lib \
-    --build_dir /workspace/build --cmake_extra_defines {}={} "
+    --build_dir /workspace/build --cmake_extra_defines {}={} "
 """.format(
     cmake_defs,
     cuda_archs
 )
@@ -408,8 +408,8 @@ def dockerfile_for_linux(output_file):
     if FLAGS.ort_migraphx:
         df += """
-    # TensorRT specific headers and libraries
-    RUN cp /workspace/onnxruntime/include/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h \
+    # MIGraphX specific headers and libraries
+    RUN cp /workspace/onnxruntime/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h \
     /opt/onnxruntime/include && \
     cp /workspace/build/${ONNXRUNTIME_BUILD_CONFIG}/libonnxruntime_providers_migraphx.so \
     /opt/onnxruntime/lib