diff --git a/onnxruntime/test/python/transformers/test_mha.py b/onnxruntime/test/python/transformers/test_mha.py
index 9e7c7378370c1..45726ecc7c2b0 100644
--- a/onnxruntime/test/python/transformers/test_mha.py
+++ b/onnxruntime/test/python/transformers/test_mha.py
@@ -892,7 +892,7 @@ def test_all(self):
         # Run tests sequentially to avoid out of memory issue.
         self.run_mha_cpu()
         self.run_mha_cuda()
-        self.run_lean_attention()
+        # self.run_lean_attention()
         self.run_mha_cuda_multi_threading_default()
         self.run_mha_cuda_multi_threading_cudnn()
         self.run_mha_cuda_multi_threading_efficient()
diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml
index 4f3f56f2e27af..16c64243eb4ef 100644
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-gpu-ci-pipeline.yml
@@ -137,7 +137,7 @@ stages:
       skipComponentGovernanceDetection: true
     workspace:
       clean: all
-    pool: onnxruntime-Linux-GPU-T4
+    pool: Onnxruntime-Linux-A10-24G
     steps:
     - checkout: self
       clean: true
diff --git a/tools/ci_build/github/linux/build_cuda_ci.sh b/tools/ci_build/github/linux/build_cuda_ci.sh
index 0533b7b394492..fbf69d268a033 100755
--- a/tools/ci_build/github/linux/build_cuda_ci.sh
+++ b/tools/ci_build/github/linux/build_cuda_ci.sh
@@ -21,7 +21,7 @@ BUILD_ARGS=('--config'
             "--enable_pybind"
             "--build_java"
             "--cmake_extra_defines"
-            "CMAKE_CUDA_ARCHITECTURES=75"
+            "CMAKE_CUDA_ARCHITECTURES=86"
             "onnxruntime_BUILD_UNIT_TESTS=ON"
             "onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON")
 if [ -x "$(command -v ninja)" ]; then